Character.java revision 25fbd653a83f3a22235eb61e6e34f46ef3e990b7
1/*
2 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package java.lang;
27
28import java.util.Arrays;
29import java.util.HashMap;
30import java.util.Locale;
31import java.util.Map;
32
33/**
34 * The {@code Character} class wraps a value of the primitive
35 * type {@code char} in an object. An object of type
36 * {@code Character} contains a single field whose type is
37 * {@code char}.
38 * <p>
39 * In addition, this class provides several methods for determining
40 * a character's category (lowercase letter, digit, etc.) and for converting
41 * characters from uppercase to lowercase and vice versa.
42 * <p>
43 * Character information is based on the Unicode Standard, version 6.2.0.
44 * <p>
45 * The methods and data of class {@code Character} are defined by
46 * the information in the <i>UnicodeData</i> file that is part of the
47 * Unicode Character Database maintained by the Unicode
48 * Consortium. This file specifies various properties including name
49 * and general category for every defined Unicode code point or
50 * character range.
51 * <p>
52 * The file and its description are available from the Unicode Consortium at:
53 * <ul>
54 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
55 * </ul>
56 *
57 * <h3><a name="unicode">Unicode Character Representations</a></h3>
58 *
59 * <p>The {@code char} data type (and therefore the value that a
60 * {@code Character} object encapsulates) is based on the
61 * original Unicode specification, which defined characters as
62 * fixed-width 16-bit entities. The Unicode Standard has since been
63 * changed to allow for characters whose representation requires more
64 * than 16 bits.  The range of legal <em>code point</em>s is now
65 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
66 * (Refer to the <a
67 * href="http://www.unicode.org/reports/tr27/#notation"><i>
68 * definition</i></a> of the U+<i>n</i> notation in the Unicode
69 * Standard.)
70 *
71 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
72 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
73 * <a name="supplementary">Characters</a> whose code points are greater
74 * than U+FFFF are called <em>supplementary character</em>s.  The Java
75 * platform uses the UTF-16 representation in {@code char} arrays and
76 * in the {@code String} and {@code StringBuffer} classes. In
77 * this representation, supplementary characters are represented as a pair
78 * of {@code char} values, the first from the <em>high-surrogates</em>
79 * range, (&#92;uD800-&#92;uDBFF), the second from the
80 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
81 *
82 * <p>A {@code char} value, therefore, represents Basic
83 * Multilingual Plane (BMP) code points, including the surrogate
84 * code points, or code units of the UTF-16 encoding. An
85 * {@code int} value represents all Unicode code points,
86 * including supplementary code points. The lower (least significant)
87 * 21 bits of {@code int} are used to represent Unicode code
88 * points and the upper (most significant) 11 bits must be zero.
89 * Unless otherwise specified, the behavior with respect to
90 * supplementary characters and surrogate {@code char} values is
91 * as follows:
92 *
93 * <ul>
94 * <li>The methods that only accept a {@code char} value cannot support
95 * supplementary characters. They treat {@code char} values from the
96 * surrogate ranges as undefined characters. For example,
97 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
98 * this specific value if followed by any low-surrogate value in a string
99 * would represent a letter.
100 *
101 * <li>The methods that accept an {@code int} value support all
102 * Unicode characters, including supplementary characters. For
103 * example, {@code Character.isLetter(0x2F81A)} returns
104 * {@code true} because the code point value represents a letter
105 * (a CJK ideograph).
106 * </ul>
107 *
108 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
109 * used for character values in the range between U+0000 and U+10FFFF,
110 * and <em>Unicode code unit</em> is used for 16-bit
111 * {@code char} values that are code units of the <em>UTF-16</em>
112 * encoding. For more information on Unicode terminology, refer to the
113 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
114 *
115 * @author  Lee Boynton
116 * @author  Guy Steele
117 * @author  Akira Tanaka
118 * @author  Martin Buchholz
119 * @author  Ulf Zibis
120 * @since   1.0
121 */
122public final
123class Character implements java.io.Serializable, Comparable<Character> {
124    /**
125     * The minimum radix available for conversion to and from strings.
126     * The constant value of this field is the smallest value permitted
127     * for the radix argument in radix-conversion methods such as the
128     * {@code digit} method, the {@code forDigit} method, and the
129     * {@code toString} method of class {@code Integer}.
130     *
131     * @see     Character#digit(char, int)
132     * @see     Character#forDigit(int, int)
133     * @see     Integer#toString(int, int)
134     * @see     Integer#valueOf(String)
135     */
136    public static final int MIN_RADIX = 2;
137
138    /**
139     * The maximum radix available for conversion to and from strings.
140     * The constant value of this field is the largest value permitted
141     * for the radix argument in radix-conversion methods such as the
142     * {@code digit} method, the {@code forDigit} method, and the
143     * {@code toString} method of class {@code Integer}.
144     *
145     * @see     Character#digit(char, int)
146     * @see     Character#forDigit(int, int)
147     * @see     Integer#toString(int, int)
148     * @see     Integer#valueOf(String)
149     */
150    public static final int MAX_RADIX = 36;
151
152    /**
153     * The constant value of this field is the smallest value of type
154     * {@code char}, {@code '\u005Cu0000'}.
155     *
156     * @since   1.0.2
157     */
158    public static final char MIN_VALUE = '\u0000';
159
160    /**
161     * The constant value of this field is the largest value of type
162     * {@code char}, {@code '\u005CuFFFF'}.
163     *
164     * @since   1.0.2
165     */
166    public static final char MAX_VALUE = '\uFFFF';
167
168    /**
169     * The {@code Class} instance representing the primitive type
170     * {@code char}.
171     *
172     * @since   1.1
173     */
174    @SuppressWarnings("unchecked")
175    public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
176
177    /*
178     * Normative general types
179     */
180
181    /*
182     * General character types
183     */
184
185    /**
186     * General category "Cn" in the Unicode specification.
187     * @since   1.1
188     */
189    public static final byte UNASSIGNED = 0;
190
191    /**
192     * General category "Lu" in the Unicode specification.
193     * @since   1.1
194     */
195    public static final byte UPPERCASE_LETTER = 1;
196
197    /**
198     * General category "Ll" in the Unicode specification.
199     * @since   1.1
200     */
201    public static final byte LOWERCASE_LETTER = 2;
202
203    /**
204     * General category "Lt" in the Unicode specification.
205     * @since   1.1
206     */
207    public static final byte TITLECASE_LETTER = 3;
208
209    /**
210     * General category "Lm" in the Unicode specification.
211     * @since   1.1
212     */
213    public static final byte MODIFIER_LETTER = 4;
214
215    /**
216     * General category "Lo" in the Unicode specification.
217     * @since   1.1
218     */
219    public static final byte OTHER_LETTER = 5;
220
221    /**
222     * General category "Mn" in the Unicode specification.
223     * @since   1.1
224     */
225    public static final byte NON_SPACING_MARK = 6;
226
227    /**
228     * General category "Me" in the Unicode specification.
229     * @since   1.1
230     */
231    public static final byte ENCLOSING_MARK = 7;
232
233    /**
234     * General category "Mc" in the Unicode specification.
235     * @since   1.1
236     */
237    public static final byte COMBINING_SPACING_MARK = 8;
238
239    /**
240     * General category "Nd" in the Unicode specification.
241     * @since   1.1
242     */
243    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
244
245    /**
246     * General category "Nl" in the Unicode specification.
247     * @since   1.1
248     */
249    public static final byte LETTER_NUMBER = 10;
250
251    /**
252     * General category "No" in the Unicode specification.
253     * @since   1.1
254     */
255    public static final byte OTHER_NUMBER = 11;
256
257    /**
258     * General category "Zs" in the Unicode specification.
259     * @since   1.1
260     */
261    public static final byte SPACE_SEPARATOR = 12;
262
263    /**
264     * General category "Zl" in the Unicode specification.
265     * @since   1.1
266     */
267    public static final byte LINE_SEPARATOR = 13;
268
269    /**
270     * General category "Zp" in the Unicode specification.
271     * @since   1.1
272     */
273    public static final byte PARAGRAPH_SEPARATOR = 14;
274
275    /**
276     * General category "Cc" in the Unicode specification.
277     * @since   1.1
278     */
279    public static final byte CONTROL = 15;
280
281    /**
282     * General category "Cf" in the Unicode specification.
283     * @since   1.1
284     */
285    public static final byte FORMAT = 16;
286
287    /**
288     * General category "Co" in the Unicode specification.
289     * @since   1.1
290     */
291    public static final byte PRIVATE_USE = 18;
292
293    /**
294     * General category "Cs" in the Unicode specification.
295     * @since   1.1
296     */
297    public static final byte SURROGATE = 19;
298
299    /**
300     * General category "Pd" in the Unicode specification.
301     * @since   1.1
302     */
303    public static final byte DASH_PUNCTUATION = 20;
304
305    /**
306     * General category "Ps" in the Unicode specification.
307     * @since   1.1
308     */
309    public static final byte START_PUNCTUATION = 21;
310
311    /**
312     * General category "Pe" in the Unicode specification.
313     * @since   1.1
314     */
315    public static final byte END_PUNCTUATION = 22;
316
317    /**
318     * General category "Pc" in the Unicode specification.
319     * @since   1.1
320     */
321    public static final byte CONNECTOR_PUNCTUATION = 23;
322
323    /**
324     * General category "Po" in the Unicode specification.
325     * @since   1.1
326     */
327    public static final byte OTHER_PUNCTUATION = 24;
328
329    /**
330     * General category "Sm" in the Unicode specification.
331     * @since   1.1
332     */
333    public static final byte MATH_SYMBOL = 25;
334
335    /**
336     * General category "Sc" in the Unicode specification.
337     * @since   1.1
338     */
339    public static final byte CURRENCY_SYMBOL = 26;
340
341    /**
342     * General category "Sk" in the Unicode specification.
343     * @since   1.1
344     */
345    public static final byte MODIFIER_SYMBOL = 27;
346
347    /**
348     * General category "So" in the Unicode specification.
349     * @since   1.1
350     */
351    public static final byte OTHER_SYMBOL = 28;
352
353    /**
354     * General category "Pi" in the Unicode specification.
355     * @since   1.4
356     */
357    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
358
359    /**
360     * General category "Pf" in the Unicode specification.
361     * @since   1.4
362     */
363    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
364
365    /**
366     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
367     */
368    static final int ERROR = 0xFFFFFFFF;
369
370
371    /**
372     * Undefined bidirectional character type. Undefined {@code char}
373     * values have undefined directionality in the Unicode specification.
374     * @since 1.4
375     */
376    public static final byte DIRECTIONALITY_UNDEFINED = -1;
377
378    /**
379     * Strong bidirectional character type "L" in the Unicode specification.
380     * @since 1.4
381     */
382    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
383
384    /**
385     * Strong bidirectional character type "R" in the Unicode specification.
386     * @since 1.4
387     */
388    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
389
390    /**
391     * Strong bidirectional character type "AL" in the Unicode specification.
392     * @since 1.4
393     */
394    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
395
396    /**
397     * Weak bidirectional character type "EN" in the Unicode specification.
398     * @since 1.4
399     */
400    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
401
402    /**
403     * Weak bidirectional character type "ES" in the Unicode specification.
404     * @since 1.4
405     */
406    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
407
408    /**
409     * Weak bidirectional character type "ET" in the Unicode specification.
410     * @since 1.4
411     */
412    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
413
414    /**
415     * Weak bidirectional character type "AN" in the Unicode specification.
416     * @since 1.4
417     */
418    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
419
420    /**
421     * Weak bidirectional character type "CS" in the Unicode specification.
422     * @since 1.4
423     */
424    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
425
426    /**
427     * Weak bidirectional character type "NSM" in the Unicode specification.
428     * @since 1.4
429     */
430    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
431
432    /**
433     * Weak bidirectional character type "BN" in the Unicode specification.
434     * @since 1.4
435     */
436    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
437
438    /**
439     * Neutral bidirectional character type "B" in the Unicode specification.
440     * @since 1.4
441     */
442    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
443
444    /**
445     * Neutral bidirectional character type "S" in the Unicode specification.
446     * @since 1.4
447     */
448    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
449
450    /**
451     * Neutral bidirectional character type "WS" in the Unicode specification.
452     * @since 1.4
453     */
454    public static final byte DIRECTIONALITY_WHITESPACE = 12;
455
456    /**
457     * Neutral bidirectional character type "ON" in the Unicode specification.
458     * @since 1.4
459     */
460    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
461
462    /**
463     * Strong bidirectional character type "LRE" in the Unicode specification.
464     * @since 1.4
465     */
466    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
467
468    /**
469     * Strong bidirectional character type "LRO" in the Unicode specification.
470     * @since 1.4
471     */
472    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
473
474    /**
475     * Strong bidirectional character type "RLE" in the Unicode specification.
476     * @since 1.4
477     */
478    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
479
480    /**
481     * Strong bidirectional character type "RLO" in the Unicode specification.
482     * @since 1.4
483     */
484    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
485
486    /**
487     * Weak bidirectional character type "PDF" in the Unicode specification.
488     * @since 1.4
489     */
490    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
491
492    /**
493     * The minimum value of a
494     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
495     * Unicode high-surrogate code unit</a>
496     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
497     * A high-surrogate is also known as a <i>leading-surrogate</i>.
498     *
499     * @since 1.5
500     */
501    public static final char MIN_HIGH_SURROGATE = '\uD800';
502
503    /**
504     * The maximum value of a
505     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
506     * Unicode high-surrogate code unit</a>
507     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
508     * A high-surrogate is also known as a <i>leading-surrogate</i>.
509     *
510     * @since 1.5
511     */
512    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
513
514    /**
515     * The minimum value of a
516     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
517     * Unicode low-surrogate code unit</a>
518     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
519     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
520     *
521     * @since 1.5
522     */
523    public static final char MIN_LOW_SURROGATE  = '\uDC00';
524
525    /**
526     * The maximum value of a
527     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
528     * Unicode low-surrogate code unit</a>
529     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
530     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
531     *
532     * @since 1.5
533     */
534    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
535
536    /**
537     * The minimum value of a Unicode surrogate code unit in the
538     * UTF-16 encoding, constant {@code '\u005CuD800'}.
539     *
540     * @since 1.5
541     */
542    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
543
544    /**
545     * The maximum value of a Unicode surrogate code unit in the
546     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
547     *
548     * @since 1.5
549     */
550    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
551
552    /**
553     * The minimum value of a
554     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
555     * Unicode supplementary code point</a>, constant {@code U+10000}.
556     *
557     * @since 1.5
558     */
559    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
560
561    /**
562     * The minimum value of a
563     * <a href="http://www.unicode.org/glossary/#code_point">
564     * Unicode code point</a>, constant {@code U+0000}.
565     *
566     * @since 1.5
567     */
568    public static final int MIN_CODE_POINT = 0x000000;
569
570    /**
571     * The maximum value of a
572     * <a href="http://www.unicode.org/glossary/#code_point">
573     * Unicode code point</a>, constant {@code U+10FFFF}.
574     *
575     * @since 1.5
576     */
577    public static final int MAX_CODE_POINT = 0X10FFFF;
578
579    private static final byte[] DIRECTIONALITY = new byte[] {
            // Table of the public DIRECTIONALITY_* constants: 19 entries, indices 0-18.
            // The element order is NOT the numeric order of the constants, e.g.
            // index 2 holds DIRECTIONALITY_EUROPEAN_NUMBER (value 3) and index 13
            // holds DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC (value 2).
            // DIRECTIONALITY_UNDEFINED (-1) has no entry in the table.
            // NOTE(review): the code that indexes this table is not visible in this
            // part of the file; presumably the index is a bidi-class code supplied by
            // the underlying Unicode data provider -- confirm before reordering or
            // extending the entries.
580            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, // indices 0, 1
581            DIRECTIONALITY_EUROPEAN_NUMBER, // index 2
582            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, // index 3
583            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, // index 4
584            DIRECTIONALITY_ARABIC_NUMBER, // index 5
585            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, // index 6
586            DIRECTIONALITY_PARAGRAPH_SEPARATOR, // index 7
587            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, // indices 8, 9
588            DIRECTIONALITY_OTHER_NEUTRALS, // index 10
589            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, // index 11
590            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, // index 12
591            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, // index 13
592            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, // index 14
593            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, // index 15
594            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, // index 16
595            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; // indices 17, 18
596
597    /**
598     * Instances of this class represent particular subsets of the Unicode
599     * character set.  The only family of subsets defined in the
600     * {@code Character} class is {@link Character.UnicodeBlock}.
601     * Other portions of the Java API may define other subsets for their
602     * own purposes.
603     *
604     * @since 1.2
605     */
606    public static class Subset  {
607
608        private String name;
609
610        /**
611         * Constructs a new {@code Subset} instance.
612         *
613         * @param  name  The name of this subset
614         * @exception NullPointerException if name is {@code null}
615         */
616        protected Subset(String name) {
617            if (name == null) {
618                throw new NullPointerException("name");
619            }
620            this.name = name;
621        }
622
623        /**
624         * Compares two {@code Subset} objects for equality.
625         * This method returns {@code true} if and only if
626         * {@code this} and the argument refer to the same
627         * object; since this method is {@code final}, this
628         * guarantee holds for all subclasses.
629         */
630        public final boolean equals(Object obj) {
631            return (this == obj);
632        }
633
634        /**
635         * Returns the standard hash code as defined by the
636         * {@link Object#hashCode} method.  This method
637         * is {@code final} in order to ensure that the
638         * {@code equals} and {@code hashCode} methods will
639         * be consistent in all subclasses.
640         */
641        public final int hashCode() {
642            return super.hashCode();
643        }
644
645        /**
646         * Returns the name of this subset.
647         */
648        public final String toString() {
649            return name;
650        }
651    }
652
653    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
654    // for the latest specification of Unicode Blocks.
655
656    /**
657     * A family of character subsets representing the character blocks in the
658     * Unicode specification. Character blocks generally define characters
659     * used for a specific script or purpose. A character is contained by
660     * at most one Unicode block.
661     *
662     * @since 1.2
663     */
664    public static final class UnicodeBlock extends Subset {
665
666        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
667
668        /**
669         * Creates a UnicodeBlock with the given identifier name.
670         * This name must be the same as the block identifier.
671         */
672        private UnicodeBlock(String idName) {
673            super(idName);
674            map.put(idName, this);
675        }
676
677        /**
678         * Creates a UnicodeBlock with the given identifier name and
679         * alias name.
680         */
681        private UnicodeBlock(String idName, String alias) {
682            this(idName);
683            map.put(alias, this);
684        }
685
686        /**
687         * Creates a UnicodeBlock with the given identifier name and
688         * alias names.
689         */
690        private UnicodeBlock(String idName, String... aliases) {
691            this(idName);
692            for (String alias : aliases)
693                map.put(alias, this);
694        }
695
696        /**
697         * Constant for the "Basic Latin" Unicode character block.
698         * @since 1.2
699         */
700        public static final UnicodeBlock  BASIC_LATIN =
701            new UnicodeBlock("BASIC_LATIN",
702                             "BASIC LATIN",
703                             "BASICLATIN");
704
705        /**
706         * Constant for the "Latin-1 Supplement" Unicode character block.
707         * @since 1.2
708         */
709        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
710            new UnicodeBlock("LATIN_1_SUPPLEMENT",
711                             "LATIN-1 SUPPLEMENT",
712                             "LATIN-1SUPPLEMENT");
713
714        /**
715         * Constant for the "Latin Extended-A" Unicode character block.
716         * @since 1.2
717         */
718        public static final UnicodeBlock LATIN_EXTENDED_A =
719            new UnicodeBlock("LATIN_EXTENDED_A",
720                             "LATIN EXTENDED-A",
721                             "LATINEXTENDED-A");
722
723        /**
724         * Constant for the "Latin Extended-B" Unicode character block.
725         * @since 1.2
726         */
727        public static final UnicodeBlock LATIN_EXTENDED_B =
728            new UnicodeBlock("LATIN_EXTENDED_B",
729                             "LATIN EXTENDED-B",
730                             "LATINEXTENDED-B");
731
732        /**
733         * Constant for the "IPA Extensions" Unicode character block.
734         * @since 1.2
735         */
736        public static final UnicodeBlock IPA_EXTENSIONS =
737            new UnicodeBlock("IPA_EXTENSIONS",
738                             "IPA EXTENSIONS",
739                             "IPAEXTENSIONS");
740
741        /**
742         * Constant for the "Spacing Modifier Letters" Unicode character block.
743         * @since 1.2
744         */
745        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
746            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
747                             "SPACING MODIFIER LETTERS",
748                             "SPACINGMODIFIERLETTERS");
749
750        /**
751         * Constant for the "Combining Diacritical Marks" Unicode character block.
752         * @since 1.2
753         */
754        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
755            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
756                             "COMBINING DIACRITICAL MARKS",
757                             "COMBININGDIACRITICALMARKS");
758
759        /**
760         * Constant for the "Greek and Coptic" Unicode character block.
761         * <p>
762         * This block was previously known as the "Greek" block.
763         *
764         * @since 1.2
765         */
766        public static final UnicodeBlock GREEK =
767            new UnicodeBlock("GREEK",
768                             "GREEK AND COPTIC",
769                             "GREEKANDCOPTIC");
770
771        /**
772         * Constant for the "Cyrillic" Unicode character block.
773         * @since 1.2
774         */
775        public static final UnicodeBlock CYRILLIC =
776            new UnicodeBlock("CYRILLIC");
777
778        /**
779         * Constant for the "Armenian" Unicode character block.
780         * @since 1.2
781         */
782        public static final UnicodeBlock ARMENIAN =
783            new UnicodeBlock("ARMENIAN");
784
785        /**
786         * Constant for the "Hebrew" Unicode character block.
787         * @since 1.2
788         */
789        public static final UnicodeBlock HEBREW =
790            new UnicodeBlock("HEBREW");
791
792        /**
793         * Constant for the "Arabic" Unicode character block.
794         * @since 1.2
795         */
796        public static final UnicodeBlock ARABIC =
797            new UnicodeBlock("ARABIC");
798
799        /**
800         * Constant for the "Devanagari" Unicode character block.
801         * @since 1.2
802         */
803        public static final UnicodeBlock DEVANAGARI =
804            new UnicodeBlock("DEVANAGARI");
805
806        /**
807         * Constant for the "Bengali" Unicode character block.
808         * @since 1.2
809         */
810        public static final UnicodeBlock BENGALI =
811            new UnicodeBlock("BENGALI");
812
813        /**
814         * Constant for the "Gurmukhi" Unicode character block.
815         * @since 1.2
816         */
817        public static final UnicodeBlock GURMUKHI =
818            new UnicodeBlock("GURMUKHI");
819
820        /**
821         * Constant for the "Gujarati" Unicode character block.
822         * @since 1.2
823         */
824        public static final UnicodeBlock GUJARATI =
825            new UnicodeBlock("GUJARATI");
826
827        /**
828         * Constant for the "Oriya" Unicode character block.
829         * @since 1.2
830         */
831        public static final UnicodeBlock ORIYA =
832            new UnicodeBlock("ORIYA");
833
834        /**
835         * Constant for the "Tamil" Unicode character block.
836         * @since 1.2
837         */
838        public static final UnicodeBlock TAMIL =
839            new UnicodeBlock("TAMIL");
840
841        /**
842         * Constant for the "Telugu" Unicode character block.
843         * @since 1.2
844         */
845        public static final UnicodeBlock TELUGU =
846            new UnicodeBlock("TELUGU");
847
848        /**
849         * Constant for the "Kannada" Unicode character block.
850         * @since 1.2
851         */
852        public static final UnicodeBlock KANNADA =
853            new UnicodeBlock("KANNADA");
854
855        /**
856         * Constant for the "Malayalam" Unicode character block.
857         * @since 1.2
858         */
859        public static final UnicodeBlock MALAYALAM =
860            new UnicodeBlock("MALAYALAM");
861
862        /**
863         * Constant for the "Thai" Unicode character block.
864         * @since 1.2
865         */
866        public static final UnicodeBlock THAI =
867            new UnicodeBlock("THAI");
868
869        /**
870         * Constant for the "Lao" Unicode character block.
871         * @since 1.2
872         */
873        public static final UnicodeBlock LAO =
874            new UnicodeBlock("LAO");
875
876        /**
877         * Constant for the "Tibetan" Unicode character block.
878         * @since 1.2
879         */
880        public static final UnicodeBlock TIBETAN =
881            new UnicodeBlock("TIBETAN");
882
883        /**
884         * Constant for the "Georgian" Unicode character block.
885         * @since 1.2
886         */
887        public static final UnicodeBlock GEORGIAN =
888            new UnicodeBlock("GEORGIAN");
889
890        /**
891         * Constant for the "Hangul Jamo" Unicode character block.
892         * @since 1.2
893         */
894        public static final UnicodeBlock HANGUL_JAMO =
895            new UnicodeBlock("HANGUL_JAMO",
896                             "HANGUL JAMO",
897                             "HANGULJAMO");
898
899        /**
900         * Constant for the "Latin Extended Additional" Unicode character block.
901         * @since 1.2
902         */
903        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
904            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
905                             "LATIN EXTENDED ADDITIONAL",
906                             "LATINEXTENDEDADDITIONAL");
907
908        /**
909         * Constant for the "Greek Extended" Unicode character block.
910         * @since 1.2
911         */
912        public static final UnicodeBlock GREEK_EXTENDED =
913            new UnicodeBlock("GREEK_EXTENDED",
914                             "GREEK EXTENDED",
915                             "GREEKEXTENDED");
916
917        /**
918         * Constant for the "General Punctuation" Unicode character block.
919         * @since 1.2
920         */
921        public static final UnicodeBlock GENERAL_PUNCTUATION =
922            new UnicodeBlock("GENERAL_PUNCTUATION",
923                             "GENERAL PUNCTUATION",
924                             "GENERALPUNCTUATION");
925
926        /**
927         * Constant for the "Superscripts and Subscripts" Unicode character
928         * block.
929         * @since 1.2
930         */
931        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
932            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
933                             "SUPERSCRIPTS AND SUBSCRIPTS",
934                             "SUPERSCRIPTSANDSUBSCRIPTS");
935
936        /**
937         * Constant for the "Currency Symbols" Unicode character block.
938         * @since 1.2
939         */
940        public static final UnicodeBlock CURRENCY_SYMBOLS =
941            new UnicodeBlock("CURRENCY_SYMBOLS",
942                             "CURRENCY SYMBOLS",
943                             "CURRENCYSYMBOLS");
944
945        /**
946         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
947         * character block.
948         * <p>
949         * This block was previously known as "Combining Marks for Symbols".
950         * @since 1.2
951         */
952        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
953            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
954                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
955                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
956                             "COMBINING MARKS FOR SYMBOLS",
957                             "COMBININGMARKSFORSYMBOLS");
958
959        /**
960         * Constant for the "Letterlike Symbols" Unicode character block.
961         * @since 1.2
962         */
963        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
964            new UnicodeBlock("LETTERLIKE_SYMBOLS",
965                             "LETTERLIKE SYMBOLS",
966                             "LETTERLIKESYMBOLS");
967
968        /**
969         * Constant for the "Number Forms" Unicode character block.
970         * @since 1.2
971         */
972        public static final UnicodeBlock NUMBER_FORMS =
973            new UnicodeBlock("NUMBER_FORMS",
974                             "NUMBER FORMS",
975                             "NUMBERFORMS");
976
977        /**
978         * Constant for the "Arrows" Unicode character block.
979         * @since 1.2
980         */
981        public static final UnicodeBlock ARROWS =
982            new UnicodeBlock("ARROWS");
983
984        /**
985         * Constant for the "Mathematical Operators" Unicode character block.
986         * @since 1.2
987         */
988        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
989            new UnicodeBlock("MATHEMATICAL_OPERATORS",
990                             "MATHEMATICAL OPERATORS",
991                             "MATHEMATICALOPERATORS");
992
993        /**
994         * Constant for the "Miscellaneous Technical" Unicode character block.
995         * @since 1.2
996         */
997        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
998            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
999                             "MISCELLANEOUS TECHNICAL",
1000                             "MISCELLANEOUSTECHNICAL");
1001
1002        /**
1003         * Constant for the "Control Pictures" Unicode character block.
1004         * @since 1.2
1005         */
1006        public static final UnicodeBlock CONTROL_PICTURES =
1007            new UnicodeBlock("CONTROL_PICTURES",
1008                             "CONTROL PICTURES",
1009                             "CONTROLPICTURES");
1010
1011        /**
1012         * Constant for the "Optical Character Recognition" Unicode character block.
1013         * @since 1.2
1014         */
1015        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1016            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1017                             "OPTICAL CHARACTER RECOGNITION",
1018                             "OPTICALCHARACTERRECOGNITION");
1019
1020        /**
1021         * Constant for the "Enclosed Alphanumerics" Unicode character block.
1022         * @since 1.2
1023         */
1024        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1025            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1026                             "ENCLOSED ALPHANUMERICS",
1027                             "ENCLOSEDALPHANUMERICS");
1028
1029        /**
1030         * Constant for the "Box Drawing" Unicode character block.
1031         * @since 1.2
1032         */
1033        public static final UnicodeBlock BOX_DRAWING =
1034            new UnicodeBlock("BOX_DRAWING",
1035                             "BOX DRAWING",
1036                             "BOXDRAWING");
1037
1038        /**
1039         * Constant for the "Block Elements" Unicode character block.
1040         * @since 1.2
1041         */
1042        public static final UnicodeBlock BLOCK_ELEMENTS =
1043            new UnicodeBlock("BLOCK_ELEMENTS",
1044                             "BLOCK ELEMENTS",
1045                             "BLOCKELEMENTS");
1046
1047        /**
1048         * Constant for the "Geometric Shapes" Unicode character block.
1049         * @since 1.2
1050         */
1051        public static final UnicodeBlock GEOMETRIC_SHAPES =
1052            new UnicodeBlock("GEOMETRIC_SHAPES",
1053                             "GEOMETRIC SHAPES",
1054                             "GEOMETRICSHAPES");
1055
1056        /**
1057         * Constant for the "Miscellaneous Symbols" Unicode character block.
1058         * @since 1.2
1059         */
1060        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1061            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1062                             "MISCELLANEOUS SYMBOLS",
1063                             "MISCELLANEOUSSYMBOLS");
1064
1065        /**
1066         * Constant for the "Dingbats" Unicode character block.
1067         * @since 1.2
1068         */
1069        public static final UnicodeBlock DINGBATS =
1070            new UnicodeBlock("DINGBATS");
1071
1072        /**
1073         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1074         * @since 1.2
1075         */
1076        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1077            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1078                             "CJK SYMBOLS AND PUNCTUATION",
1079                             "CJKSYMBOLSANDPUNCTUATION");
1080
1081        /**
1082         * Constant for the "Hiragana" Unicode character block.
1083         * @since 1.2
1084         */
1085        public static final UnicodeBlock HIRAGANA =
1086            new UnicodeBlock("HIRAGANA");
1087
1088        /**
1089         * Constant for the "Katakana" Unicode character block.
1090         * @since 1.2
1091         */
1092        public static final UnicodeBlock KATAKANA =
1093            new UnicodeBlock("KATAKANA");
1094
1095        /**
1096         * Constant for the "Bopomofo" Unicode character block.
1097         * @since 1.2
1098         */
1099        public static final UnicodeBlock BOPOMOFO =
1100            new UnicodeBlock("BOPOMOFO");
1101
1102        /**
1103         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1104         * @since 1.2
1105         */
1106        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1107            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1108                             "HANGUL COMPATIBILITY JAMO",
1109                             "HANGULCOMPATIBILITYJAMO");
1110
1111        /**
1112         * Constant for the "Kanbun" Unicode character block.
1113         * @since 1.2
1114         */
1115        public static final UnicodeBlock KANBUN =
1116            new UnicodeBlock("KANBUN");
1117
1118        /**
1119         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1120         * @since 1.2
1121         */
1122        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1123            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1124                             "ENCLOSED CJK LETTERS AND MONTHS",
1125                             "ENCLOSEDCJKLETTERSANDMONTHS");
1126
1127        /**
1128         * Constant for the "CJK Compatibility" Unicode character block.
1129         * @since 1.2
1130         */
1131        public static final UnicodeBlock CJK_COMPATIBILITY =
1132            new UnicodeBlock("CJK_COMPATIBILITY",
1133                             "CJK COMPATIBILITY",
1134                             "CJKCOMPATIBILITY");
1135
1136        /**
1137         * Constant for the "CJK Unified Ideographs" Unicode character block.
1138         * @since 1.2
1139         */
1140        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1141            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1142                             "CJK UNIFIED IDEOGRAPHS",
1143                             "CJKUNIFIEDIDEOGRAPHS");
1144
1145        /**
1146         * Constant for the "Hangul Syllables" Unicode character block.
1147         * @since 1.2
1148         */
1149        public static final UnicodeBlock HANGUL_SYLLABLES =
1150            new UnicodeBlock("HANGUL_SYLLABLES",
1151                             "HANGUL SYLLABLES",
1152                             "HANGULSYLLABLES");
1153
1154        /**
1155         * Constant for the "Private Use Area" Unicode character block.
1156         * @since 1.2
1157         */
1158        public static final UnicodeBlock PRIVATE_USE_AREA =
1159            new UnicodeBlock("PRIVATE_USE_AREA",
1160                             "PRIVATE USE AREA",
1161                             "PRIVATEUSEAREA");
1162
1163        /**
1164         * Constant for the "CJK Compatibility Ideographs" Unicode character
1165         * block.
1166         * @since 1.2
1167         */
1168        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1169            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1170                             "CJK COMPATIBILITY IDEOGRAPHS",
1171                             "CJKCOMPATIBILITYIDEOGRAPHS");
1172
1173        /**
1174         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1175         * @since 1.2
1176         */
1177        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1178            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1179                             "ALPHABETIC PRESENTATION FORMS",
1180                             "ALPHABETICPRESENTATIONFORMS");
1181
1182        /**
1183         * Constant for the "Arabic Presentation Forms-A" Unicode character
1184         * block.
1185         * @since 1.2
1186         */
1187        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1188            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1189                             "ARABIC PRESENTATION FORMS-A",
1190                             "ARABICPRESENTATIONFORMS-A");
1191
1192        /**
1193         * Constant for the "Combining Half Marks" Unicode character block.
1194         * @since 1.2
1195         */
1196        public static final UnicodeBlock COMBINING_HALF_MARKS =
1197            new UnicodeBlock("COMBINING_HALF_MARKS",
1198                             "COMBINING HALF MARKS",
1199                             "COMBININGHALFMARKS");
1200
1201        /**
1202         * Constant for the "CJK Compatibility Forms" Unicode character block.
1203         * @since 1.2
1204         */
1205        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1206            new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1207                             "CJK COMPATIBILITY FORMS",
1208                             "CJKCOMPATIBILITYFORMS");
1209
1210        /**
1211         * Constant for the "Small Form Variants" Unicode character block.
1212         * @since 1.2
1213         */
1214        public static final UnicodeBlock SMALL_FORM_VARIANTS =
1215            new UnicodeBlock("SMALL_FORM_VARIANTS",
1216                             "SMALL FORM VARIANTS",
1217                             "SMALLFORMVARIANTS");
1218
1219        /**
1220         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1221         * @since 1.2
1222         */
1223        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1224            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1225                             "ARABIC PRESENTATION FORMS-B",
1226                             "ARABICPRESENTATIONFORMS-B");
1227
1228        /**
1229         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1230         * block.
1231         * @since 1.2
1232         */
1233        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1234            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1235                             "HALFWIDTH AND FULLWIDTH FORMS",
1236                             "HALFWIDTHANDFULLWIDTHFORMS");
1237
1238        /**
1239         * Constant for the "Specials" Unicode character block.
1240         * @since 1.2
1241         */
1242        public static final UnicodeBlock SPECIALS =
1243            new UnicodeBlock("SPECIALS");
1244
1245        /**
1246         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1247         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1248         *             {@link #LOW_SURROGATES}. These new constants match
1249         *             the block definitions of the Unicode Standard.
1250         *             The {@link #of(char)} and {@link #of(int)} methods
1251         *             return the new constants, not SURROGATES_AREA.
1252         */
1253        @Deprecated
1254        public static final UnicodeBlock SURROGATES_AREA =
1255            new UnicodeBlock("SURROGATES_AREA");
1256
1257        /**
1258         * Constant for the "Syriac" Unicode character block.
1259         * @since 1.4
1260         */
1261        public static final UnicodeBlock SYRIAC =
1262            new UnicodeBlock("SYRIAC");
1263
1264        /**
1265         * Constant for the "Thaana" Unicode character block.
1266         * @since 1.4
1267         */
1268        public static final UnicodeBlock THAANA =
1269            new UnicodeBlock("THAANA");
1270
1271        /**
1272         * Constant for the "Sinhala" Unicode character block.
1273         * @since 1.4
1274         */
1275        public static final UnicodeBlock SINHALA =
1276            new UnicodeBlock("SINHALA");
1277
1278        /**
1279         * Constant for the "Myanmar" Unicode character block.
1280         * @since 1.4
1281         */
1282        public static final UnicodeBlock MYANMAR =
1283            new UnicodeBlock("MYANMAR");
1284
1285        /**
1286         * Constant for the "Ethiopic" Unicode character block.
1287         * @since 1.4
1288         */
1289        public static final UnicodeBlock ETHIOPIC =
1290            new UnicodeBlock("ETHIOPIC");
1291
1292        /**
1293         * Constant for the "Cherokee" Unicode character block.
1294         * @since 1.4
1295         */
1296        public static final UnicodeBlock CHEROKEE =
1297            new UnicodeBlock("CHEROKEE");
1298
1299        /**
1300         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1301         * @since 1.4
1302         */
1303        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1304            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1305                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1306                             "UNIFIEDCANADIANABORIGINALSYLLABICS");
1307
1308        /**
1309         * Constant for the "Ogham" Unicode character block.
1310         * @since 1.4
1311         */
1312        public static final UnicodeBlock OGHAM =
1313            new UnicodeBlock("OGHAM");
1314
1315        /**
1316         * Constant for the "Runic" Unicode character block.
1317         * @since 1.4
1318         */
1319        public static final UnicodeBlock RUNIC =
1320            new UnicodeBlock("RUNIC");
1321
1322        /**
1323         * Constant for the "Khmer" Unicode character block.
1324         * @since 1.4
1325         */
1326        public static final UnicodeBlock KHMER =
1327            new UnicodeBlock("KHMER");
1328
1329        /**
1330         * Constant for the "Mongolian" Unicode character block.
1331         * @since 1.4
1332         */
1333        public static final UnicodeBlock MONGOLIAN =
1334            new UnicodeBlock("MONGOLIAN");
1335
1336        /**
1337         * Constant for the "Braille Patterns" Unicode character block.
1338         * @since 1.4
1339         */
1340        public static final UnicodeBlock BRAILLE_PATTERNS =
1341            new UnicodeBlock("BRAILLE_PATTERNS",
1342                             "BRAILLE PATTERNS",
1343                             "BRAILLEPATTERNS");
1344
1345        /**
1346         * Constant for the "CJK Radicals Supplement" Unicode character block.
1347         * @since 1.4
1348         */
1349        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1350            new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1351                             "CJK RADICALS SUPPLEMENT",
1352                             "CJKRADICALSSUPPLEMENT");
1353
1354        /**
1355         * Constant for the "Kangxi Radicals" Unicode character block.
1356         * @since 1.4
1357         */
1358        public static final UnicodeBlock KANGXI_RADICALS =
1359            new UnicodeBlock("KANGXI_RADICALS",
1360                             "KANGXI RADICALS",
1361                             "KANGXIRADICALS");
1362
1363        /**
1364         * Constant for the "Ideographic Description Characters" Unicode character block.
1365         * @since 1.4
1366         */
1367        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1368            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1369                             "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1370                             "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1371
1372        /**
1373         * Constant for the "Bopomofo Extended" Unicode character block.
1374         * @since 1.4
1375         */
1376        public static final UnicodeBlock BOPOMOFO_EXTENDED =
1377            new UnicodeBlock("BOPOMOFO_EXTENDED",
1378                             "BOPOMOFO EXTENDED",
1379                             "BOPOMOFOEXTENDED");
1380
1381        /**
1382         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1383         * @since 1.4
1384         */
1385        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1386            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1387                             "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1388                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1389
1390        /**
1391         * Constant for the "Yi Syllables" Unicode character block.
1392         * @since 1.4
1393         */
1394        public static final UnicodeBlock YI_SYLLABLES =
1395            new UnicodeBlock("YI_SYLLABLES",
1396                             "YI SYLLABLES",
1397                             "YISYLLABLES");
1398
1399        /**
1400         * Constant for the "Yi Radicals" Unicode character block.
1401         * @since 1.4
1402         */
1403        public static final UnicodeBlock YI_RADICALS =
1404            new UnicodeBlock("YI_RADICALS",
1405                             "YI RADICALS",
1406                             "YIRADICALS");
1407
1408        /**
1409         * Constant for the "Cyrillic Supplementary" Unicode character block.
1410         * @since 1.5
1411         */
1412        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1413            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1414                             "CYRILLIC SUPPLEMENTARY",
1415                             "CYRILLICSUPPLEMENTARY",
1416                             "CYRILLIC SUPPLEMENT",
1417                             "CYRILLICSUPPLEMENT");
1418
1419        /**
1420         * Constant for the "Tagalog" Unicode character block.
1421         * @since 1.5
1422         */
1423        public static final UnicodeBlock TAGALOG =
1424            new UnicodeBlock("TAGALOG");
1425
1426        /**
1427         * Constant for the "Hanunoo" Unicode character block.
1428         * @since 1.5
1429         */
1430        public static final UnicodeBlock HANUNOO =
1431            new UnicodeBlock("HANUNOO");
1432
1433        /**
1434         * Constant for the "Buhid" Unicode character block.
1435         * @since 1.5
1436         */
1437        public static final UnicodeBlock BUHID =
1438            new UnicodeBlock("BUHID");
1439
1440        /**
1441         * Constant for the "Tagbanwa" Unicode character block.
1442         * @since 1.5
1443         */
1444        public static final UnicodeBlock TAGBANWA =
1445            new UnicodeBlock("TAGBANWA");
1446
1447        /**
1448         * Constant for the "Limbu" Unicode character block.
1449         * @since 1.5
1450         */
1451        public static final UnicodeBlock LIMBU =
1452            new UnicodeBlock("LIMBU");
1453
1454        /**
1455         * Constant for the "Tai Le" Unicode character block.
1456         * @since 1.5
1457         */
1458        public static final UnicodeBlock TAI_LE =
1459            new UnicodeBlock("TAI_LE",
1460                             "TAI LE",
1461                             "TAILE");
1462
1463        /**
1464         * Constant for the "Khmer Symbols" Unicode character block.
1465         * @since 1.5
1466         */
1467        public static final UnicodeBlock KHMER_SYMBOLS =
1468            new UnicodeBlock("KHMER_SYMBOLS",
1469                             "KHMER SYMBOLS",
1470                             "KHMERSYMBOLS");
1471
1472        /**
1473         * Constant for the "Phonetic Extensions" Unicode character block.
1474         * @since 1.5
1475         */
1476        public static final UnicodeBlock PHONETIC_EXTENSIONS =
1477            new UnicodeBlock("PHONETIC_EXTENSIONS",
1478                             "PHONETIC EXTENSIONS",
1479                             "PHONETICEXTENSIONS");
1480
1481        /**
1482         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1483         * @since 1.5
1484         */
1485        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1486            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1487                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1488                             "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1489
1490        /**
1491         * Constant for the "Supplemental Arrows-A" Unicode character block.
1492         * @since 1.5
1493         */
1494        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1495            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1496                             "SUPPLEMENTAL ARROWS-A",
1497                             "SUPPLEMENTALARROWS-A");
1498
1499        /**
1500         * Constant for the "Supplemental Arrows-B" Unicode character block.
1501         * @since 1.5
1502         */
1503        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1504            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1505                             "SUPPLEMENTAL ARROWS-B",
1506                             "SUPPLEMENTALARROWS-B");
1507
1508        /**
1509         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1510         * character block.
1511         * @since 1.5
1512         */
1513        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1514            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1515                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1516                             "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1517
1518        /**
1519         * Constant for the "Supplemental Mathematical Operators" Unicode
1520         * character block.
1521         * @since 1.5
1522         */
1523        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1524            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1525                             "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1526                             "SUPPLEMENTALMATHEMATICALOPERATORS");
1527
1528        /**
1529         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1530         * block.
1531         * @since 1.5
1532         */
1533        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1534            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1535                             "MISCELLANEOUS SYMBOLS AND ARROWS",
1536                             "MISCELLANEOUSSYMBOLSANDARROWS");
1537
1538        /**
1539         * Constant for the "Katakana Phonetic Extensions" Unicode character
1540         * block.
1541         * @since 1.5
1542         */
1543        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1544            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1545                             "KATAKANA PHONETIC EXTENSIONS",
1546                             "KATAKANAPHONETICEXTENSIONS");
1547
1548        /**
1549         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1550         * @since 1.5
1551         */
1552        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1553            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1554                             "YIJING HEXAGRAM SYMBOLS",
1555                             "YIJINGHEXAGRAMSYMBOLS");
1556
1557        /**
1558         * Constant for the "Variation Selectors" Unicode character block.
1559         * @since 1.5
1560         */
1561        public static final UnicodeBlock VARIATION_SELECTORS =
1562            new UnicodeBlock("VARIATION_SELECTORS",
1563                             "VARIATION SELECTORS",
1564                             "VARIATIONSELECTORS");
1565
1566        /**
1567         * Constant for the "Linear B Syllabary" Unicode character block.
1568         * @since 1.5
1569         */
1570        public static final UnicodeBlock LINEAR_B_SYLLABARY =
1571            new UnicodeBlock("LINEAR_B_SYLLABARY",
1572                             "LINEAR B SYLLABARY",
1573                             "LINEARBSYLLABARY");
1574
1575        /**
1576         * Constant for the "Linear B Ideograms" Unicode character block.
1577         * @since 1.5
1578         */
1579        public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1580            new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1581                             "LINEAR B IDEOGRAMS",
1582                             "LINEARBIDEOGRAMS");
1583
1584        /**
1585         * Constant for the "Aegean Numbers" Unicode character block.
1586         * @since 1.5
1587         */
1588        public static final UnicodeBlock AEGEAN_NUMBERS =
1589            new UnicodeBlock("AEGEAN_NUMBERS",
1590                             "AEGEAN NUMBERS",
1591                             "AEGEANNUMBERS");
1592
1593        /**
1594         * Constant for the "Old Italic" Unicode character block.
1595         * @since 1.5
1596         */
1597        public static final UnicodeBlock OLD_ITALIC =
1598            new UnicodeBlock("OLD_ITALIC",
1599                             "OLD ITALIC",
1600                             "OLDITALIC");
1601
1602        /**
1603         * Constant for the "Gothic" Unicode character block.
1604         * @since 1.5
1605         */
1606        public static final UnicodeBlock GOTHIC =
1607            new UnicodeBlock("GOTHIC");
1608
1609        /**
1610         * Constant for the "Ugaritic" Unicode character block.
1611         * @since 1.5
1612         */
1613        public static final UnicodeBlock UGARITIC =
1614            new UnicodeBlock("UGARITIC");
1615
1616        /**
1617         * Constant for the "Deseret" Unicode character block.
1618         * @since 1.5
1619         */
1620        public static final UnicodeBlock DESERET =
1621            new UnicodeBlock("DESERET");
1622
1623        /**
1624         * Constant for the "Shavian" Unicode character block.
1625         * @since 1.5
1626         */
1627        public static final UnicodeBlock SHAVIAN =
1628            new UnicodeBlock("SHAVIAN");
1629
1630        /**
1631         * Constant for the "Osmanya" Unicode character block.
1632         * @since 1.5
1633         */
1634        public static final UnicodeBlock OSMANYA =
1635            new UnicodeBlock("OSMANYA");
1636
1637        /**
1638         * Constant for the "Cypriot Syllabary" Unicode character block.
1639         * @since 1.5
1640         */
1641        public static final UnicodeBlock CYPRIOT_SYLLABARY =
1642            new UnicodeBlock("CYPRIOT_SYLLABARY",
1643                             "CYPRIOT SYLLABARY",
1644                             "CYPRIOTSYLLABARY");
1645
1646        /**
1647         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1648         * @since 1.5
1649         */
1650        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1651            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1652                             "BYZANTINE MUSICAL SYMBOLS",
1653                             "BYZANTINEMUSICALSYMBOLS");
1654
1655        /**
1656         * Constant for the "Musical Symbols" Unicode character block.
1657         * @since 1.5
1658         */
1659        public static final UnicodeBlock MUSICAL_SYMBOLS =
1660            new UnicodeBlock("MUSICAL_SYMBOLS",
1661                             "MUSICAL SYMBOLS",
1662                             "MUSICALSYMBOLS");
1663
1664        /**
1665         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1666         * @since 1.5
1667         */
1668        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1669            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1670                             "TAI XUAN JING SYMBOLS",
1671                             "TAIXUANJINGSYMBOLS");
1672
1673        /**
1674         * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1675         * character block.
1676         * @since 1.5
1677         */
1678        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1679            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1680                             "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1681                             "MATHEMATICALALPHANUMERICSYMBOLS");
1682
1683        /**
1684         * Constant for the "CJK Unified Ideographs Extension B" Unicode
1685         * character block.
1686         * @since 1.5
1687         */
1688        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1689            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1690                             "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1691                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1692
1693        /**
1694         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode
             * character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1695         * @since 1.5
1696         */
1697        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1698            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1699                             "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1700                             "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1701
1702        /**
1703         * Constant for the "Tags" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1704         * @since 1.5
1705         */
1706        public static final UnicodeBlock TAGS =
1707            new UnicodeBlock("TAGS");
1708
1709        /**
1710         * Constant for the "Variation Selectors Supplement" Unicode character
1711         * block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1712         * @since 1.5
1713         */
1714        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1715            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1716                             "VARIATION SELECTORS SUPPLEMENT",
1717                             "VARIATIONSELECTORSSUPPLEMENT");
1718
1719        /**
1720         * Constant for the "Supplementary Private Use Area-A" Unicode character
1721         * block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
1722         * @since 1.5
1723         */
1724        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1725            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1726                             "SUPPLEMENTARY PRIVATE USE AREA-A",
1727                             "SUPPLEMENTARYPRIVATEUSEAREA-A");
1728
1729        /**
1730         * Constant for the "Supplementary Private Use Area-B" Unicode character
1731         * block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
1732         * @since 1.5
1733         */
1734        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1735            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1736                             "SUPPLEMENTARY PRIVATE USE AREA-B",
1737                             "SUPPLEMENTARYPRIVATEUSEAREA-B");
1738
1739        /**
1740         * Constant for the "High Surrogates" Unicode character block.
1741         * This block represents code point values in the high surrogate
1742         * range: U+D800 through U+DB7F.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1743         *
1744         * @since 1.5
1745         */
1746        public static final UnicodeBlock HIGH_SURROGATES =
1747            new UnicodeBlock("HIGH_SURROGATES",
1748                             "HIGH SURROGATES",
1749                             "HIGHSURROGATES");
1750
1751        /**
1752         * Constant for the "High Private Use Surrogates" Unicode character
1753         * block.
1754         * This block represents code point values in the private use high
1755         * surrogate range: U+DB80 through U+DBFF.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1756         *
1757         * @since 1.5
1758         */
1759        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1760            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1761                             "HIGH PRIVATE USE SURROGATES",
1762                             "HIGHPRIVATEUSESURROGATES");
1763
1764        /**
1765         * Constant for the "Low Surrogates" Unicode character block.
1766         * This block represents code point values in the low surrogate
1767         * range: U+DC00 through U+DFFF.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1768         *
1769         * @since 1.5
1770         */
1771        public static final UnicodeBlock LOW_SURROGATES =
1772            new UnicodeBlock("LOW_SURROGATES",
1773                             "LOW SURROGATES",
1774                             "LOWSURROGATES");
1775
1776        /**
1777         * Constant for the "Arabic Supplement" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1778         * @since 1.7
1779         */
1780        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1781            new UnicodeBlock("ARABIC_SUPPLEMENT",
1782                             "ARABIC SUPPLEMENT",
1783                             "ARABICSUPPLEMENT");
1784
1785        /**
1786         * Constant for the "NKo" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1787         * @since 1.7
1788         */
1789        public static final UnicodeBlock NKO =
1790            new UnicodeBlock("NKO");
1791
1792        /**
1793         * Constant for the "Samaritan" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1794         * @since 1.7
1795         */
1796        public static final UnicodeBlock SAMARITAN =
1797            new UnicodeBlock("SAMARITAN");
1798
1799        /**
1800         * Constant for the "Mandaic" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1801         * @since 1.7
1802         */
1803        public static final UnicodeBlock MANDAIC =
1804            new UnicodeBlock("MANDAIC");
1805
1806        /**
1807         * Constant for the "Ethiopic Supplement" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1808         * @since 1.7
1809         */
1810        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1811            new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1812                             "ETHIOPIC SUPPLEMENT",
1813                             "ETHIOPICSUPPLEMENT");
1814
1815        /**
1816         * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1817         * Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1818         * @since 1.7
1819         */
1820        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1821            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1822                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1823                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1824
1825        /**
1826         * Constant for the "New Tai Lue" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1827         * @since 1.7
1828         */
1829        public static final UnicodeBlock NEW_TAI_LUE =
1830            new UnicodeBlock("NEW_TAI_LUE",
1831                             "NEW TAI LUE",
1832                             "NEWTAILUE");
1833
1834        /**
1835         * Constant for the "Buginese" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1836         * @since 1.7
1837         */
1838        public static final UnicodeBlock BUGINESE =
1839            new UnicodeBlock("BUGINESE");
1840
1841        /**
1842         * Constant for the "Tai Tham" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1843         * @since 1.7
1844         */
1845        public static final UnicodeBlock TAI_THAM =
1846            new UnicodeBlock("TAI_THAM",
1847                             "TAI THAM",
1848                             "TAITHAM");
1849
1850        /**
1851         * Constant for the "Balinese" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1852         * @since 1.7
1853         */
1854        public static final UnicodeBlock BALINESE =
1855            new UnicodeBlock("BALINESE");
1856
1857        /**
1858         * Constant for the "Sundanese" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1859         * @since 1.7
1860         */
1861        public static final UnicodeBlock SUNDANESE =
1862            new UnicodeBlock("SUNDANESE");
1863
1864        /**
1865         * Constant for the "Batak" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1866         * @since 1.7
1867         */
1868        public static final UnicodeBlock BATAK =
1869            new UnicodeBlock("BATAK");
1870
1871        /**
1872         * Constant for the "Lepcha" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1873         * @since 1.7
1874         */
1875        public static final UnicodeBlock LEPCHA =
1876            new UnicodeBlock("LEPCHA");
1877
1878        /**
1879         * Constant for the "Ol Chiki" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1880         * @since 1.7
1881         */
1882        public static final UnicodeBlock OL_CHIKI =
1883            new UnicodeBlock("OL_CHIKI",
1884                             "OL CHIKI",
1885                             "OLCHIKI");
1886
1887        /**
1888         * Constant for the "Vedic Extensions" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1889         * @since 1.7
1890         */
1891        public static final UnicodeBlock VEDIC_EXTENSIONS =
1892            new UnicodeBlock("VEDIC_EXTENSIONS",
1893                             "VEDIC EXTENSIONS",
1894                             "VEDICEXTENSIONS");
1895
1896        /**
1897         * Constant for the "Phonetic Extensions Supplement" Unicode character
1898         * block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1899         * @since 1.7
1900         */
1901        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1902            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1903                             "PHONETIC EXTENSIONS SUPPLEMENT",
1904                             "PHONETICEXTENSIONSSUPPLEMENT");
1905
1906        /**
1907         * Constant for the "Combining Diacritical Marks Supplement" Unicode
1908         * character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1909         * @since 1.7
1910         */
1911        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1912            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1913                             "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1914                             "COMBININGDIACRITICALMARKSSUPPLEMENT");
1915
1916        /**
1917         * Constant for the "Glagolitic" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1918         * @since 1.7
1919         */
1920        public static final UnicodeBlock GLAGOLITIC =
1921            new UnicodeBlock("GLAGOLITIC");
1922
1923        /**
1924         * Constant for the "Latin Extended-C" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
1925         * @since 1.7
1926         */
1927        public static final UnicodeBlock LATIN_EXTENDED_C =
1928            new UnicodeBlock("LATIN_EXTENDED_C",
1929                             "LATIN EXTENDED-C",
1930                             "LATINEXTENDED-C");
1931
1932        /**
1933         * Constant for the "Coptic" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1934         * @since 1.7
1935         */
1936        public static final UnicodeBlock COPTIC =
1937            new UnicodeBlock("COPTIC");
1938
1939        /**
1940         * Constant for the "Georgian Supplement" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1941         * @since 1.7
1942         */
1943        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1944            new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1945                             "GEORGIAN SUPPLEMENT",
1946                             "GEORGIANSUPPLEMENT");
1947
1948        /**
1949         * Constant for the "Tifinagh" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1950         * @since 1.7
1951         */
1952        public static final UnicodeBlock TIFINAGH =
1953            new UnicodeBlock("TIFINAGH");
1954
1955        /**
1956         * Constant for the "Ethiopic Extended" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1957         * @since 1.7
1958         */
1959        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1960            new UnicodeBlock("ETHIOPIC_EXTENDED",
1961                             "ETHIOPIC EXTENDED",
1962                             "ETHIOPICEXTENDED");
1963
1964        /**
1965         * Constant for the "Cyrillic Extended-A" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
1966         * @since 1.7
1967         */
1968        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1969            new UnicodeBlock("CYRILLIC_EXTENDED_A",
1970                             "CYRILLIC EXTENDED-A",
1971                             "CYRILLICEXTENDED-A");
1972
1973        /**
1974         * Constant for the "Supplemental Punctuation" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1975         * @since 1.7
1976         */
1977        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1978            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1979                             "SUPPLEMENTAL PUNCTUATION",
1980                             "SUPPLEMENTALPUNCTUATION");
1981
1982        /**
1983         * Constant for the "CJK Strokes" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
1984         * @since 1.7
1985         */
1986        public static final UnicodeBlock CJK_STROKES =
1987            new UnicodeBlock("CJK_STROKES",
1988                             "CJK STROKES",
1989                             "CJKSTROKES");
1990
1991        /**
1992         * Constant for the "Lisu" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
1993         * @since 1.7
1994         */
1995        public static final UnicodeBlock LISU =
1996            new UnicodeBlock("LISU");
1997
1998        /**
1999         * Constant for the "Vai" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2000         * @since 1.7
2001         */
2002        public static final UnicodeBlock VAI =
2003            new UnicodeBlock("VAI");
2004
2005        /**
2006         * Constant for the "Cyrillic Extended-B" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2007         * @since 1.7
2008         */
2009        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2010            new UnicodeBlock("CYRILLIC_EXTENDED_B",
2011                             "CYRILLIC EXTENDED-B",
2012                             "CYRILLICEXTENDED-B");
2013
2014        /**
2015         * Constant for the "Bamum" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2016         * @since 1.7
2017         */
2018        public static final UnicodeBlock BAMUM =
2019            new UnicodeBlock("BAMUM");
2020
2021        /**
2022         * Constant for the "Modifier Tone Letters" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2023         * @since 1.7
2024         */
2025        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2026            new UnicodeBlock("MODIFIER_TONE_LETTERS",
2027                             "MODIFIER TONE LETTERS",
2028                             "MODIFIERTONELETTERS");
2029
2030        /**
2031         * Constant for the "Latin Extended-D" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2032         * @since 1.7
2033         */
2034        public static final UnicodeBlock LATIN_EXTENDED_D =
2035            new UnicodeBlock("LATIN_EXTENDED_D",
2036                             "LATIN EXTENDED-D",
2037                             "LATINEXTENDED-D");
2038
2039        /**
2040         * Constant for the "Syloti Nagri" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2041         * @since 1.7
2042         */
2043        public static final UnicodeBlock SYLOTI_NAGRI =
2044            new UnicodeBlock("SYLOTI_NAGRI",
2045                             "SYLOTI NAGRI",
2046                             "SYLOTINAGRI");
2047
2048        /**
2049         * Constant for the "Common Indic Number Forms" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2050         * @since 1.7
2051         */
2052        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2053            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2054                             "COMMON INDIC NUMBER FORMS",
2055                             "COMMONINDICNUMBERFORMS");
2056
2057        /**
2058         * Constant for the "Phags-pa" Unicode character block.
             * The constructor is given the constant's own name plus one
             * alternative spelling, {@code "PHAGS-PA"}, which has a hyphen where
             * the constant has an underscore.
2059         * @since 1.7
2060         */
2061        public static final UnicodeBlock PHAGS_PA =
2062            new UnicodeBlock("PHAGS_PA",
2063                             "PHAGS-PA");
2064
2065        /**
2066         * Constant for the "Saurashtra" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2067         * @since 1.7
2068         */
2069        public static final UnicodeBlock SAURASHTRA =
2070            new UnicodeBlock("SAURASHTRA");
2071
2072        /**
2073         * Constant for the "Devanagari Extended" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2074         * @since 1.7
2075         */
2076        public static final UnicodeBlock DEVANAGARI_EXTENDED =
2077            new UnicodeBlock("DEVANAGARI_EXTENDED",
2078                             "DEVANAGARI EXTENDED",
2079                             "DEVANAGARIEXTENDED");
2080
2081        /**
2082         * Constant for the "Kayah Li" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2083         * @since 1.7
2084         */
2085        public static final UnicodeBlock KAYAH_LI =
2086            new UnicodeBlock("KAYAH_LI",
2087                             "KAYAH LI",
2088                             "KAYAHLI");
2089
2090        /**
2091         * Constant for the "Rejang" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2092         * @since 1.7
2093         */
2094        public static final UnicodeBlock REJANG =
2095            new UnicodeBlock("REJANG");
2096
2097        /**
2098         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2099         * @since 1.7
2100         */
2101        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2102            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2103                             "HANGUL JAMO EXTENDED-A",
2104                             "HANGULJAMOEXTENDED-A");
2105
2106        /**
2107         * Constant for the "Javanese" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2108         * @since 1.7
2109         */
2110        public static final UnicodeBlock JAVANESE =
2111            new UnicodeBlock("JAVANESE");
2112
2113        /**
2114         * Constant for the "Cham" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2115         * @since 1.7
2116         */
2117        public static final UnicodeBlock CHAM =
2118            new UnicodeBlock("CHAM");
2119
2120        /**
2121         * Constant for the "Myanmar Extended-A" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2122         * @since 1.7
2123         */
2124        public static final UnicodeBlock MYANMAR_EXTENDED_A =
2125            new UnicodeBlock("MYANMAR_EXTENDED_A",
2126                             "MYANMAR EXTENDED-A",
2127                             "MYANMAREXTENDED-A");
2128
2129        /**
2130         * Constant for the "Tai Viet" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2131         * @since 1.7
2132         */
2133        public static final UnicodeBlock TAI_VIET =
2134            new UnicodeBlock("TAI_VIET",
2135                             "TAI VIET",
2136                             "TAIVIET");
2137
2138        /**
2139         * Constant for the "Ethiopic Extended-A" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2140         * @since 1.7
2141         */
2142        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2143            new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2144                             "ETHIOPIC EXTENDED-A",
2145                             "ETHIOPICEXTENDED-A");
2146
2147        /**
2148         * Constant for the "Meetei Mayek" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2149         * @since 1.7
2150         */
2151        public static final UnicodeBlock MEETEI_MAYEK =
2152            new UnicodeBlock("MEETEI_MAYEK",
2153                             "MEETEI MAYEK",
2154                             "MEETEIMAYEK");
2155
2156        /**
2157         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2158         * @since 1.7
2159         */
2160        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2161            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2162                             "HANGUL JAMO EXTENDED-B",
2163                             "HANGULJAMOEXTENDED-B");
2164
2165        /**
2166         * Constant for the "Vertical Forms" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2167         * @since 1.7
2168         */
2169        public static final UnicodeBlock VERTICAL_FORMS =
2170            new UnicodeBlock("VERTICAL_FORMS",
2171                             "VERTICAL FORMS",
2172                             "VERTICALFORMS");
2173
2174        /**
2175         * Constant for the "Ancient Greek Numbers" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2176         * @since 1.7
2177         */
2178        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2179            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2180                             "ANCIENT GREEK NUMBERS",
2181                             "ANCIENTGREEKNUMBERS");
2182
2183        /**
2184         * Constant for the "Ancient Symbols" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2185         * @since 1.7
2186         */
2187        public static final UnicodeBlock ANCIENT_SYMBOLS =
2188            new UnicodeBlock("ANCIENT_SYMBOLS",
2189                             "ANCIENT SYMBOLS",
2190                             "ANCIENTSYMBOLS");
2191
2192        /**
2193         * Constant for the "Phaistos Disc" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2194         * @since 1.7
2195         */
2196        public static final UnicodeBlock PHAISTOS_DISC =
2197            new UnicodeBlock("PHAISTOS_DISC",
2198                             "PHAISTOS DISC",
2199                             "PHAISTOSDISC");
2200
2201        /**
2202         * Constant for the "Lycian" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2203         * @since 1.7
2204         */
2205        public static final UnicodeBlock LYCIAN =
2206            new UnicodeBlock("LYCIAN");
2207
2208        /**
2209         * Constant for the "Carian" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2210         * @since 1.7
2211         */
2212        public static final UnicodeBlock CARIAN =
2213            new UnicodeBlock("CARIAN");
2214
2215        /**
2216         * Constant for the "Old Persian" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2217         * @since 1.7
2218         */
2219        public static final UnicodeBlock OLD_PERSIAN =
2220            new UnicodeBlock("OLD_PERSIAN",
2221                             "OLD PERSIAN",
2222                             "OLDPERSIAN");
2223
2224        /**
2225         * Constant for the "Imperial Aramaic" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2226         * @since 1.7
2227         */
2228        public static final UnicodeBlock IMPERIAL_ARAMAIC =
2229            new UnicodeBlock("IMPERIAL_ARAMAIC",
2230                             "IMPERIAL ARAMAIC",
2231                             "IMPERIALARAMAIC");
2232
2233        /**
2234         * Constant for the "Phoenician" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2235         * @since 1.7
2236         */
2237        public static final UnicodeBlock PHOENICIAN =
2238            new UnicodeBlock("PHOENICIAN");
2239
2240        /**
2241         * Constant for the "Lydian" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2242         * @since 1.7
2243         */
2244        public static final UnicodeBlock LYDIAN =
2245            new UnicodeBlock("LYDIAN");
2246
2247        /**
2248         * Constant for the "Kharoshthi" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2249         * @since 1.7
2250         */
2251        public static final UnicodeBlock KHAROSHTHI =
2252            new UnicodeBlock("KHAROSHTHI");
2253
2254        /**
2255         * Constant for the "Old South Arabian" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2256         * @since 1.7
2257         */
2258        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2259            new UnicodeBlock("OLD_SOUTH_ARABIAN",
2260                             "OLD SOUTH ARABIAN",
2261                             "OLDSOUTHARABIAN");
2262
2263        /**
2264         * Constant for the "Avestan" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2265         * @since 1.7
2266         */
2267        public static final UnicodeBlock AVESTAN =
2268            new UnicodeBlock("AVESTAN");
2269
2270        /**
2271         * Constant for the "Inscriptional Parthian" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2272         * @since 1.7
2273         */
2274        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2275            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2276                             "INSCRIPTIONAL PARTHIAN",
2277                             "INSCRIPTIONALPARTHIAN");
2278
2279        /**
2280         * Constant for the "Inscriptional Pahlavi" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2281         * @since 1.7
2282         */
2283        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2284            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2285                             "INSCRIPTIONAL PAHLAVI",
2286                             "INSCRIPTIONALPAHLAVI");
2287
2288        /**
2289         * Constant for the "Old Turkic" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2290         * @since 1.7
2291         */
2292        public static final UnicodeBlock OLD_TURKIC =
2293            new UnicodeBlock("OLD_TURKIC",
2294                             "OLD TURKIC",
2295                             "OLDTURKIC");
2296
2297        /**
2298         * Constant for the "Rumi Numeral Symbols" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2299         * @since 1.7
2300         */
2301        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2302            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2303                             "RUMI NUMERAL SYMBOLS",
2304                             "RUMINUMERALSYMBOLS");
2305
2306        /**
2307         * Constant for the "Brahmi" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2308         * @since 1.7
2309         */
2310        public static final UnicodeBlock BRAHMI =
2311            new UnicodeBlock("BRAHMI");
2312
2313        /**
2314         * Constant for the "Kaithi" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2315         * @since 1.7
2316         */
2317        public static final UnicodeBlock KAITHI =
2318            new UnicodeBlock("KAITHI");
2319
2320        /**
2321         * Constant for the "Cuneiform" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2322         * @since 1.7
2323         */
2324        public static final UnicodeBlock CUNEIFORM =
2325            new UnicodeBlock("CUNEIFORM");
2326
2327        /**
2328         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2329         * character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2330         * @since 1.7
2331         */
2332        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2333            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2334                             "CUNEIFORM NUMBERS AND PUNCTUATION",
2335                             "CUNEIFORMNUMBERSANDPUNCTUATION");
2336
2337        /**
2338         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2339         * @since 1.7
2340         */
2341        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2342            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2343                             "EGYPTIAN HIEROGLYPHS",
2344                             "EGYPTIANHIEROGLYPHS");
2345
2346        /**
2347         * Constant for the "Bamum Supplement" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2348         * @since 1.7
2349         */
2350        public static final UnicodeBlock BAMUM_SUPPLEMENT =
2351            new UnicodeBlock("BAMUM_SUPPLEMENT",
2352                             "BAMUM SUPPLEMENT",
2353                             "BAMUMSUPPLEMENT");
2354
2355        /**
2356         * Constant for the "Kana Supplement" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2357         * @since 1.7
2358         */
2359        public static final UnicodeBlock KANA_SUPPLEMENT =
2360            new UnicodeBlock("KANA_SUPPLEMENT",
2361                             "KANA SUPPLEMENT",
2362                             "KANASUPPLEMENT");
2363
2364        /**
2365         * Constant for the "Ancient Greek Musical Notation" Unicode character
2366         * block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2367         * @since 1.7
2368         */
2369        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2370            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2371                             "ANCIENT GREEK MUSICAL NOTATION",
2372                             "ANCIENTGREEKMUSICALNOTATION");
2373
2374        /**
2375         * Constant for the "Counting Rod Numerals" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2376         * @since 1.7
2377         */
2378        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2379            new UnicodeBlock("COUNTING_ROD_NUMERALS",
2380                             "COUNTING ROD NUMERALS",
2381                             "COUNTINGRODNUMERALS");
2382
2383        /**
2384         * Constant for the "Mahjong Tiles" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2385         * @since 1.7
2386         */
2387        public static final UnicodeBlock MAHJONG_TILES =
2388            new UnicodeBlock("MAHJONG_TILES",
2389                             "MAHJONG TILES",
2390                             "MAHJONGTILES");
2391
2392        /**
2393         * Constant for the "Domino Tiles" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2394         * @since 1.7
2395         */
2396        public static final UnicodeBlock DOMINO_TILES =
2397            new UnicodeBlock("DOMINO_TILES",
2398                             "DOMINO TILES",
2399                             "DOMINOTILES");
2400
2401        /**
2402         * Constant for the "Playing Cards" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2403         * @since 1.7
2404         */
2405        public static final UnicodeBlock PLAYING_CARDS =
2406            new UnicodeBlock("PLAYING_CARDS",
2407                             "PLAYING CARDS",
2408                             "PLAYINGCARDS");
2409
2410        /**
2411         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2412         * block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2413         * @since 1.7
2414         */
2415        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2416            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2417                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2418                             "ENCLOSEDALPHANUMERICSUPPLEMENT");
2419
2420        /**
2421         * Constant for the "Enclosed Ideographic Supplement" Unicode character
2422         * block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2423         * @since 1.7
2424         */
2425        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2426            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2427                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2428                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2429
2430        /**
2431         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2432         * character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2433         * @since 1.7
2434         */
2435        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2436            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2437                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2438                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2439
2440        /**
2441         * Constant for the "Emoticons" Unicode character block.
             * The constructor is given only the constant's own name; no
             * alternative spellings are passed.
2442         * @since 1.7
2443         */
2444        public static final UnicodeBlock EMOTICONS =
2445            new UnicodeBlock("EMOTICONS");
2446
2447        /**
2448         * Constant for the "Transport And Map Symbols" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2449         * @since 1.7
2450         */
2451        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2452            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2453                             "TRANSPORT AND MAP SYMBOLS",
2454                             "TRANSPORTANDMAPSYMBOLS");
2455
2456        /**
2457         * Constant for the "Alchemical Symbols" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2458         * @since 1.7
2459         */
2460        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2461            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2462                             "ALCHEMICAL SYMBOLS",
2463                             "ALCHEMICALSYMBOLS");
2464
2465        /**
2466         * Constant for the "CJK Unified Ideographs Extension C" Unicode
2467         * character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2468         * @since 1.7
2469         */
2470        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2471            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2472                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2473                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2474
2475        /**
2476         * Constant for the "CJK Unified Ideographs Extension D" Unicode
2477         * character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2478         * @since 1.7
2479         */
2480        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2481            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2482                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2483                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2484
2485        /**
2486         * Constant for the "Arabic Extended-A" Unicode character block.
             * The constructor is given the constant's own name plus two spellings
             * that use a hyphen before the final letter: one with spaces and one
             * with the spaces removed.
2487         * @since 1.8
2488         */
2489        public static final UnicodeBlock ARABIC_EXTENDED_A =
2490            new UnicodeBlock("ARABIC_EXTENDED_A",
2491                             "ARABIC EXTENDED-A",
2492                             "ARABICEXTENDED-A");
2493
2494        /**
2495         * Constant for the "Sundanese Supplement" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2496         * @since 1.8
2497         */
2498        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2499            new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2500                             "SUNDANESE SUPPLEMENT",
2501                             "SUNDANESESUPPLEMENT");
2502
2503        /**
2504         * Constant for the "Meetei Mayek Extensions" Unicode character block.
             * The constructor is given the constant's own name plus that name
             * written with spaces and with the spaces removed.
2505         * @since 1.8
2506         */
2507        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2508            new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2509                             "MEETEI MAYEK EXTENSIONS",
2510                             "MEETEIMAYEKEXTENSIONS");
2511
2512        /**
2513         * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2514         * @since 1.8
2515         */
2516        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2517            new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2518                             "MEROITIC HIEROGLYPHS",
2519                             "MEROITICHIEROGLYPHS");
2520
2521        /**
2522         * Constant for the "Meroitic Cursive" Unicode character block.
2523         * @since 1.8
2524         */
2525        public static final UnicodeBlock MEROITIC_CURSIVE =
2526            new UnicodeBlock("MEROITIC_CURSIVE",
2527                             "MEROITIC CURSIVE",
2528                             "MEROITICCURSIVE");
2529
2530        /**
2531         * Constant for the "Sora Sompeng" Unicode character block.
2532         * @since 1.8
2533         */
2534        public static final UnicodeBlock SORA_SOMPENG =
2535            new UnicodeBlock("SORA_SOMPENG",
2536                             "SORA SOMPENG",
2537                             "SORASOMPENG");
2538
2539        /**
2540         * Constant for the "Chakma" Unicode character block.
2541         * @since 1.8
2542         */
2543        public static final UnicodeBlock CHAKMA =
2544            new UnicodeBlock("CHAKMA");
2545
2546        /**
2547         * Constant for the "Sharada" Unicode character block.
2548         * @since 1.8
2549         */
2550        public static final UnicodeBlock SHARADA =
2551            new UnicodeBlock("SHARADA");
2552
2553        /**
2554         * Constant for the "Takri" Unicode character block.
2555         * @since 1.8
2556         */
2557        public static final UnicodeBlock TAKRI =
2558            new UnicodeBlock("TAKRI");
2559
2560        /**
2561         * Constant for the "Miao" Unicode character block.
2562         * @since 1.8
2563         */
2564        public static final UnicodeBlock MIAO =
2565            new UnicodeBlock("MIAO");
2566
2567        /**
2568         * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2569         * character block.
2570         * @since 1.8
2571         */
2572        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2573            new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2574                             "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2575                             "ARABICMATHEMATICALALPHABETICSYMBOLS");
2576
2577        private static final int blockStarts[] = {
2578            0x0000,   // 0000..007F; Basic Latin
2579            0x0080,   // 0080..00FF; Latin-1 Supplement
2580            0x0100,   // 0100..017F; Latin Extended-A
2581            0x0180,   // 0180..024F; Latin Extended-B
2582            0x0250,   // 0250..02AF; IPA Extensions
2583            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2584            0x0300,   // 0300..036F; Combining Diacritical Marks
2585            0x0370,   // 0370..03FF; Greek and Coptic
2586            0x0400,   // 0400..04FF; Cyrillic
2587            0x0500,   // 0500..052F; Cyrillic Supplement
2588            0x0530,   // 0530..058F; Armenian
2589            0x0590,   // 0590..05FF; Hebrew
2590            0x0600,   // 0600..06FF; Arabic
2591            0x0700,   // 0700..074F; Syriac
2592            0x0750,   // 0750..077F; Arabic Supplement
2593            0x0780,   // 0780..07BF; Thaana
2594            0x07C0,   // 07C0..07FF; NKo
2595            0x0800,   // 0800..083F; Samaritan
2596            0x0840,   // 0840..085F; Mandaic
2597            0x0860,   //             unassigned
2598            0x08A0,   // 08A0..08FF; Arabic Extended-A
2599            0x0900,   // 0900..097F; Devanagari
2600            0x0980,   // 0980..09FF; Bengali
2601            0x0A00,   // 0A00..0A7F; Gurmukhi
2602            0x0A80,   // 0A80..0AFF; Gujarati
2603            0x0B00,   // 0B00..0B7F; Oriya
2604            0x0B80,   // 0B80..0BFF; Tamil
2605            0x0C00,   // 0C00..0C7F; Telugu
2606            0x0C80,   // 0C80..0CFF; Kannada
2607            0x0D00,   // 0D00..0D7F; Malayalam
2608            0x0D80,   // 0D80..0DFF; Sinhala
2609            0x0E00,   // 0E00..0E7F; Thai
2610            0x0E80,   // 0E80..0EFF; Lao
2611            0x0F00,   // 0F00..0FFF; Tibetan
2612            0x1000,   // 1000..109F; Myanmar
2613            0x10A0,   // 10A0..10FF; Georgian
2614            0x1100,   // 1100..11FF; Hangul Jamo
2615            0x1200,   // 1200..137F; Ethiopic
2616            0x1380,   // 1380..139F; Ethiopic Supplement
2617            0x13A0,   // 13A0..13FF; Cherokee
2618            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2619            0x1680,   // 1680..169F; Ogham
2620            0x16A0,   // 16A0..16FF; Runic
2621            0x1700,   // 1700..171F; Tagalog
2622            0x1720,   // 1720..173F; Hanunoo
2623            0x1740,   // 1740..175F; Buhid
2624            0x1760,   // 1760..177F; Tagbanwa
2625            0x1780,   // 1780..17FF; Khmer
2626            0x1800,   // 1800..18AF; Mongolian
2627            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2628            0x1900,   // 1900..194F; Limbu
2629            0x1950,   // 1950..197F; Tai Le
2630            0x1980,   // 1980..19DF; New Tai Lue
2631            0x19E0,   // 19E0..19FF; Khmer Symbols
2632            0x1A00,   // 1A00..1A1F; Buginese
2633            0x1A20,   // 1A20..1AAF; Tai Tham
2634            0x1AB0,   //             unassigned
2635            0x1B00,   // 1B00..1B7F; Balinese
2636            0x1B80,   // 1B80..1BBF; Sundanese
2637            0x1BC0,   // 1BC0..1BFF; Batak
2638            0x1C00,   // 1C00..1C4F; Lepcha
2639            0x1C50,   // 1C50..1C7F; Ol Chiki
2640            0x1C80,   //             unassigned
2641            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2642            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2643            0x1D00,   // 1D00..1D7F; Phonetic Extensions
2644            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2645            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2646            0x1E00,   // 1E00..1EFF; Latin Extended Additional
2647            0x1F00,   // 1F00..1FFF; Greek Extended
2648            0x2000,   // 2000..206F; General Punctuation
2649            0x2070,   // 2070..209F; Superscripts and Subscripts
2650            0x20A0,   // 20A0..20CF; Currency Symbols
2651            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2652            0x2100,   // 2100..214F; Letterlike Symbols
2653            0x2150,   // 2150..218F; Number Forms
2654            0x2190,   // 2190..21FF; Arrows
2655            0x2200,   // 2200..22FF; Mathematical Operators
2656            0x2300,   // 2300..23FF; Miscellaneous Technical
2657            0x2400,   // 2400..243F; Control Pictures
2658            0x2440,   // 2440..245F; Optical Character Recognition
2659            0x2460,   // 2460..24FF; Enclosed Alphanumerics
2660            0x2500,   // 2500..257F; Box Drawing
2661            0x2580,   // 2580..259F; Block Elements
2662            0x25A0,   // 25A0..25FF; Geometric Shapes
2663            0x2600,   // 2600..26FF; Miscellaneous Symbols
2664            0x2700,   // 2700..27BF; Dingbats
2665            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2666            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2667            0x2800,   // 2800..28FF; Braille Patterns
2668            0x2900,   // 2900..297F; Supplemental Arrows-B
2669            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2670            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2671            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2672            0x2C00,   // 2C00..2C5F; Glagolitic
2673            0x2C60,   // 2C60..2C7F; Latin Extended-C
2674            0x2C80,   // 2C80..2CFF; Coptic
2675            0x2D00,   // 2D00..2D2F; Georgian Supplement
2676            0x2D30,   // 2D30..2D7F; Tifinagh
2677            0x2D80,   // 2D80..2DDF; Ethiopic Extended
2678            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2679            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2680            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2681            0x2F00,   // 2F00..2FDF; Kangxi Radicals
2682            0x2FE0,   //             unassigned
2683            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2684            0x3000,   // 3000..303F; CJK Symbols and Punctuation
2685            0x3040,   // 3040..309F; Hiragana
2686            0x30A0,   // 30A0..30FF; Katakana
2687            0x3100,   // 3100..312F; Bopomofo
2688            0x3130,   // 3130..318F; Hangul Compatibility Jamo
2689            0x3190,   // 3190..319F; Kanbun
2690            0x31A0,   // 31A0..31BF; Bopomofo Extended
2691            0x31C0,   // 31C0..31EF; CJK Strokes
2692            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2693            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2694            0x3300,   // 3300..33FF; CJK Compatibility
2695            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2696            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2697            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2698            0xA000,   // A000..A48F; Yi Syllables
2699            0xA490,   // A490..A4CF; Yi Radicals
2700            0xA4D0,   // A4D0..A4FF; Lisu
2701            0xA500,   // A500..A63F; Vai
2702            0xA640,   // A640..A69F; Cyrillic Extended-B
2703            0xA6A0,   // A6A0..A6FF; Bamum
2704            0xA700,   // A700..A71F; Modifier Tone Letters
2705            0xA720,   // A720..A7FF; Latin Extended-D
2706            0xA800,   // A800..A82F; Syloti Nagri
2707            0xA830,   // A830..A83F; Common Indic Number Forms
2708            0xA840,   // A840..A87F; Phags-pa
2709            0xA880,   // A880..A8DF; Saurashtra
2710            0xA8E0,   // A8E0..A8FF; Devanagari Extended
2711            0xA900,   // A900..A92F; Kayah Li
2712            0xA930,   // A930..A95F; Rejang
2713            0xA960,   // A960..A97F; Hangul Jamo Extended-A
2714            0xA980,   // A980..A9DF; Javanese
2715            0xA9E0,   //             unassigned
2716            0xAA00,   // AA00..AA5F; Cham
2717            0xAA60,   // AA60..AA7F; Myanmar Extended-A
2718            0xAA80,   // AA80..AADF; Tai Viet
2719            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2720            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2721            0xAB30,   //             unassigned
2722            0xABC0,   // ABC0..ABFF; Meetei Mayek
2723            0xAC00,   // AC00..D7AF; Hangul Syllables
2724            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2725            0xD800,   // D800..DB7F; High Surrogates
2726            0xDB80,   // DB80..DBFF; High Private Use Surrogates
2727            0xDC00,   // DC00..DFFF; Low Surrogates
2728            0xE000,   // E000..F8FF; Private Use Area
2729            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2730            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2731            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2732            0xFE00,   // FE00..FE0F; Variation Selectors
2733            0xFE10,   // FE10..FE1F; Vertical Forms
2734            0xFE20,   // FE20..FE2F; Combining Half Marks
2735            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2736            0xFE50,   // FE50..FE6F; Small Form Variants
2737            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2738            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2739            0xFFF0,   // FFF0..FFFF; Specials
2740            0x10000,  // 10000..1007F; Linear B Syllabary
2741            0x10080,  // 10080..100FF; Linear B Ideograms
2742            0x10100,  // 10100..1013F; Aegean Numbers
2743            0x10140,  // 10140..1018F; Ancient Greek Numbers
2744            0x10190,  // 10190..101CF; Ancient Symbols
2745            0x101D0,  // 101D0..101FF; Phaistos Disc
2746            0x10200,  //               unassigned
2747            0x10280,  // 10280..1029F; Lycian
2748            0x102A0,  // 102A0..102DF; Carian
2749            0x102E0,  //               unassigned
2750            0x10300,  // 10300..1032F; Old Italic
2751            0x10330,  // 10330..1034F; Gothic
2752            0x10350,  //               unassigned
2753            0x10380,  // 10380..1039F; Ugaritic
2754            0x103A0,  // 103A0..103DF; Old Persian
2755            0x103E0,  //               unassigned
2756            0x10400,  // 10400..1044F; Deseret
2757            0x10450,  // 10450..1047F; Shavian
2758            0x10480,  // 10480..104AF; Osmanya
2759            0x104B0,  //               unassigned
2760            0x10800,  // 10800..1083F; Cypriot Syllabary
2761            0x10840,  // 10840..1085F; Imperial Aramaic
2762            0x10860,  //               unassigned
2763            0x10900,  // 10900..1091F; Phoenician
2764            0x10920,  // 10920..1093F; Lydian
2765            0x10940,  //               unassigned
2766            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2767            0x109A0,  // 109A0..109FF; Meroitic Cursive
2768            0x10A00,  // 10A00..10A5F; Kharoshthi
2769            0x10A60,  // 10A60..10A7F; Old South Arabian
2770            0x10A80,  //               unassigned
2771            0x10B00,  // 10B00..10B3F; Avestan
2772            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2773            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2774            0x10B80,  //               unassigned
2775            0x10C00,  // 10C00..10C4F; Old Turkic
2776            0x10C50,  //               unassigned
2777            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2778            0x10E80,  //               unassigned
2779            0x11000,  // 11000..1107F; Brahmi
2780            0x11080,  // 11080..110CF; Kaithi
2781            0x110D0,  // 110D0..110FF; Sora Sompeng
2782            0x11100,  // 11100..1114F; Chakma
2783            0x11150,  //               unassigned
2784            0x11180,  // 11180..111DF; Sharada
2785            0x111E0,  //               unassigned
2786            0x11680,  // 11680..116CF; Takri
2787            0x116D0,  //               unassigned
2788            0x12000,  // 12000..123FF; Cuneiform
2789            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2790            0x12480,  //               unassigned
2791            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2792            0x13430,  //               unassigned
2793            0x16800,  // 16800..16A3F; Bamum Supplement
2794            0x16A40,  //               unassigned
2795            0x16F00,  // 16F00..16F9F; Miao
2796            0x16FA0,  //               unassigned
2797            0x1B000,  // 1B000..1B0FF; Kana Supplement
2798            0x1B100,  //               unassigned
2799            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2800            0x1D100,  // 1D100..1D1FF; Musical Symbols
2801            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2802            0x1D250,  //               unassigned
2803            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2804            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2805            0x1D380,  //               unassigned
2806            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2807            0x1D800,  //               unassigned
2808            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2809            0x1EF00,  //               unassigned
2810            0x1F000,  // 1F000..1F02F; Mahjong Tiles
2811            0x1F030,  // 1F030..1F09F; Domino Tiles
2812            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2813            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2814            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2815            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2816            0x1F600,  // 1F600..1F64F; Emoticons
2817            0x1F650,  //               unassigned
2818            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2819            0x1F700,  // 1F700..1F77F; Alchemical Symbols
2820            0x1F780,  //               unassigned
2821            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2822            0x2A6E0,  //               unassigned
2823            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2824            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2825            0x2B820,  //               unassigned
2826            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2827            0x2FA20,  //               unassigned
2828            0xE0000,  // E0000..E007F; Tags
2829            0xE0080,  //               unassigned
2830            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2831            0xE01F0,  //               unassigned
2832            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2833            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2834        };
2835
2836        private static final UnicodeBlock[] blocks = {
2837            BASIC_LATIN,
2838            LATIN_1_SUPPLEMENT,
2839            LATIN_EXTENDED_A,
2840            LATIN_EXTENDED_B,
2841            IPA_EXTENSIONS,
2842            SPACING_MODIFIER_LETTERS,
2843            COMBINING_DIACRITICAL_MARKS,
2844            GREEK,
2845            CYRILLIC,
2846            CYRILLIC_SUPPLEMENTARY,
2847            ARMENIAN,
2848            HEBREW,
2849            ARABIC,
2850            SYRIAC,
2851            ARABIC_SUPPLEMENT,
2852            THAANA,
2853            NKO,
2854            SAMARITAN,
2855            MANDAIC,
2856            null,
2857            ARABIC_EXTENDED_A,
2858            DEVANAGARI,
2859            BENGALI,
2860            GURMUKHI,
2861            GUJARATI,
2862            ORIYA,
2863            TAMIL,
2864            TELUGU,
2865            KANNADA,
2866            MALAYALAM,
2867            SINHALA,
2868            THAI,
2869            LAO,
2870            TIBETAN,
2871            MYANMAR,
2872            GEORGIAN,
2873            HANGUL_JAMO,
2874            ETHIOPIC,
2875            ETHIOPIC_SUPPLEMENT,
2876            CHEROKEE,
2877            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2878            OGHAM,
2879            RUNIC,
2880            TAGALOG,
2881            HANUNOO,
2882            BUHID,
2883            TAGBANWA,
2884            KHMER,
2885            MONGOLIAN,
2886            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2887            LIMBU,
2888            TAI_LE,
2889            NEW_TAI_LUE,
2890            KHMER_SYMBOLS,
2891            BUGINESE,
2892            TAI_THAM,
2893            null,
2894            BALINESE,
2895            SUNDANESE,
2896            BATAK,
2897            LEPCHA,
2898            OL_CHIKI,
2899            null,
2900            SUNDANESE_SUPPLEMENT,
2901            VEDIC_EXTENSIONS,
2902            PHONETIC_EXTENSIONS,
2903            PHONETIC_EXTENSIONS_SUPPLEMENT,
2904            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2905            LATIN_EXTENDED_ADDITIONAL,
2906            GREEK_EXTENDED,
2907            GENERAL_PUNCTUATION,
2908            SUPERSCRIPTS_AND_SUBSCRIPTS,
2909            CURRENCY_SYMBOLS,
2910            COMBINING_MARKS_FOR_SYMBOLS,
2911            LETTERLIKE_SYMBOLS,
2912            NUMBER_FORMS,
2913            ARROWS,
2914            MATHEMATICAL_OPERATORS,
2915            MISCELLANEOUS_TECHNICAL,
2916            CONTROL_PICTURES,
2917            OPTICAL_CHARACTER_RECOGNITION,
2918            ENCLOSED_ALPHANUMERICS,
2919            BOX_DRAWING,
2920            BLOCK_ELEMENTS,
2921            GEOMETRIC_SHAPES,
2922            MISCELLANEOUS_SYMBOLS,
2923            DINGBATS,
2924            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2925            SUPPLEMENTAL_ARROWS_A,
2926            BRAILLE_PATTERNS,
2927            SUPPLEMENTAL_ARROWS_B,
2928            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2929            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2930            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2931            GLAGOLITIC,
2932            LATIN_EXTENDED_C,
2933            COPTIC,
2934            GEORGIAN_SUPPLEMENT,
2935            TIFINAGH,
2936            ETHIOPIC_EXTENDED,
2937            CYRILLIC_EXTENDED_A,
2938            SUPPLEMENTAL_PUNCTUATION,
2939            CJK_RADICALS_SUPPLEMENT,
2940            KANGXI_RADICALS,
2941            null,
2942            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2943            CJK_SYMBOLS_AND_PUNCTUATION,
2944            HIRAGANA,
2945            KATAKANA,
2946            BOPOMOFO,
2947            HANGUL_COMPATIBILITY_JAMO,
2948            KANBUN,
2949            BOPOMOFO_EXTENDED,
2950            CJK_STROKES,
2951            KATAKANA_PHONETIC_EXTENSIONS,
2952            ENCLOSED_CJK_LETTERS_AND_MONTHS,
2953            CJK_COMPATIBILITY,
2954            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2955            YIJING_HEXAGRAM_SYMBOLS,
2956            CJK_UNIFIED_IDEOGRAPHS,
2957            YI_SYLLABLES,
2958            YI_RADICALS,
2959            LISU,
2960            VAI,
2961            CYRILLIC_EXTENDED_B,
2962            BAMUM,
2963            MODIFIER_TONE_LETTERS,
2964            LATIN_EXTENDED_D,
2965            SYLOTI_NAGRI,
2966            COMMON_INDIC_NUMBER_FORMS,
2967            PHAGS_PA,
2968            SAURASHTRA,
2969            DEVANAGARI_EXTENDED,
2970            KAYAH_LI,
2971            REJANG,
2972            HANGUL_JAMO_EXTENDED_A,
2973            JAVANESE,
2974            null,
2975            CHAM,
2976            MYANMAR_EXTENDED_A,
2977            TAI_VIET,
2978            MEETEI_MAYEK_EXTENSIONS,
2979            ETHIOPIC_EXTENDED_A,
2980            null,
2981            MEETEI_MAYEK,
2982            HANGUL_SYLLABLES,
2983            HANGUL_JAMO_EXTENDED_B,
2984            HIGH_SURROGATES,
2985            HIGH_PRIVATE_USE_SURROGATES,
2986            LOW_SURROGATES,
2987            PRIVATE_USE_AREA,
2988            CJK_COMPATIBILITY_IDEOGRAPHS,
2989            ALPHABETIC_PRESENTATION_FORMS,
2990            ARABIC_PRESENTATION_FORMS_A,
2991            VARIATION_SELECTORS,
2992            VERTICAL_FORMS,
2993            COMBINING_HALF_MARKS,
2994            CJK_COMPATIBILITY_FORMS,
2995            SMALL_FORM_VARIANTS,
2996            ARABIC_PRESENTATION_FORMS_B,
2997            HALFWIDTH_AND_FULLWIDTH_FORMS,
2998            SPECIALS,
2999            LINEAR_B_SYLLABARY,
3000            LINEAR_B_IDEOGRAMS,
3001            AEGEAN_NUMBERS,
3002            ANCIENT_GREEK_NUMBERS,
3003            ANCIENT_SYMBOLS,
3004            PHAISTOS_DISC,
3005            null,
3006            LYCIAN,
3007            CARIAN,
3008            null,
3009            OLD_ITALIC,
3010            GOTHIC,
3011            null,
3012            UGARITIC,
3013            OLD_PERSIAN,
3014            null,
3015            DESERET,
3016            SHAVIAN,
3017            OSMANYA,
3018            null,
3019            CYPRIOT_SYLLABARY,
3020            IMPERIAL_ARAMAIC,
3021            null,
3022            PHOENICIAN,
3023            LYDIAN,
3024            null,
3025            MEROITIC_HIEROGLYPHS,
3026            MEROITIC_CURSIVE,
3027            KHAROSHTHI,
3028            OLD_SOUTH_ARABIAN,
3029            null,
3030            AVESTAN,
3031            INSCRIPTIONAL_PARTHIAN,
3032            INSCRIPTIONAL_PAHLAVI,
3033            null,
3034            OLD_TURKIC,
3035            null,
3036            RUMI_NUMERAL_SYMBOLS,
3037            null,
3038            BRAHMI,
3039            KAITHI,
3040            SORA_SOMPENG,
3041            CHAKMA,
3042            null,
3043            SHARADA,
3044            null,
3045            TAKRI,
3046            null,
3047            CUNEIFORM,
3048            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3049            null,
3050            EGYPTIAN_HIEROGLYPHS,
3051            null,
3052            BAMUM_SUPPLEMENT,
3053            null,
3054            MIAO,
3055            null,
3056            KANA_SUPPLEMENT,
3057            null,
3058            BYZANTINE_MUSICAL_SYMBOLS,
3059            MUSICAL_SYMBOLS,
3060            ANCIENT_GREEK_MUSICAL_NOTATION,
3061            null,
3062            TAI_XUAN_JING_SYMBOLS,
3063            COUNTING_ROD_NUMERALS,
3064            null,
3065            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3066            null,
3067            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3068            null,
3069            MAHJONG_TILES,
3070            DOMINO_TILES,
3071            PLAYING_CARDS,
3072            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3073            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3074            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3075            EMOTICONS,
3076            null,
3077            TRANSPORT_AND_MAP_SYMBOLS,
3078            ALCHEMICAL_SYMBOLS,
3079            null,
3080            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3081            null,
3082            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3083            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3084            null,
3085            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3086            null,
3087            TAGS,
3088            null,
3089            VARIATION_SELECTORS_SUPPLEMENT,
3090            null,
3091            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3092            SUPPLEMENTARY_PRIVATE_USE_AREA_B
3093        };
3094
3095
3096        /**
3097         * Returns the object representing the Unicode block containing the
3098         * given character, or {@code null} if the character is not a
3099         * member of a defined block.
3100         *
3101         * <p><b>Note:</b> This method cannot handle
3102         * <a href="Character.html#supplementary"> supplementary
3103         * characters</a>.  To support all Unicode characters, including
3104         * supplementary characters, use the {@link #of(int)} method.
3105         *
3106         * @param   c  The character in question
3107         * @return  The {@code UnicodeBlock} instance representing the
3108         *          Unicode block of which this character is a member, or
3109         *          {@code null} if the character is not a member of any
3110         *          Unicode block
3111         */
3112        public static UnicodeBlock of(char c) {
3113            return of((int)c);
3114        }
3115
3116        /**
3117         * Returns the object representing the Unicode block
3118         * containing the given character (Unicode code point), or
3119         * {@code null} if the character is not a member of a
3120         * defined block.
3121         *
3122         * @param   codePoint the character (Unicode code point) in question.
3123         * @return  The {@code UnicodeBlock} instance representing the
3124         *          Unicode block of which this character is a member, or
3125         *          {@code null} if the character is not a member of any
3126         *          Unicode block
3127         * @exception IllegalArgumentException if the specified
3128         * {@code codePoint} is an invalid Unicode code point.
3129         * @see Character#isValidCodePoint(int)
3130         * @since   1.5
3131         */
3132        public static UnicodeBlock of(int codePoint) {
3133            if (!isValidCodePoint(codePoint)) {
3134                throw new IllegalArgumentException();
3135            }
3136
3137            int top, bottom, current;
3138            bottom = 0;
3139            top = blockStarts.length;
3140            current = top/2;
3141
3142            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3143            while (top - bottom > 1) {
3144                if (codePoint >= blockStarts[current]) {
3145                    bottom = current;
3146                } else {
3147                    top = current;
3148                }
3149                current = (top + bottom) / 2;
3150            }
3151            return blocks[current];
3152        }
3153
3154        /**
3155         * Returns the UnicodeBlock with the given name. Block
3156         * names are determined by The Unicode Standard. The file
3157         * Blocks-&lt;version&gt;.txt defines blocks for a particular
3158         * version of the standard. The {@link Character} class specifies
3159         * the version of the standard that it supports.
3160         * <p>
3161         * This method accepts block names in the following forms:
3162         * <ol>
3163         * <li> Canonical block names as defined by the Unicode Standard.
3164         * For example, the standard defines a "Basic Latin" block. Therefore, this
3165         * method accepts "Basic Latin" as a valid block name. The documentation of
3166         * each UnicodeBlock provides the canonical name.
3167         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3168         * is a valid block name for the "Basic Latin" block.
3169         * <li>The text representation of each constant UnicodeBlock identifier.
3170         * For example, this method will return the {@link #BASIC_LATIN} block if
3171         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3172         * hyphens in the canonical name with underscores.
3173         * </ol>
3174         * Finally, character case is ignored for all of the valid block name forms.
3175         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3176         * The en_US locale's case mapping rules are used to provide case-insensitive
3177         * string comparisons for block name validation.
3178         * <p>
3179         * If the Unicode Standard changes block names, both the previous and
3180         * current names will be accepted.
3181         *
3182         * @param blockName A {@code UnicodeBlock} name.
3183         * @return The {@code UnicodeBlock} instance identified
3184         *         by {@code blockName}
3185         * @throws IllegalArgumentException if {@code blockName} is an
3186         *         invalid name
3187         * @throws NullPointerException if {@code blockName} is null
3188         * @since 1.5
3189         */
3190        public static final UnicodeBlock forName(String blockName) {
3191            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3192            if (block == null) {
3193                throw new IllegalArgumentException();
3194            }
3195            return block;
3196        }
3197    }
3198
3199
3200    /**
3201     * A family of character subsets representing the character scripts
3202     * defined in the <a href="http://www.unicode.org/reports/tr24/">
3203     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3204     * character is assigned to a single Unicode script, either a specific
3205     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3206     * one of the following three special values,
3207     * {@link Character.UnicodeScript#INHERITED Inherited},
3208     * {@link Character.UnicodeScript#COMMON Common} or
3209     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3210     *
3211     * @since 1.7
3212     */
3213    public static enum UnicodeScript {
3214        /**
3215         * Unicode script "Common".
3216         */
3217        COMMON,
3218
3219        /**
3220         * Unicode script "Latin".
3221         */
3222        LATIN,
3223
3224        /**
3225         * Unicode script "Greek".
3226         */
3227        GREEK,
3228
3229        /**
3230         * Unicode script "Cyrillic".
3231         */
3232        CYRILLIC,
3233
3234        /**
3235         * Unicode script "Armenian".
3236         */
3237        ARMENIAN,
3238
3239        /**
3240         * Unicode script "Hebrew".
3241         */
3242        HEBREW,
3243
3244        /**
3245         * Unicode script "Arabic".
3246         */
3247        ARABIC,
3248
3249        /**
3250         * Unicode script "Syriac".
3251         */
3252        SYRIAC,
3253
3254        /**
3255         * Unicode script "Thaana".
3256         */
3257        THAANA,
3258
3259        /**
3260         * Unicode script "Devanagari".
3261         */
3262        DEVANAGARI,
3263
3264        /**
3265         * Unicode script "Bengali".
3266         */
3267        BENGALI,
3268
3269        /**
3270         * Unicode script "Gurmukhi".
3271         */
3272        GURMUKHI,
3273
3274        /**
3275         * Unicode script "Gujarati".
3276         */
3277        GUJARATI,
3278
3279        /**
3280         * Unicode script "Oriya".
3281         */
3282        ORIYA,
3283
3284        /**
3285         * Unicode script "Tamil".
3286         */
3287        TAMIL,
3288
3289        /**
3290         * Unicode script "Telugu".
3291         */
3292        TELUGU,
3293
3294        /**
3295         * Unicode script "Kannada".
3296         */
3297        KANNADA,
3298
3299        /**
3300         * Unicode script "Malayalam".
3301         */
3302        MALAYALAM,
3303
3304        /**
3305         * Unicode script "Sinhala".
3306         */
3307        SINHALA,
3308
3309        /**
3310         * Unicode script "Thai".
3311         */
3312        THAI,
3313
3314        /**
3315         * Unicode script "Lao".
3316         */
3317        LAO,
3318
3319        /**
3320         * Unicode script "Tibetan".
3321         */
3322        TIBETAN,
3323
3324        /**
3325         * Unicode script "Myanmar".
3326         */
3327        MYANMAR,
3328
3329        /**
3330         * Unicode script "Georgian".
3331         */
3332        GEORGIAN,
3333
3334        /**
3335         * Unicode script "Hangul".
3336         */
3337        HANGUL,
3338
3339        /**
3340         * Unicode script "Ethiopic".
3341         */
3342        ETHIOPIC,
3343
3344        /**
3345         * Unicode script "Cherokee".
3346         */
3347        CHEROKEE,
3348
3349        /**
3350         * Unicode script "Canadian_Aboriginal".
3351         */
3352        CANADIAN_ABORIGINAL,
3353
3354        /**
3355         * Unicode script "Ogham".
3356         */
3357        OGHAM,
3358
3359        /**
3360         * Unicode script "Runic".
3361         */
3362        RUNIC,
3363
3364        /**
3365         * Unicode script "Khmer".
3366         */
3367        KHMER,
3368
3369        /**
3370         * Unicode script "Mongolian".
3371         */
3372        MONGOLIAN,
3373
3374        /**
3375         * Unicode script "Hiragana".
3376         */
3377        HIRAGANA,
3378
3379        /**
3380         * Unicode script "Katakana".
3381         */
3382        KATAKANA,
3383
3384        /**
3385         * Unicode script "Bopomofo".
3386         */
3387        BOPOMOFO,
3388
3389        /**
3390         * Unicode script "Han".
3391         */
3392        HAN,
3393
3394        /**
3395         * Unicode script "Yi".
3396         */
3397        YI,
3398
3399        /**
3400         * Unicode script "Old_Italic".
3401         */
3402        OLD_ITALIC,
3403
3404        /**
3405         * Unicode script "Gothic".
3406         */
3407        GOTHIC,
3408
3409        /**
3410         * Unicode script "Deseret".
3411         */
3412        DESERET,
3413
3414        /**
3415         * Unicode script "Inherited".
3416         */
3417        INHERITED,
3418
3419        /**
3420         * Unicode script "Tagalog".
3421         */
3422        TAGALOG,
3423
3424        /**
3425         * Unicode script "Hanunoo".
3426         */
3427        HANUNOO,
3428
3429        /**
3430         * Unicode script "Buhid".
3431         */
3432        BUHID,
3433
3434        /**
3435         * Unicode script "Tagbanwa".
3436         */
3437        TAGBANWA,
3438
3439        /**
3440         * Unicode script "Limbu".
3441         */
3442        LIMBU,
3443
3444        /**
3445         * Unicode script "Tai_Le".
3446         */
3447        TAI_LE,
3448
3449        /**
3450         * Unicode script "Linear_B".
3451         */
3452        LINEAR_B,
3453
3454        /**
3455         * Unicode script "Ugaritic".
3456         */
3457        UGARITIC,
3458
3459        /**
3460         * Unicode script "Shavian".
3461         */
3462        SHAVIAN,
3463
3464        /**
3465         * Unicode script "Osmanya".
3466         */
3467        OSMANYA,
3468
3469        /**
3470         * Unicode script "Cypriot".
3471         */
3472        CYPRIOT,
3473
3474        /**
3475         * Unicode script "Braille".
3476         */
3477        BRAILLE,
3478
3479        /**
3480         * Unicode script "Buginese".
3481         */
3482        BUGINESE,
3483
3484        /**
3485         * Unicode script "Coptic".
3486         */
3487        COPTIC,
3488
3489        /**
3490         * Unicode script "New_Tai_Lue".
3491         */
3492        NEW_TAI_LUE,
3493
3494        /**
3495         * Unicode script "Glagolitic".
3496         */
3497        GLAGOLITIC,
3498
3499        /**
3500         * Unicode script "Tifinagh".
3501         */
3502        TIFINAGH,
3503
3504        /**
3505         * Unicode script "Syloti_Nagri".
3506         */
3507        SYLOTI_NAGRI,
3508
3509        /**
3510         * Unicode script "Old_Persian".
3511         */
3512        OLD_PERSIAN,
3513
3514        /**
3515         * Unicode script "Kharoshthi".
3516         */
3517        KHAROSHTHI,
3518
3519        /**
3520         * Unicode script "Balinese".
3521         */
3522        BALINESE,
3523
3524        /**
3525         * Unicode script "Cuneiform".
3526         */
3527        CUNEIFORM,
3528
3529        /**
3530         * Unicode script "Phoenician".
3531         */
3532        PHOENICIAN,
3533
3534        /**
3535         * Unicode script "Phags_Pa".
3536         */
3537        PHAGS_PA,
3538
3539        /**
3540         * Unicode script "Nko".
3541         */
3542        NKO,
3543
3544        /**
3545         * Unicode script "Sundanese".
3546         */
3547        SUNDANESE,
3548
3549        /**
3550         * Unicode script "Batak".
3551         */
3552        BATAK,
3553
3554        /**
3555         * Unicode script "Lepcha".
3556         */
3557        LEPCHA,
3558
3559        /**
3560         * Unicode script "Ol_Chiki".
3561         */
3562        OL_CHIKI,
3563
3564        /**
3565         * Unicode script "Vai".
3566         */
3567        VAI,
3568
3569        /**
3570         * Unicode script "Saurashtra".
3571         */
3572        SAURASHTRA,
3573
3574        /**
3575         * Unicode script "Kayah_Li".
3576         */
3577        KAYAH_LI,
3578
3579        /**
3580         * Unicode script "Rejang".
3581         */
3582        REJANG,
3583
3584        /**
3585         * Unicode script "Lycian".
3586         */
3587        LYCIAN,
3588
3589        /**
3590         * Unicode script "Carian".
3591         */
3592        CARIAN,
3593
3594        /**
3595         * Unicode script "Lydian".
3596         */
3597        LYDIAN,
3598
3599        /**
3600         * Unicode script "Cham".
3601         */
3602        CHAM,
3603
3604        /**
3605         * Unicode script "Tai_Tham".
3606         */
3607        TAI_THAM,
3608
3609        /**
3610         * Unicode script "Tai_Viet".
3611         */
3612        TAI_VIET,
3613
3614        /**
3615         * Unicode script "Avestan".
3616         */
3617        AVESTAN,
3618
3619        /**
3620         * Unicode script "Egyptian_Hieroglyphs".
3621         */
3622        EGYPTIAN_HIEROGLYPHS,
3623
3624        /**
3625         * Unicode script "Samaritan".
3626         */
3627        SAMARITAN,
3628
3629        /**
3630         * Unicode script "Mandaic".
3631         */
3632        MANDAIC,
3633
3634        /**
3635         * Unicode script "Lisu".
3636         */
3637        LISU,
3638
3639        /**
3640         * Unicode script "Bamum".
3641         */
3642        BAMUM,
3643
3644        /**
3645         * Unicode script "Javanese".
3646         */
3647        JAVANESE,
3648
3649        /**
3650         * Unicode script "Meetei_Mayek".
3651         */
3652        MEETEI_MAYEK,
3653
3654        /**
3655         * Unicode script "Imperial_Aramaic".
3656         */
3657        IMPERIAL_ARAMAIC,
3658
3659        /**
3660         * Unicode script "Old_South_Arabian".
3661         */
3662        OLD_SOUTH_ARABIAN,
3663
3664        /**
3665         * Unicode script "Inscriptional_Parthian".
3666         */
3667        INSCRIPTIONAL_PARTHIAN,
3668
3669        /**
3670         * Unicode script "Inscriptional_Pahlavi".
3671         */
3672        INSCRIPTIONAL_PAHLAVI,
3673
3674        /**
3675         * Unicode script "Old_Turkic".
3676         */
3677        OLD_TURKIC,
3678
3679        /**
3680         * Unicode script "Brahmi".
3681         */
3682        BRAHMI,
3683
3684        /**
3685         * Unicode script "Kaithi".
3686         */
3687        KAITHI,
3688
3689        /**
3690         * Unicode script "Meroitic_Hieroglyphs".
3691         */
3692        MEROITIC_HIEROGLYPHS,
3693
3694        /**
3695         * Unicode script "Meroitic_Cursive".
3696         */
3697        MEROITIC_CURSIVE,
3698
3699        /**
3700         * Unicode script "Sora_Sompeng".
3701         */
3702        SORA_SOMPENG,
3703
3704        /**
3705         * Unicode script "Chakma".
3706         */
3707        CHAKMA,
3708
3709        /**
3710         * Unicode script "Sharada".
3711         */
3712        SHARADA,
3713
3714        /**
3715         * Unicode script "Takri".
3716         */
3717        TAKRI,
3718
3719        /**
3720         * Unicode script "Miao".
3721         */
3722        MIAO,
3723
3724        /**
3725         * Unicode script "Unknown".
3726         */
3727        UNKNOWN;
3728
3729        private static final int[] scriptStarts = {  // Ascending first code point of each script run; entry i pairs with scripts[i]. NOTE(review): presumably searched by code point in the lookup method -- confirm.
3730            0x0000,   // 0000..0040; COMMON
3731            0x0041,   // 0041..005A; LATIN
3732            0x005B,   // 005B..0060; COMMON
3733            0x0061,   // 0061..007A; LATIN
3734            0x007B,   // 007B..00A9; COMMON
3735            0x00AA,   // 00AA..00AA; LATIN
3736            0x00AB,   // 00AB..00B9; COMMON
3737            0x00BA,   // 00BA..00BA; LATIN
3738            0x00BB,   // 00BB..00BF; COMMON
3739            0x00C0,   // 00C0..00D6; LATIN
3740            0x00D7,   // 00D7..00D7; COMMON
3741            0x00D8,   // 00D8..00F6; LATIN
3742            0x00F7,   // 00F7..00F7; COMMON
3743            0x00F8,   // 00F8..02B8; LATIN
3744            0x02B9,   // 02B9..02DF; COMMON
3745            0x02E0,   // 02E0..02E4; LATIN
3746            0x02E5,   // 02E5..02E9; COMMON
3747            0x02EA,   // 02EA..02EB; BOPOMOFO
3748            0x02EC,   // 02EC..02FF; COMMON
3749            0x0300,   // 0300..036F; INHERITED
3750            0x0370,   // 0370..0373; GREEK
3751            0x0374,   // 0374..0374; COMMON
3752            0x0375,   // 0375..037D; GREEK
3753            0x037E,   // 037E..0383; COMMON
3754            0x0384,   // 0384..0384; GREEK
3755            0x0385,   // 0385..0385; COMMON
3756            0x0386,   // 0386..0386; GREEK
3757            0x0387,   // 0387..0387; COMMON
3758            0x0388,   // 0388..03E1; GREEK
3759            0x03E2,   // 03E2..03EF; COPTIC
3760            0x03F0,   // 03F0..03FF; GREEK
3761            0x0400,   // 0400..0484; CYRILLIC
3762            0x0485,   // 0485..0486; INHERITED
3763            0x0487,   // 0487..0530; CYRILLIC
3764            0x0531,   // 0531..0588; ARMENIAN
3765            0x0589,   // 0589..0589; COMMON
3766            0x058A,   // 058A..0590; ARMENIAN
3767            0x0591,   // 0591..05FF; HEBREW
3768            0x0600,   // 0600..060B; ARABIC
3769            0x060C,   // 060C..060C; COMMON
3770            0x060D,   // 060D..061A; ARABIC
3771            0x061B,   // 061B..061D; COMMON
3772            0x061E,   // 061E..061E; ARABIC
3773            0x061F,   // 061F..061F; COMMON
3774            0x0620,   // 0620..063F; ARABIC
3775            0x0640,   // 0640..0640; COMMON
3776            0x0641,   // 0641..064A; ARABIC
3777            0x064B,   // 064B..0655; INHERITED
3778            0x0656,   // 0656..065F; ARABIC
3779            0x0660,   // 0660..0669; COMMON
3780            0x066A,   // 066A..066F; ARABIC
3781            0x0670,   // 0670..0670; INHERITED
3782            0x0671,   // 0671..06DC; ARABIC
3783            0x06DD,   // 06DD..06DD; COMMON
3784            0x06DE,   // 06DE..06FF; ARABIC
3785            0x0700,   // 0700..074F; SYRIAC
3786            0x0750,   // 0750..077F; ARABIC
3787            0x0780,   // 0780..07BF; THAANA
3788            0x07C0,   // 07C0..07FF; NKO
3789            0x0800,   // 0800..083F; SAMARITAN
3790            0x0840,   // 0840..089F; MANDAIC
3791            0x08A0,   // 08A0..08FF; ARABIC
3792            0x0900,   // 0900..0950; DEVANAGARI
3793            0x0951,   // 0951..0952; INHERITED
3794            0x0953,   // 0953..0963; DEVANAGARI
3795            0x0964,   // 0964..0965; COMMON
3796            0x0966,   // 0966..0980; DEVANAGARI
3797            0x0981,   // 0981..0A00; BENGALI
3798            0x0A01,   // 0A01..0A80; GURMUKHI
3799            0x0A81,   // 0A81..0B00; GUJARATI
3800            0x0B01,   // 0B01..0B81; ORIYA
3801            0x0B82,   // 0B82..0C00; TAMIL
3802            0x0C01,   // 0C01..0C81; TELUGU
3803            0x0C82,   // 0C82..0CF0; KANNADA
3804            0x0D02,   // 0D02..0D81; MALAYALAM
3805            0x0D82,   // 0D82..0E00; SINHALA
3806            0x0E01,   // 0E01..0E3E; THAI
3807            0x0E3F,   // 0E3F..0E3F; COMMON
3808            0x0E40,   // 0E40..0E80; THAI
3809            0x0E81,   // 0E81..0EFF; LAO
3810            0x0F00,   // 0F00..0FD4; TIBETAN
3811            0x0FD5,   // 0FD5..0FD8; COMMON
3812            0x0FD9,   // 0FD9..0FFF; TIBETAN
3813            0x1000,   // 1000..109F; MYANMAR
3814            0x10A0,   // 10A0..10FA; GEORGIAN
3815            0x10FB,   // 10FB..10FB; COMMON
3816            0x10FC,   // 10FC..10FF; GEORGIAN
3817            0x1100,   // 1100..11FF; HANGUL
3818            0x1200,   // 1200..139F; ETHIOPIC
3819            0x13A0,   // 13A0..13FF; CHEROKEE
3820            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3821            0x1680,   // 1680..169F; OGHAM
3822            0x16A0,   // 16A0..16EA; RUNIC
3823            0x16EB,   // 16EB..16ED; COMMON
3824            0x16EE,   // 16EE..16FF; RUNIC
3825            0x1700,   // 1700..171F; TAGALOG
3826            0x1720,   // 1720..1734; HANUNOO
3827            0x1735,   // 1735..173F; COMMON
3828            0x1740,   // 1740..175F; BUHID
3829            0x1760,   // 1760..177F; TAGBANWA
3830            0x1780,   // 1780..17FF; KHMER
3831            0x1800,   // 1800..1801; MONGOLIAN
3832            0x1802,   // 1802..1803; COMMON
3833            0x1804,   // 1804..1804; MONGOLIAN
3834            0x1805,   // 1805..1805; COMMON
3835            0x1806,   // 1806..18AF; MONGOLIAN
3836            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3837            0x1900,   // 1900..194F; LIMBU
3838            0x1950,   // 1950..197F; TAI_LE
3839            0x1980,   // 1980..19DF; NEW_TAI_LUE
3840            0x19E0,   // 19E0..19FF; KHMER
3841            0x1A00,   // 1A00..1A1F; BUGINESE
3842            0x1A20,   // 1A20..1AFF; TAI_THAM
3843            0x1B00,   // 1B00..1B7F; BALINESE
3844            0x1B80,   // 1B80..1BBF; SUNDANESE
3845            0x1BC0,   // 1BC0..1BFF; BATAK
3846            0x1C00,   // 1C00..1C4F; LEPCHA
3847            0x1C50,   // 1C50..1CBF; OL_CHIKI
3848            0x1CC0,   // 1CC0..1CCF; SUNDANESE
3849            0x1CD0,   // 1CD0..1CD2; INHERITED
3850            0x1CD3,   // 1CD3..1CD3; COMMON
3851            0x1CD4,   // 1CD4..1CE0; INHERITED
3852            0x1CE1,   // 1CE1..1CE1; COMMON
3853            0x1CE2,   // 1CE2..1CE8; INHERITED
3854            0x1CE9,   // 1CE9..1CEC; COMMON
3855            0x1CED,   // 1CED..1CED; INHERITED
3856            0x1CEE,   // 1CEE..1CF3; COMMON
3857            0x1CF4,   // 1CF4..1CF4; INHERITED
3858            0x1CF5,   // 1CF5..1CFF; COMMON
3859            0x1D00,   // 1D00..1D25; LATIN
3860            0x1D26,   // 1D26..1D2A; GREEK
3861            0x1D2B,   // 1D2B..1D2B; CYRILLIC
3862            0x1D2C,   // 1D2C..1D5C; LATIN
3863            0x1D5D,   // 1D5D..1D61; GREEK
3864            0x1D62,   // 1D62..1D65; LATIN
3865            0x1D66,   // 1D66..1D6A; GREEK
3866            0x1D6B,   // 1D6B..1D77; LATIN
3867            0x1D78,   // 1D78..1D78; CYRILLIC
3868            0x1D79,   // 1D79..1DBE; LATIN
3869            0x1DBF,   // 1DBF..1DBF; GREEK
3870            0x1DC0,   // 1DC0..1DFF; INHERITED
3871            0x1E00,   // 1E00..1EFF; LATIN
3872            0x1F00,   // 1F00..1FFF; GREEK
3873            0x2000,   // 2000..200B; COMMON
3874            0x200C,   // 200C..200D; INHERITED
3875            0x200E,   // 200E..2070; COMMON
3876            0x2071,   // 2071..2073; LATIN
3877            0x2074,   // 2074..207E; COMMON
3878            0x207F,   // 207F..207F; LATIN
3879            0x2080,   // 2080..208F; COMMON
3880            0x2090,   // 2090..209F; LATIN
3881            0x20A0,   // 20A0..20CF; COMMON
3882            0x20D0,   // 20D0..20FF; INHERITED
3883            0x2100,   // 2100..2125; COMMON
3884            0x2126,   // 2126..2126; GREEK
3885            0x2127,   // 2127..2129; COMMON
3886            0x212A,   // 212A..212B; LATIN
3887            0x212C,   // 212C..2131; COMMON
3888            0x2132,   // 2132..2132; LATIN
3889            0x2133,   // 2133..214D; COMMON
3890            0x214E,   // 214E..214E; LATIN
3891            0x214F,   // 214F..215F; COMMON
3892            0x2160,   // 2160..2188; LATIN
3893            0x2189,   // 2189..27FF; COMMON
3894            0x2800,   // 2800..28FF; BRAILLE
3895            0x2900,   // 2900..2BFF; COMMON
3896            0x2C00,   // 2C00..2C5F; GLAGOLITIC
3897            0x2C60,   // 2C60..2C7F; LATIN
3898            0x2C80,   // 2C80..2CFF; COPTIC
3899            0x2D00,   // 2D00..2D2F; GEORGIAN
3900            0x2D30,   // 2D30..2D7F; TIFINAGH
3901            0x2D80,   // 2D80..2DDF; ETHIOPIC
3902            0x2DE0,   // 2DE0..2DFF; CYRILLIC
3903            0x2E00,   // 2E00..2E7F; COMMON
3904            0x2E80,   // 2E80..2FEF; HAN
3905            0x2FF0,   // 2FF0..3004; COMMON
3906            0x3005,   // 3005..3005; HAN
3907            0x3006,   // 3006..3006; COMMON
3908            0x3007,   // 3007..3007; HAN
3909            0x3008,   // 3008..3020; COMMON
3910            0x3021,   // 3021..3029; HAN
3911            0x302A,   // 302A..302D; INHERITED
3912            0x302E,   // 302E..302F; HANGUL
3913            0x3030,   // 3030..3037; COMMON
3914            0x3038,   // 3038..303B; HAN
3915            0x303C,   // 303C..3040; COMMON
3916            0x3041,   // 3041..3098; HIRAGANA
3917            0x3099,   // 3099..309A; INHERITED
3918            0x309B,   // 309B..309C; COMMON
3919            0x309D,   // 309D..309F; HIRAGANA
3920            0x30A0,   // 30A0..30A0; COMMON
3921            0x30A1,   // 30A1..30FA; KATAKANA
3922            0x30FB,   // 30FB..30FC; COMMON
3923            0x30FD,   // 30FD..3104; KATAKANA
3924            0x3105,   // 3105..3130; BOPOMOFO
3925            0x3131,   // 3131..318F; HANGUL
3926            0x3190,   // 3190..319F; COMMON
3927            0x31A0,   // 31A0..31BF; BOPOMOFO
3928            0x31C0,   // 31C0..31EF; COMMON
3929            0x31F0,   // 31F0..31FF; KATAKANA
3930            0x3200,   // 3200..321F; HANGUL
3931            0x3220,   // 3220..325F; COMMON
3932            0x3260,   // 3260..327E; HANGUL
3933            0x327F,   // 327F..32CF; COMMON
3934            0x32D0,   // 32D0..3357; KATAKANA
3935            0x3358,   // 3358..33FF; COMMON
3936            0x3400,   // 3400..4DBF; HAN
3937            0x4DC0,   // 4DC0..4DFF; COMMON
3938            0x4E00,   // 4E00..9FFF; HAN
3939            0xA000,   // A000..A4CF; YI
3940            0xA4D0,   // A4D0..A4FF; LISU
3941            0xA500,   // A500..A63F; VAI
3942            0xA640,   // A640..A69F; CYRILLIC
3943            0xA6A0,   // A6A0..A6FF; BAMUM
3944            0xA700,   // A700..A721; COMMON
3945            0xA722,   // A722..A787; LATIN
3946            0xA788,   // A788..A78A; COMMON
3947            0xA78B,   // A78B..A7FF; LATIN
3948            0xA800,   // A800..A82F; SYLOTI_NAGRI
3949            0xA830,   // A830..A83F; COMMON
3950            0xA840,   // A840..A87F; PHAGS_PA
3951            0xA880,   // A880..A8DF; SAURASHTRA
3952            0xA8E0,   // A8E0..A8FF; DEVANAGARI
3953            0xA900,   // A900..A92F; KAYAH_LI
3954            0xA930,   // A930..A95F; REJANG
3955            0xA960,   // A960..A97F; HANGUL
3956            0xA980,   // A980..A9FF; JAVANESE
3957            0xAA00,   // AA00..AA5F; CHAM
3958            0xAA60,   // AA60..AA7F; MYANMAR
3959            0xAA80,   // AA80..AADF; TAI_VIET
3960            0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3961            0xAB01,   // AB01..ABBF; ETHIOPIC
3962            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3963            0xAC00,   // AC00..D7FB; HANGUL
3964            0xD7FC,   // D7FC..F8FF; UNKNOWN
3965            0xF900,   // F900..FAFF; HAN
3966            0xFB00,   // FB00..FB12; LATIN
3967            0xFB13,   // FB13..FB1C; ARMENIAN
3968            0xFB1D,   // FB1D..FB4F; HEBREW
3969            0xFB50,   // FB50..FD3D; ARABIC
3970            0xFD3E,   // FD3E..FD4F; COMMON
3971            0xFD50,   // FD50..FDFC; ARABIC
3972            0xFDFD,   // FDFD..FDFF; COMMON
3973            0xFE00,   // FE00..FE0F; INHERITED
3974            0xFE10,   // FE10..FE1F; COMMON
3975            0xFE20,   // FE20..FE2F; INHERITED
3976            0xFE30,   // FE30..FE6F; COMMON
3977            0xFE70,   // FE70..FEFE; ARABIC
3978            0xFEFF,   // FEFF..FF20; COMMON
3979            0xFF21,   // FF21..FF3A; LATIN
3980            0xFF3B,   // FF3B..FF40; COMMON
3981            0xFF41,   // FF41..FF5A; LATIN
3982            0xFF5B,   // FF5B..FF65; COMMON
3983            0xFF66,   // FF66..FF6F; KATAKANA
3984            0xFF70,   // FF70..FF70; COMMON
3985            0xFF71,   // FF71..FF9D; KATAKANA
3986            0xFF9E,   // FF9E..FF9F; COMMON
3987            0xFFA0,   // FFA0..FFDF; HANGUL
3988            0xFFE0,   // FFE0..FFFF; COMMON
3989            0x10000,  // 10000..100FF; LINEAR_B
3990            0x10100,  // 10100..1013F; COMMON
3991            0x10140,  // 10140..1018F; GREEK
3992            0x10190,  // 10190..101FC; COMMON
3993            0x101FD,  // 101FD..1027F; INHERITED
3994            0x10280,  // 10280..1029F; LYCIAN
3995            0x102A0,  // 102A0..102FF; CARIAN
3996            0x10300,  // 10300..1032F; OLD_ITALIC
3997            0x10330,  // 10330..1037F; GOTHIC
3998            0x10380,  // 10380..1039F; UGARITIC
3999            0x103A0,  // 103A0..103FF; OLD_PERSIAN
4000            0x10400,  // 10400..1044F; DESERET
4001            0x10450,  // 10450..1047F; SHAVIAN
4002            0x10480,  // 10480..107FF; OSMANYA
4003            0x10800,  // 10800..1083F; CYPRIOT
4004            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4005            0x10900,  // 10900..1091F; PHOENICIAN
4006            0x10920,  // 10920..1097F; LYDIAN
4007            0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4008            0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4009            0x10A00,  // 10A00..10A5F; KHAROSHTHI
4010            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4011            0x10B00,  // 10B00..10B3F; AVESTAN
4012            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4013            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4014            0x10C00,  // 10C00..10E5F; OLD_TURKIC
4015            0x10E60,  // 10E60..10FFF; ARABIC
4016            0x11000,  // 11000..1107F; BRAHMI
4017            0x11080,  // 11080..110CF; KAITHI
4018            0x110D0,  // 110D0..110FF; SORA_SOMPENG
4019            0x11100,  // 11100..1117F; CHAKMA
4020            0x11180,  // 11180..1167F; SHARADA
4021            0x11680,  // 11680..116CF; TAKRI
4022            0x12000,  // 12000..12FFF; CUNEIFORM
4023            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4024            0x16800,  // 16800..16A38; BAMUM
4025            0x16F00,  // 16F00..16F9F; MIAO
4026            0x1B000,  // 1B000..1B000; KATAKANA
4027            0x1B001,  // 1B001..1CFFF; HIRAGANA
4028            0x1D000,  // 1D000..1D166; COMMON
4029            0x1D167,  // 1D167..1D169; INHERITED
4030            0x1D16A,  // 1D16A..1D17A; COMMON
4031            0x1D17B,  // 1D17B..1D182; INHERITED
4032            0x1D183,  // 1D183..1D184; COMMON
4033            0x1D185,  // 1D185..1D18B; INHERITED
4034            0x1D18C,  // 1D18C..1D1A9; COMMON
4035            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4036            0x1D1AE,  // 1D1AE..1D1FF; COMMON
4037            0x1D200,  // 1D200..1D2FF; GREEK
4038            0x1D300,  // 1D300..1EDFF; COMMON
4039            0x1EE00,  // 1EE00..1EFFF; ARABIC
4040            0x1F000,  // 1F000..1F1FF; COMMON
4041            0x1F200,  // 1F200..1F200; HIRAGANA
4042            0x1F201,  // 1F201..1FFFF; COMMON
4043            0x20000,  // 20000..E0000; HAN
4044            0xE0001,  // E0001..E00FF; COMMON
4045            0xE0100,  // E0100..E01EF; INHERITED
4046            0xE01F0   // E01F0..10FFFF; UNKNOWN
4047
4048        };
4049
4050        private static final UnicodeScript[] scripts = {  // Script of each run: scripts[i] labels the code points that begin at scriptStarts[i]; both tables must keep the same length and order.
4051            COMMON,
4052            LATIN,
4053            COMMON,
4054            LATIN,
4055            COMMON,
4056            LATIN,
4057            COMMON,
4058            LATIN,
4059            COMMON,
4060            LATIN,
4061            COMMON,
4062            LATIN,
4063            COMMON,
4064            LATIN,
4065            COMMON,
4066            LATIN,
4067            COMMON,
4068            BOPOMOFO,
4069            COMMON,
4070            INHERITED,
4071            GREEK,
4072            COMMON,
4073            GREEK,
4074            COMMON,
4075            GREEK,
4076            COMMON,
4077            GREEK,
4078            COMMON,
4079            GREEK,
4080            COPTIC,
4081            GREEK,
4082            CYRILLIC,
4083            INHERITED,
4084            CYRILLIC,
4085            ARMENIAN,
4086            COMMON,
4087            ARMENIAN,
4088            HEBREW,
4089            ARABIC,
4090            COMMON,
4091            ARABIC,
4092            COMMON,
4093            ARABIC,
4094            COMMON,
4095            ARABIC,
4096            COMMON,
4097            ARABIC,
4098            INHERITED,
4099            ARABIC,
4100            COMMON,
4101            ARABIC,
4102            INHERITED,
4103            ARABIC,
4104            COMMON,
4105            ARABIC,
4106            SYRIAC,
4107            ARABIC,
4108            THAANA,
4109            NKO,
4110            SAMARITAN,
4111            MANDAIC,
4112            ARABIC,
4113            DEVANAGARI,
4114            INHERITED,
4115            DEVANAGARI,
4116            COMMON,
4117            DEVANAGARI,
4118            BENGALI,
4119            GURMUKHI,
4120            GUJARATI,
4121            ORIYA,
4122            TAMIL,
4123            TELUGU,
4124            KANNADA,
4125            MALAYALAM,
4126            SINHALA,
4127            THAI,
4128            COMMON,
4129            THAI,
4130            LAO,
4131            TIBETAN,
4132            COMMON,
4133            TIBETAN,
4134            MYANMAR,
4135            GEORGIAN,
4136            COMMON,
4137            GEORGIAN,
4138            HANGUL,
4139            ETHIOPIC,
4140            CHEROKEE,
4141            CANADIAN_ABORIGINAL,
4142            OGHAM,
4143            RUNIC,
4144            COMMON,
4145            RUNIC,
4146            TAGALOG,
4147            HANUNOO,
4148            COMMON,
4149            BUHID,
4150            TAGBANWA,
4151            KHMER,
4152            MONGOLIAN,
4153            COMMON,
4154            MONGOLIAN,
4155            COMMON,
4156            MONGOLIAN,
4157            CANADIAN_ABORIGINAL,
4158            LIMBU,
4159            TAI_LE,
4160            NEW_TAI_LUE,
4161            KHMER,
4162            BUGINESE,
4163            TAI_THAM,
4164            BALINESE,
4165            SUNDANESE,
4166            BATAK,
4167            LEPCHA,
4168            OL_CHIKI,
4169            SUNDANESE,
4170            INHERITED,
4171            COMMON,
4172            INHERITED,
4173            COMMON,
4174            INHERITED,
4175            COMMON,
4176            INHERITED,
4177            COMMON,
4178            INHERITED,
4179            COMMON,
4180            LATIN,
4181            GREEK,
4182            CYRILLIC,
4183            LATIN,
4184            GREEK,
4185            LATIN,
4186            GREEK,
4187            LATIN,
4188            CYRILLIC,
4189            LATIN,
4190            GREEK,
4191            INHERITED,
4192            LATIN,
4193            GREEK,
4194            COMMON,
4195            INHERITED,
4196            COMMON,
4197            LATIN,
4198            COMMON,
4199            LATIN,
4200            COMMON,
4201            LATIN,
4202            COMMON,
4203            INHERITED,
4204            COMMON,
4205            GREEK,
4206            COMMON,
4207            LATIN,
4208            COMMON,
4209            LATIN,
4210            COMMON,
4211            LATIN,
4212            COMMON,
4213            LATIN,
4214            COMMON,
4215            BRAILLE,
4216            COMMON,
4217            GLAGOLITIC,
4218            LATIN,
4219            COPTIC,
4220            GEORGIAN,
4221            TIFINAGH,
4222            ETHIOPIC,
4223            CYRILLIC,
4224            COMMON,
4225            HAN,
4226            COMMON,
4227            HAN,
4228            COMMON,
4229            HAN,
4230            COMMON,
4231            HAN,
4232            INHERITED,
4233            HANGUL,
4234            COMMON,
4235            HAN,
4236            COMMON,
4237            HIRAGANA,
4238            INHERITED,
4239            COMMON,
4240            HIRAGANA,
4241            COMMON,
4242            KATAKANA,
4243            COMMON,
4244            KATAKANA,
4245            BOPOMOFO,
4246            HANGUL,
4247            COMMON,
4248            BOPOMOFO,
4249            COMMON,
4250            KATAKANA,
4251            HANGUL,
4252            COMMON,
4253            HANGUL,
4254            COMMON,
4255            KATAKANA,
4256            COMMON,
4257            HAN,
4258            COMMON,
4259            HAN,
4260            YI,
4261            LISU,
4262            VAI,
4263            CYRILLIC,
4264            BAMUM,
4265            COMMON,
4266            LATIN,
4267            COMMON,
4268            LATIN,
4269            SYLOTI_NAGRI,
4270            COMMON,
4271            PHAGS_PA,
4272            SAURASHTRA,
4273            DEVANAGARI,
4274            KAYAH_LI,
4275            REJANG,
4276            HANGUL,
4277            JAVANESE,
4278            CHAM,
4279            MYANMAR,
4280            TAI_VIET,
4281            MEETEI_MAYEK,
4282            ETHIOPIC,
4283            MEETEI_MAYEK,
4284            HANGUL,
4285            UNKNOWN     ,
4286            HAN,
4287            LATIN,
4288            ARMENIAN,
4289            HEBREW,
4290            ARABIC,
4291            COMMON,
4292            ARABIC,
4293            COMMON,
4294            INHERITED,
4295            COMMON,
4296            INHERITED,
4297            COMMON,
4298            ARABIC,
4299            COMMON,
4300            LATIN,
4301            COMMON,
4302            LATIN,
4303            COMMON,
4304            KATAKANA,
4305            COMMON,
4306            KATAKANA,
4307            COMMON,
4308            HANGUL,
4309            COMMON,
4310            LINEAR_B,
4311            COMMON,
4312            GREEK,
4313            COMMON,
4314            INHERITED,
4315            LYCIAN,
4316            CARIAN,
4317            OLD_ITALIC,
4318            GOTHIC,
4319            UGARITIC,
4320            OLD_PERSIAN,
4321            DESERET,
4322            SHAVIAN,
4323            OSMANYA,
4324            CYPRIOT,
4325            IMPERIAL_ARAMAIC,
4326            PHOENICIAN,
4327            LYDIAN,
4328            MEROITIC_HIEROGLYPHS,
4329            MEROITIC_CURSIVE,
4330            KHAROSHTHI,
4331            OLD_SOUTH_ARABIAN,
4332            AVESTAN,
4333            INSCRIPTIONAL_PARTHIAN,
4334            INSCRIPTIONAL_PAHLAVI,
4335            OLD_TURKIC,
4336            ARABIC,
4337            BRAHMI,
4338            KAITHI,
4339            SORA_SOMPENG,
4340            CHAKMA,
4341            SHARADA,
4342            TAKRI,
4343            CUNEIFORM,
4344            EGYPTIAN_HIEROGLYPHS,
4345            BAMUM,
4346            MIAO,
4347            KATAKANA,
4348            HIRAGANA,
4349            COMMON,
4350            INHERITED,
4351            COMMON,
4352            INHERITED,
4353            COMMON,
4354            INHERITED,
4355            COMMON,
4356            INHERITED,
4357            COMMON,
4358            GREEK,
4359            COMMON,
4360            ARABIC,
4361            COMMON,
4362            HIRAGANA,
4363            COMMON,
4364            HAN,
4365            COMMON,
4366            INHERITED,
4367            UNKNOWN
4368        };
4369
4370        private static HashMap<String, Character.UnicodeScript> aliases;
4371        static {
4372            aliases = new HashMap<>(128);
4373            aliases.put("ARAB", ARABIC);
4374            aliases.put("ARMI", IMPERIAL_ARAMAIC);
4375            aliases.put("ARMN", ARMENIAN);
4376            aliases.put("AVST", AVESTAN);
4377            aliases.put("BALI", BALINESE);
4378            aliases.put("BAMU", BAMUM);
4379            aliases.put("BATK", BATAK);
4380            aliases.put("BENG", BENGALI);
4381            aliases.put("BOPO", BOPOMOFO);
4382            aliases.put("BRAI", BRAILLE);
4383            aliases.put("BRAH", BRAHMI);
4384            aliases.put("BUGI", BUGINESE);
4385            aliases.put("BUHD", BUHID);
4386            aliases.put("CAKM", CHAKMA);
4387            aliases.put("CANS", CANADIAN_ABORIGINAL);
4388            aliases.put("CARI", CARIAN);
4389            aliases.put("CHAM", CHAM);
4390            aliases.put("CHER", CHEROKEE);
4391            aliases.put("COPT", COPTIC);
4392            aliases.put("CPRT", CYPRIOT);
4393            aliases.put("CYRL", CYRILLIC);
4394            aliases.put("DEVA", DEVANAGARI);
4395            aliases.put("DSRT", DESERET);
4396            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4397            aliases.put("ETHI", ETHIOPIC);
4398            aliases.put("GEOR", GEORGIAN);
4399            aliases.put("GLAG", GLAGOLITIC);
4400            aliases.put("GOTH", GOTHIC);
4401            aliases.put("GREK", GREEK);
4402            aliases.put("GUJR", GUJARATI);
4403            aliases.put("GURU", GURMUKHI);
4404            aliases.put("HANG", HANGUL);
4405            aliases.put("HANI", HAN);
4406            aliases.put("HANO", HANUNOO);
4407            aliases.put("HEBR", HEBREW);
4408            aliases.put("HIRA", HIRAGANA);
4409            // it appears we don't have the KATAKANA_OR_HIRAGANA
4410            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4411            aliases.put("ITAL", OLD_ITALIC);
4412            aliases.put("JAVA", JAVANESE);
4413            aliases.put("KALI", KAYAH_LI);
4414            aliases.put("KANA", KATAKANA);
4415            aliases.put("KHAR", KHAROSHTHI);
4416            aliases.put("KHMR", KHMER);
4417            aliases.put("KNDA", KANNADA);
4418            aliases.put("KTHI", KAITHI);
4419            aliases.put("LANA", TAI_THAM);
4420            aliases.put("LAOO", LAO);
4421            aliases.put("LATN", LATIN);
4422            aliases.put("LEPC", LEPCHA);
4423            aliases.put("LIMB", LIMBU);
4424            aliases.put("LINB", LINEAR_B);
4425            aliases.put("LISU", LISU);
4426            aliases.put("LYCI", LYCIAN);
4427            aliases.put("LYDI", LYDIAN);
4428            aliases.put("MAND", MANDAIC);
4429            aliases.put("MERC", MEROITIC_CURSIVE);
4430            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4431            aliases.put("MLYM", MALAYALAM);
4432            aliases.put("MONG", MONGOLIAN);
4433            aliases.put("MTEI", MEETEI_MAYEK);
4434            aliases.put("MYMR", MYANMAR);
4435            aliases.put("NKOO", NKO);
4436            aliases.put("OGAM", OGHAM);
4437            aliases.put("OLCK", OL_CHIKI);
4438            aliases.put("ORKH", OLD_TURKIC);
4439            aliases.put("ORYA", ORIYA);
4440            aliases.put("OSMA", OSMANYA);
4441            aliases.put("PHAG", PHAGS_PA);
4442            aliases.put("PLRD", MIAO);
4443            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4444            aliases.put("PHNX", PHOENICIAN);
4445            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4446            aliases.put("RJNG", REJANG);
4447            aliases.put("RUNR", RUNIC);
4448            aliases.put("SAMR", SAMARITAN);
4449            aliases.put("SARB", OLD_SOUTH_ARABIAN);
4450            aliases.put("SAUR", SAURASHTRA);
4451            aliases.put("SHAW", SHAVIAN);
4452            aliases.put("SHRD", SHARADA);
4453            aliases.put("SINH", SINHALA);
4454            aliases.put("SORA", SORA_SOMPENG);
4455            aliases.put("SUND", SUNDANESE);
4456            aliases.put("SYLO", SYLOTI_NAGRI);
4457            aliases.put("SYRC", SYRIAC);
4458            aliases.put("TAGB", TAGBANWA);
4459            aliases.put("TALE", TAI_LE);
4460            aliases.put("TAKR", TAKRI);
4461            aliases.put("TALU", NEW_TAI_LUE);
4462            aliases.put("TAML", TAMIL);
4463            aliases.put("TAVT", TAI_VIET);
4464            aliases.put("TELU", TELUGU);
4465            aliases.put("TFNG", TIFINAGH);
4466            aliases.put("TGLG", TAGALOG);
4467            aliases.put("THAA", THAANA);
4468            aliases.put("THAI", THAI);
4469            aliases.put("TIBT", TIBETAN);
4470            aliases.put("UGAR", UGARITIC);
4471            aliases.put("VAII", VAI);
4472            aliases.put("XPEO", OLD_PERSIAN);
4473            aliases.put("XSUX", CUNEIFORM);
4474            aliases.put("YIII", YI);
4475            aliases.put("ZINH", INHERITED);
4476            aliases.put("ZYYY", COMMON);
4477            aliases.put("ZZZZ", UNKNOWN);
4478        }
4479
4480        /**
4481         * Returns the enum constant representing the Unicode script of which
4482         * the given character (Unicode code point) is assigned to.
4483         *
4484         * @param   codePoint the character (Unicode code point) in question.
4485         * @return  The {@code UnicodeScript} constant representing the
4486         *          Unicode script of which this character is assigned to.
4487         *
4488         * @exception IllegalArgumentException if the specified
4489         * {@code codePoint} is an invalid Unicode code point.
4490         * @see Character#isValidCodePoint(int)
4491         *
4492         */
4493        public static UnicodeScript of(int codePoint) {
4494            if (!isValidCodePoint(codePoint))
4495                throw new IllegalArgumentException();
4496            int type = getType(codePoint);
4497            // leave SURROGATE and PRIVATE_USE for table lookup
4498            if (type == UNASSIGNED)
4499                return UNKNOWN;
4500            int index = Arrays.binarySearch(scriptStarts, codePoint);
4501            if (index < 0)
4502                index = -index - 2;
4503            return scripts[index];
4504        }
4505
4506        /**
4507         * Returns the UnicodeScript constant with the given Unicode script
4508         * name or the script name alias. Script names and their aliases are
4509         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4510         * and PropertyValueAliases&lt;version&gt;.txt define script names
4511         * and the script name aliases for a particular version of the
4512         * standard. The {@link Character} class specifies the version of
4513         * the standard that it supports.
4514         * <p>
4515         * Character case is ignored for all of the valid script names.
4516         * The en_US locale's case mapping rules are used to provide
4517         * case-insensitive string comparisons for script name validation.
4518         * <p>
4519         *
4520         * @param scriptName A {@code UnicodeScript} name.
4521         * @return The {@code UnicodeScript} constant identified
4522         *         by {@code scriptName}
4523         * @throws IllegalArgumentException if {@code scriptName} is an
4524         *         invalid name
4525         * @throws NullPointerException if {@code scriptName} is null
4526         */
4527        public static final UnicodeScript forName(String scriptName) {
4528            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4529                                 //.replace(' ', '_'));
4530            UnicodeScript sc = aliases.get(scriptName);
4531            if (sc != null)
4532                return sc;
4533            return valueOf(scriptName);
4534        }
4535    }
4536
    /**
     * The primitive {@code char} wrapped by this {@code Character}.
     * The field is {@code final}; it is assigned in the constructor.
     *
     * @serial
     */
    private final char value;

    /**
     * Serialization version identifier. The value is the
     * serialVersionUID from JDK 1.0.2, kept for interoperability.
     */
    private static final long serialVersionUID = 3786198910865385080L;
4546
4547    /**
4548     * Constructs a newly allocated {@code Character} object that
4549     * represents the specified {@code char} value.
4550     *
4551     * @param  value   the value to be represented by the
4552     *                  {@code Character} object.
4553     */
4554    public Character(char value) {
4555        this.value = value;
4556    }
4557
4558    private static class CharacterCache {
4559        private CharacterCache(){}
4560
4561        static final Character cache[] = new Character[127 + 1];
4562
4563        static {
4564            for (int i = 0; i < cache.length; i++)
4565                cache[i] = new Character((char)i);
4566        }
4567    }
4568
4569    /**
4570     * Returns a <tt>Character</tt> instance representing the specified
4571     * <tt>char</tt> value.
4572     * If a new <tt>Character</tt> instance is not required, this method
4573     * should generally be used in preference to the constructor
4574     * {@link #Character(char)}, as this method is likely to yield
4575     * significantly better space and time performance by caching
4576     * frequently requested values.
4577     *
4578     * This method will always cache values in the range {@code
4579     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4580     * cache other values outside of this range.
4581     *
4582     * @param  c a char value.
4583     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4584     * @since  1.5
4585     */
4586    public static Character valueOf(char c) {
4587        if (c <= 127) { // must cache
4588            return CharacterCache.cache[(int)c];
4589        }
4590        return new Character(c);
4591    }
4592
4593    /**
4594     * Returns the value of this {@code Character} object.
4595     * @return  the primitive {@code char} value represented by
4596     *          this object.
4597     */
4598    public char charValue() {
4599        return value;
4600    }
4601
4602    /**
4603     * Returns a hash code for this {@code Character}; equal to the result
4604     * of invoking {@code charValue()}.
4605     *
4606     * @return a hash code value for this {@code Character}
4607     */
4608    @Override
4609    public int hashCode() {
4610        return Character.hashCode(value);
4611    }
4612
4613    /**
4614     * Returns a hash code for a {@code char} value; compatible with
4615     * {@code Character.hashCode()}.
4616     *
4617     * @since 1.8
4618     *
4619     * @param value The {@code char} for which to return a hash code.
4620     * @return a hash code value for a {@code char} value.
4621     */
4622    public static int hashCode(char value) {
4623        return (int)value;
4624    }
4625
4626    /**
4627     * Compares this object against the specified object.
4628     * The result is {@code true} if and only if the argument is not
4629     * {@code null} and is a {@code Character} object that
4630     * represents the same {@code char} value as this object.
4631     *
4632     * @param   obj   the object to compare with.
4633     * @return  {@code true} if the objects are the same;
4634     *          {@code false} otherwise.
4635     */
4636    public boolean equals(Object obj) {
4637        if (obj instanceof Character) {
4638            return value == ((Character)obj).charValue();
4639        }
4640        return false;
4641    }
4642
4643    /**
4644     * Returns a {@code String} object representing this
4645     * {@code Character}'s value.  The result is a string of
4646     * length 1 whose sole component is the primitive
4647     * {@code char} value represented by this
4648     * {@code Character} object.
4649     *
4650     * @return  a string representation of this object.
4651     */
4652    public String toString() {
4653        char buf[] = {value};
4654        return String.valueOf(buf);
4655    }
4656
4657    /**
4658     * Returns a {@code String} object representing the
4659     * specified {@code char}.  The result is a string of length
4660     * 1 consisting solely of the specified {@code char}.
4661     *
4662     * @param c the {@code char} to be converted
4663     * @return the string representation of the specified {@code char}
4664     * @since 1.4
4665     */
4666    public static String toString(char c) {
4667        return String.valueOf(c);
4668    }
4669
4670    /**
4671     * Determines whether the specified code point is a valid
4672     * <a href="http://www.unicode.org/glossary/#code_point">
4673     * Unicode code point value</a>.
4674     *
4675     * @param  codePoint the Unicode code point to be tested
4676     * @return {@code true} if the specified code point value is between
4677     *         {@link #MIN_CODE_POINT} and
4678     *         {@link #MAX_CODE_POINT} inclusive;
4679     *         {@code false} otherwise.
4680     * @since  1.5
4681     */
4682    public static boolean isValidCodePoint(int codePoint) {
4683        // Optimized form of:
4684        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4685        int plane = codePoint >>> 16;
4686        return plane < ((MAX_CODE_POINT + 1) >>> 16);
4687    }
4688
4689    /**
4690     * Determines whether the specified character (Unicode code point)
4691     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4692     * Such code points can be represented using a single {@code char}.
4693     *
4694     * @param  codePoint the character (Unicode code point) to be tested
4695     * @return {@code true} if the specified code point is between
4696     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4697     *         {@code false} otherwise.
4698     * @since  1.7
4699     */
4700    public static boolean isBmpCodePoint(int codePoint) {
4701        return codePoint >>> 16 == 0;
4702        // Optimized form of:
4703        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4704        // We consistently use logical shift (>>>) to facilitate
4705        // additional runtime optimizations.
4706    }
4707
4708    /**
4709     * Determines whether the specified character (Unicode code point)
4710     * is in the <a href="#supplementary">supplementary character</a> range.
4711     *
4712     * @param  codePoint the character (Unicode code point) to be tested
4713     * @return {@code true} if the specified code point is between
4714     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4715     *         {@link #MAX_CODE_POINT} inclusive;
4716     *         {@code false} otherwise.
4717     * @since  1.5
4718     */
4719    public static boolean isSupplementaryCodePoint(int codePoint) {
4720        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4721            && codePoint <  MAX_CODE_POINT + 1;
4722    }
4723
4724    /**
4725     * Determines if the given {@code char} value is a
4726     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4727     * Unicode high-surrogate code unit</a>
4728     * (also known as <i>leading-surrogate code unit</i>).
4729     *
4730     * <p>Such values do not represent characters by themselves,
4731     * but are used in the representation of
4732     * <a href="#supplementary">supplementary characters</a>
4733     * in the UTF-16 encoding.
4734     *
4735     * @param  ch the {@code char} value to be tested.
4736     * @return {@code true} if the {@code char} value is between
4737     *         {@link #MIN_HIGH_SURROGATE} and
4738     *         {@link #MAX_HIGH_SURROGATE} inclusive;
4739     *         {@code false} otherwise.
4740     * @see    Character#isLowSurrogate(char)
4741     * @see    Character.UnicodeBlock#of(int)
4742     * @since  1.5
4743     */
4744    public static boolean isHighSurrogate(char ch) {
4745        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4746        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4747    }
4748
4749    /**
4750     * Determines if the given {@code char} value is a
4751     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4752     * Unicode low-surrogate code unit</a>
4753     * (also known as <i>trailing-surrogate code unit</i>).
4754     *
4755     * <p>Such values do not represent characters by themselves,
4756     * but are used in the representation of
4757     * <a href="#supplementary">supplementary characters</a>
4758     * in the UTF-16 encoding.
4759     *
4760     * @param  ch the {@code char} value to be tested.
4761     * @return {@code true} if the {@code char} value is between
4762     *         {@link #MIN_LOW_SURROGATE} and
4763     *         {@link #MAX_LOW_SURROGATE} inclusive;
4764     *         {@code false} otherwise.
4765     * @see    Character#isHighSurrogate(char)
4766     * @since  1.5
4767     */
4768    public static boolean isLowSurrogate(char ch) {
4769        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4770    }
4771
4772    /**
4773     * Determines if the given {@code char} value is a Unicode
4774     * <i>surrogate code unit</i>.
4775     *
4776     * <p>Such values do not represent characters by themselves,
4777     * but are used in the representation of
4778     * <a href="#supplementary">supplementary characters</a>
4779     * in the UTF-16 encoding.
4780     *
4781     * <p>A char value is a surrogate code unit if and only if it is either
4782     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4783     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4784     *
4785     * @param  ch the {@code char} value to be tested.
4786     * @return {@code true} if the {@code char} value is between
4787     *         {@link #MIN_SURROGATE} and
4788     *         {@link #MAX_SURROGATE} inclusive;
4789     *         {@code false} otherwise.
4790     * @since  1.7
4791     */
4792    public static boolean isSurrogate(char ch) {
4793        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4794    }
4795
4796    /**
4797     * Determines whether the specified pair of {@code char}
4798     * values is a valid
4799     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4800     * Unicode surrogate pair</a>.
4801
4802     * <p>This method is equivalent to the expression:
4803     * <blockquote><pre>{@code
4804     * isHighSurrogate(high) && isLowSurrogate(low)
4805     * }</pre></blockquote>
4806     *
4807     * @param  high the high-surrogate code value to be tested
4808     * @param  low the low-surrogate code value to be tested
4809     * @return {@code true} if the specified high and
4810     * low-surrogate code values represent a valid surrogate pair;
4811     * {@code false} otherwise.
4812     * @since  1.5
4813     */
4814    public static boolean isSurrogatePair(char high, char low) {
4815        return isHighSurrogate(high) && isLowSurrogate(low);
4816    }
4817
4818    /**
4819     * Determines the number of {@code char} values needed to
4820     * represent the specified character (Unicode code point). If the
4821     * specified character is equal to or greater than 0x10000, then
4822     * the method returns 2. Otherwise, the method returns 1.
4823     *
4824     * <p>This method doesn't validate the specified character to be a
4825     * valid Unicode code point. The caller must validate the
4826     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4827     * if necessary.
4828     *
4829     * @param   codePoint the character (Unicode code point) to be tested.
4830     * @return  2 if the character is a valid supplementary character; 1 otherwise.
4831     * @see     Character#isSupplementaryCodePoint(int)
4832     * @since   1.5
4833     */
4834    public static int charCount(int codePoint) {
4835        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4836    }
4837
4838    /**
4839     * Converts the specified surrogate pair to its supplementary code
4840     * point value. This method does not validate the specified
4841     * surrogate pair. The caller must validate it using {@link
4842     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4843     *
4844     * @param  high the high-surrogate code unit
4845     * @param  low the low-surrogate code unit
4846     * @return the supplementary code point composed from the
4847     *         specified surrogate pair.
4848     * @since  1.5
4849     */
4850    public static int toCodePoint(char high, char low) {
4851        // Optimized form of:
4852        // return ((high - MIN_HIGH_SURROGATE) << 10)
4853        //         + (low - MIN_LOW_SURROGATE)
4854        //         + MIN_SUPPLEMENTARY_CODE_POINT;
4855        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4856                                       - (MIN_HIGH_SURROGATE << 10)
4857                                       - MIN_LOW_SURROGATE);
4858    }
4859
4860    /**
4861     * Returns the code point at the given index of the
4862     * {@code CharSequence}. If the {@code char} value at
4863     * the given index in the {@code CharSequence} is in the
4864     * high-surrogate range, the following index is less than the
4865     * length of the {@code CharSequence}, and the
4866     * {@code char} value at the following index is in the
4867     * low-surrogate range, then the supplementary code point
4868     * corresponding to this surrogate pair is returned. Otherwise,
4869     * the {@code char} value at the given index is returned.
4870     *
4871     * @param seq a sequence of {@code char} values (Unicode code
4872     * units)
4873     * @param index the index to the {@code char} values (Unicode
4874     * code units) in {@code seq} to be converted
4875     * @return the Unicode code point at the given index
4876     * @exception NullPointerException if {@code seq} is null.
4877     * @exception IndexOutOfBoundsException if the value
4878     * {@code index} is negative or not less than
4879     * {@link CharSequence#length() seq.length()}.
4880     * @since  1.5
4881     */
4882    public static int codePointAt(CharSequence seq, int index) {
4883        char c1 = seq.charAt(index);
4884        if (isHighSurrogate(c1) && ++index < seq.length()) {
4885            char c2 = seq.charAt(index);
4886            if (isLowSurrogate(c2)) {
4887                return toCodePoint(c1, c2);
4888            }
4889        }
4890        return c1;
4891    }
4892
4893    /**
4894     * Returns the code point at the given index of the
4895     * {@code char} array. If the {@code char} value at
4896     * the given index in the {@code char} array is in the
4897     * high-surrogate range, the following index is less than the
4898     * length of the {@code char} array, and the
4899     * {@code char} value at the following index is in the
4900     * low-surrogate range, then the supplementary code point
4901     * corresponding to this surrogate pair is returned. Otherwise,
4902     * the {@code char} value at the given index is returned.
4903     *
4904     * @param a the {@code char} array
4905     * @param index the index to the {@code char} values (Unicode
4906     * code units) in the {@code char} array to be converted
4907     * @return the Unicode code point at the given index
4908     * @exception NullPointerException if {@code a} is null.
4909     * @exception IndexOutOfBoundsException if the value
4910     * {@code index} is negative or not less than
4911     * the length of the {@code char} array.
4912     * @since  1.5
4913     */
4914    public static int codePointAt(char[] a, int index) {
4915        return codePointAtImpl(a, index, a.length);
4916    }
4917
4918    /**
4919     * Returns the code point at the given index of the
4920     * {@code char} array, where only array elements with
4921     * {@code index} less than {@code limit} can be used. If
4922     * the {@code char} value at the given index in the
4923     * {@code char} array is in the high-surrogate range, the
4924     * following index is less than the {@code limit}, and the
4925     * {@code char} value at the following index is in the
4926     * low-surrogate range, then the supplementary code point
4927     * corresponding to this surrogate pair is returned. Otherwise,
4928     * the {@code char} value at the given index is returned.
4929     *
4930     * @param a the {@code char} array
4931     * @param index the index to the {@code char} values (Unicode
4932     * code units) in the {@code char} array to be converted
4933     * @param limit the index after the last array element that
4934     * can be used in the {@code char} array
4935     * @return the Unicode code point at the given index
4936     * @exception NullPointerException if {@code a} is null.
4937     * @exception IndexOutOfBoundsException if the {@code index}
4938     * argument is negative or not less than the {@code limit}
4939     * argument, or if the {@code limit} argument is negative or
4940     * greater than the length of the {@code char} array.
4941     * @since  1.5
4942     */
4943    public static int codePointAt(char[] a, int index, int limit) {
4944        if (index >= limit || limit < 0 || limit > a.length) {
4945            throw new IndexOutOfBoundsException();
4946        }
4947        return codePointAtImpl(a, index, limit);
4948    }
4949
4950    // throws ArrayIndexOutOfBoundsException if index out of bounds
4951    static int codePointAtImpl(char[] a, int index, int limit) {
4952        char c1 = a[index];
4953        if (isHighSurrogate(c1) && ++index < limit) {
4954            char c2 = a[index];
4955            if (isLowSurrogate(c2)) {
4956                return toCodePoint(c1, c2);
4957            }
4958        }
4959        return c1;
4960    }
4961
4962    /**
4963     * Returns the code point preceding the given index of the
4964     * {@code CharSequence}. If the {@code char} value at
4965     * {@code (index - 1)} in the {@code CharSequence} is in
4966     * the low-surrogate range, {@code (index - 2)} is not
4967     * negative, and the {@code char} value at {@code (index - 2)}
4968     * in the {@code CharSequence} is in the
4969     * high-surrogate range, then the supplementary code point
4970     * corresponding to this surrogate pair is returned. Otherwise,
4971     * the {@code char} value at {@code (index - 1)} is
4972     * returned.
4973     *
4974     * @param seq the {@code CharSequence} instance
4975     * @param index the index following the code point that should be returned
4976     * @return the Unicode code point value before the given index.
4977     * @exception NullPointerException if {@code seq} is null.
4978     * @exception IndexOutOfBoundsException if the {@code index}
4979     * argument is less than 1 or greater than {@link
4980     * CharSequence#length() seq.length()}.
4981     * @since  1.5
4982     */
4983    public static int codePointBefore(CharSequence seq, int index) {
4984        char c2 = seq.charAt(--index);
4985        if (isLowSurrogate(c2) && index > 0) {
4986            char c1 = seq.charAt(--index);
4987            if (isHighSurrogate(c1)) {
4988                return toCodePoint(c1, c2);
4989            }
4990        }
4991        return c2;
4992    }
4993
4994    /**
4995     * Returns the code point preceding the given index of the
4996     * {@code char} array. If the {@code char} value at
4997     * {@code (index - 1)} in the {@code char} array is in
4998     * the low-surrogate range, {@code (index - 2)} is not
4999     * negative, and the {@code char} value at {@code (index - 2)}
5000     * in the {@code char} array is in the
5001     * high-surrogate range, then the supplementary code point
5002     * corresponding to this surrogate pair is returned. Otherwise,
5003     * the {@code char} value at {@code (index - 1)} is
5004     * returned.
5005     *
5006     * @param a the {@code char} array
5007     * @param index the index following the code point that should be returned
5008     * @return the Unicode code point value before the given index.
5009     * @exception NullPointerException if {@code a} is null.
5010     * @exception IndexOutOfBoundsException if the {@code index}
5011     * argument is less than 1 or greater than the length of the
5012     * {@code char} array
5013     * @since  1.5
5014     */
5015    public static int codePointBefore(char[] a, int index) {
5016        return codePointBeforeImpl(a, index, 0);
5017    }
5018
5019    /**
5020     * Returns the code point preceding the given index of the
5021     * {@code char} array, where only array elements with
5022     * {@code index} greater than or equal to {@code start}
5023     * can be used. If the {@code char} value at {@code (index - 1)}
5024     * in the {@code char} array is in the
5025     * low-surrogate range, {@code (index - 2)} is not less than
5026     * {@code start}, and the {@code char} value at
5027     * {@code (index - 2)} in the {@code char} array is in
5028     * the high-surrogate range, then the supplementary code point
5029     * corresponding to this surrogate pair is returned. Otherwise,
5030     * the {@code char} value at {@code (index - 1)} is
5031     * returned.
5032     *
5033     * @param a the {@code char} array
5034     * @param index the index following the code point that should be returned
5035     * @param start the index of the first array element in the
5036     * {@code char} array
5037     * @return the Unicode code point value before the given index.
5038     * @exception NullPointerException if {@code a} is null.
5039     * @exception IndexOutOfBoundsException if the {@code index}
5040     * argument is not greater than the {@code start} argument or
5041     * is greater than the length of the {@code char} array, or
5042     * if the {@code start} argument is negative or not less than
5043     * the length of the {@code char} array.
5044     * @since  1.5
5045     */
5046    public static int codePointBefore(char[] a, int index, int start) {
5047        if (index <= start || start < 0 || start >= a.length) {
5048            throw new IndexOutOfBoundsException();
5049        }
5050        return codePointBeforeImpl(a, index, start);
5051    }
5052
5053    // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
5054    static int codePointBeforeImpl(char[] a, int index, int start) {
5055        char c2 = a[--index];
5056        if (isLowSurrogate(c2) && index > start) {
5057            char c1 = a[--index];
5058            if (isHighSurrogate(c1)) {
5059                return toCodePoint(c1, c2);
5060            }
5061        }
5062        return c2;
5063    }
5064
5065    /**
5066     * Returns the leading surrogate (a
5067     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5068     * high surrogate code unit</a>) of the
5069     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5070     * surrogate pair</a>
5071     * representing the specified supplementary character (Unicode
5072     * code point) in the UTF-16 encoding.  If the specified character
5073     * is not a
5074     * <a href="Character.html#supplementary">supplementary character</a>,
5075     * an unspecified {@code char} is returned.
5076     *
5077     * <p>If
5078     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5079     * is {@code true}, then
5080     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5081     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5082     * are also always {@code true}.
5083     *
5084     * @param   codePoint a supplementary character (Unicode code point)
5085     * @return  the leading surrogate code unit used to represent the
5086     *          character in the UTF-16 encoding
5087     * @since   1.7
5088     */
5089    public static char highSurrogate(int codePoint) {
5090        return (char) ((codePoint >>> 10)
5091            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5092    }
5093
5094    /**
5095     * Returns the trailing surrogate (a
5096     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5097     * low surrogate code unit</a>) of the
5098     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5099     * surrogate pair</a>
5100     * representing the specified supplementary character (Unicode
5101     * code point) in the UTF-16 encoding.  If the specified character
5102     * is not a
5103     * <a href="Character.html#supplementary">supplementary character</a>,
5104     * an unspecified {@code char} is returned.
5105     *
5106     * <p>If
5107     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5108     * is {@code true}, then
5109     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5110     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5111     * are also always {@code true}.
5112     *
5113     * @param   codePoint a supplementary character (Unicode code point)
5114     * @return  the trailing surrogate code unit used to represent the
5115     *          character in the UTF-16 encoding
5116     * @since   1.7
5117     */
5118    public static char lowSurrogate(int codePoint) {
5119        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5120    }
5121
5122    /**
5123     * Converts the specified character (Unicode code point) to its
5124     * UTF-16 representation. If the specified code point is a BMP
5125     * (Basic Multilingual Plane or Plane 0) value, the same value is
5126     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5127     * specified code point is a supplementary character, its
5128     * surrogate values are stored in {@code dst[dstIndex]}
5129     * (high-surrogate) and {@code dst[dstIndex+1]}
5130     * (low-surrogate), and 2 is returned.
5131     *
5132     * @param  codePoint the character (Unicode code point) to be converted.
5133     * @param  dst an array of {@code char} in which the
5134     * {@code codePoint}'s UTF-16 value is stored.
5135     * @param dstIndex the start index into the {@code dst}
5136     * array where the converted value is stored.
5137     * @return 1 if the code point is a BMP code point, 2 if the
5138     * code point is a supplementary code point.
5139     * @exception IllegalArgumentException if the specified
5140     * {@code codePoint} is not a valid Unicode code point.
5141     * @exception NullPointerException if the specified {@code dst} is null.
5142     * @exception IndexOutOfBoundsException if {@code dstIndex}
5143     * is negative or not less than {@code dst.length}, or if
5144     * {@code dst} at {@code dstIndex} doesn't have enough
5145     * array element(s) to store the resulting {@code char}
5146     * value(s). (If {@code dstIndex} is equal to
5147     * {@code dst.length-1} and the specified
5148     * {@code codePoint} is a supplementary character, the
5149     * high-surrogate value is not stored in
5150     * {@code dst[dstIndex]}.)
5151     * @since  1.5
5152     */
5153    public static int toChars(int codePoint, char[] dst, int dstIndex) {
5154        if (isBmpCodePoint(codePoint)) {
5155            dst[dstIndex] = (char) codePoint;
5156            return 1;
5157        } else if (isValidCodePoint(codePoint)) {
5158            toSurrogates(codePoint, dst, dstIndex);
5159            return 2;
5160        } else {
5161            throw new IllegalArgumentException();
5162        }
5163    }
5164
5165    /**
5166     * Converts the specified character (Unicode code point) to its
5167     * UTF-16 representation stored in a {@code char} array. If
5168     * the specified code point is a BMP (Basic Multilingual Plane or
5169     * Plane 0) value, the resulting {@code char} array has
5170     * the same value as {@code codePoint}. If the specified code
5171     * point is a supplementary code point, the resulting
5172     * {@code char} array has the corresponding surrogate pair.
5173     *
5174     * @param  codePoint a Unicode code point
5175     * @return a {@code char} array having
5176     *         {@code codePoint}'s UTF-16 representation.
5177     * @exception IllegalArgumentException if the specified
5178     * {@code codePoint} is not a valid Unicode code point.
5179     * @since  1.5
5180     */
5181    public static char[] toChars(int codePoint) {
5182        if (isBmpCodePoint(codePoint)) {
5183            return new char[] { (char) codePoint };
5184        } else if (isValidCodePoint(codePoint)) {
5185            char[] result = new char[2];
5186            toSurrogates(codePoint, result, 0);
5187            return result;
5188        } else {
5189            throw new IllegalArgumentException();
5190        }
5191    }
5192
5193    static void toSurrogates(int codePoint, char[] dst, int index) {
5194        // We write elements "backwards" to guarantee all-or-nothing
5195        dst[index+1] = lowSurrogate(codePoint);
5196        dst[index] = highSurrogate(codePoint);
5197    }
5198
5199    /**
5200     * Returns the number of Unicode code points in the text range of
5201     * the specified char sequence. The text range begins at the
5202     * specified {@code beginIndex} and extends to the
5203     * {@code char} at index {@code endIndex - 1}. Thus the
5204     * length (in {@code char}s) of the text range is
5205     * {@code endIndex-beginIndex}. Unpaired surrogates within
5206     * the text range count as one code point each.
5207     *
5208     * @param seq the char sequence
5209     * @param beginIndex the index to the first {@code char} of
5210     * the text range.
5211     * @param endIndex the index after the last {@code char} of
5212     * the text range.
5213     * @return the number of Unicode code points in the specified text
5214     * range
5215     * @exception NullPointerException if {@code seq} is null.
5216     * @exception IndexOutOfBoundsException if the
5217     * {@code beginIndex} is negative, or {@code endIndex}
5218     * is larger than the length of the given sequence, or
5219     * {@code beginIndex} is larger than {@code endIndex}.
5220     * @since  1.5
5221     */
5222    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5223        int length = seq.length();
5224        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5225            throw new IndexOutOfBoundsException();
5226        }
5227        int n = endIndex - beginIndex;
5228        for (int i = beginIndex; i < endIndex; ) {
5229            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5230                isLowSurrogate(seq.charAt(i))) {
5231                n--;
5232                i++;
5233            }
5234        }
5235        return n;
5236    }
5237
5238    /**
5239     * Returns the number of Unicode code points in a subarray of the
5240     * {@code char} array argument. The {@code offset}
5241     * argument is the index of the first {@code char} of the
5242     * subarray and the {@code count} argument specifies the
5243     * length of the subarray in {@code char}s. Unpaired
5244     * surrogates within the subarray count as one code point each.
5245     *
5246     * @param a the {@code char} array
5247     * @param offset the index of the first {@code char} in the
5248     * given {@code char} array
5249     * @param count the length of the subarray in {@code char}s
5250     * @return the number of Unicode code points in the specified subarray
5251     * @exception NullPointerException if {@code a} is null.
5252     * @exception IndexOutOfBoundsException if {@code offset} or
5253     * {@code count} is negative, or if {@code offset +
5254     * count} is larger than the length of the given array.
5255     * @since  1.5
5256     */
5257    public static int codePointCount(char[] a, int offset, int count) {
5258        if (count > a.length - offset || offset < 0 || count < 0) {
5259            throw new IndexOutOfBoundsException();
5260        }
5261        return codePointCountImpl(a, offset, count);
5262    }
5263
5264    static int codePointCountImpl(char[] a, int offset, int count) {
5265        int endIndex = offset + count;
5266        int n = count;
5267        for (int i = offset; i < endIndex; ) {
5268            if (isHighSurrogate(a[i++]) && i < endIndex &&
5269                isLowSurrogate(a[i])) {
5270                n--;
5271                i++;
5272            }
5273        }
5274        return n;
5275    }
5276
5277    /**
5278     * Returns the index within the given char sequence that is offset
5279     * from the given {@code index} by {@code codePointOffset}
5280     * code points. Unpaired surrogates within the text range given by
5281     * {@code index} and {@code codePointOffset} count as
5282     * one code point each.
5283     *
5284     * @param seq the char sequence
5285     * @param index the index to be offset
5286     * @param codePointOffset the offset in code points
5287     * @return the index within the char sequence
5288     * @exception NullPointerException if {@code seq} is null.
5289     * @exception IndexOutOfBoundsException if {@code index}
5290     *   is negative or larger than the length of the char sequence,
5291     *   or if {@code codePointOffset} is positive and the
5292     *   subsequence starting with {@code index} has fewer than
5293     *   {@code codePointOffset} code points, or if
5294     *   {@code codePointOffset} is negative and the subsequence
5295     *   before {@code index} has fewer than the absolute value
5296     *   of {@code codePointOffset} code points.
5297     * @since 1.5
5298     */
5299    public static int offsetByCodePoints(CharSequence seq, int index,
5300                                         int codePointOffset) {
5301        int length = seq.length();
5302        if (index < 0 || index > length) {
5303            throw new IndexOutOfBoundsException();
5304        }
5305
5306        int x = index;
5307        if (codePointOffset >= 0) {
5308            int i;
5309            for (i = 0; x < length && i < codePointOffset; i++) {
5310                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5311                    isLowSurrogate(seq.charAt(x))) {
5312                    x++;
5313                }
5314            }
5315            if (i < codePointOffset) {
5316                throw new IndexOutOfBoundsException();
5317            }
5318        } else {
5319            int i;
5320            for (i = codePointOffset; x > 0 && i < 0; i++) {
5321                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5322                    isHighSurrogate(seq.charAt(x-1))) {
5323                    x--;
5324                }
5325            }
5326            if (i < 0) {
5327                throw new IndexOutOfBoundsException();
5328            }
5329        }
5330        return x;
5331    }
5332
5333    /**
5334     * Returns the index within the given {@code char} subarray
5335     * that is offset from the given {@code index} by
5336     * {@code codePointOffset} code points. The
5337     * {@code start} and {@code count} arguments specify a
5338     * subarray of the {@code char} array. Unpaired surrogates
5339     * within the text range given by {@code index} and
5340     * {@code codePointOffset} count as one code point each.
5341     *
5342     * @param a the {@code char} array
5343     * @param start the index of the first {@code char} of the
5344     * subarray
5345     * @param count the length of the subarray in {@code char}s
5346     * @param index the index to be offset
5347     * @param codePointOffset the offset in code points
5348     * @return the index within the subarray
5349     * @exception NullPointerException if {@code a} is null.
5350     * @exception IndexOutOfBoundsException
5351     *   if {@code start} or {@code count} is negative,
5352     *   or if {@code start + count} is larger than the length of
5353     *   the given array,
5354     *   or if {@code index} is less than {@code start} or
5355     *   larger than {@code start + count},
5356     *   or if {@code codePointOffset} is positive and the text range
5357     *   starting with {@code index} and ending with {@code start + count - 1}
5358     *   has fewer than {@code codePointOffset} code
5359     *   points,
5360     *   or if {@code codePointOffset} is negative and the text range
5361     *   starting with {@code start} and ending with {@code index - 1}
5362     *   has fewer than the absolute value of
5363     *   {@code codePointOffset} code points.
5364     * @since 1.5
5365     */
5366    public static int offsetByCodePoints(char[] a, int start, int count,
5367                                         int index, int codePointOffset) {
5368        if (count > a.length-start || start < 0 || count < 0
5369            || index < start || index > start+count) {
5370            throw new IndexOutOfBoundsException();
5371        }
5372        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5373    }
5374
5375    static int offsetByCodePointsImpl(char[]a, int start, int count,
5376                                      int index, int codePointOffset) {
5377        int x = index;
5378        if (codePointOffset >= 0) {
5379            int limit = start + count;
5380            int i;
5381            for (i = 0; x < limit && i < codePointOffset; i++) {
5382                if (isHighSurrogate(a[x++]) && x < limit &&
5383                    isLowSurrogate(a[x])) {
5384                    x++;
5385                }
5386            }
5387            if (i < codePointOffset) {
5388                throw new IndexOutOfBoundsException();
5389            }
5390        } else {
5391            int i;
5392            for (i = codePointOffset; x > start && i < 0; i++) {
5393                if (isLowSurrogate(a[--x]) && x > start &&
5394                    isHighSurrogate(a[x-1])) {
5395                    x--;
5396                }
5397            }
5398            if (i < 0) {
5399                throw new IndexOutOfBoundsException();
5400            }
5401        }
5402        return x;
5403    }
5404
5405    /**
5406     * Determines if the specified character is a lowercase character.
5407     * <p>
5408     * A character is lowercase if its general category type, provided
5409     * by {@code Character.getType(ch)}, is
5410     * {@code LOWERCASE_LETTER}, or it has contributory property
5411     * Other_Lowercase as defined by the Unicode Standard.
5412     * <p>
5413     * The following are examples of lowercase characters:
5414     * <blockquote><pre>
5415     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5416     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5417     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5418     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5419     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5420     * </pre></blockquote>
5421     * <p> Many other Unicode characters are lowercase too.
5422     *
5423     * <p><b>Note:</b> This method cannot handle <a
5424     * href="#supplementary"> supplementary characters</a>. To support
5425     * all Unicode characters, including supplementary characters, use
5426     * the {@link #isLowerCase(int)} method.
5427     *
5428     * @param   ch   the character to be tested.
5429     * @return  {@code true} if the character is lowercase;
5430     *          {@code false} otherwise.
5431     * @see     Character#isUpperCase(char)
5432     * @see     Character#isTitleCase(char)
5433     * @see     Character#toLowerCase(char)
5434     * @see     Character#getType(char)
5435     */
5436    public static boolean isLowerCase(char ch) {
5437        return isLowerCase((int)ch);
5438    }
5439
5440    /**
5441     * Determines if the specified character (Unicode code point) is a
5442     * lowercase character.
5443     * <p>
5444     * A character is lowercase if its general category type, provided
5445     * by {@link Character#getType getType(codePoint)}, is
5446     * {@code LOWERCASE_LETTER}, or it has contributory property
5447     * Other_Lowercase as defined by the Unicode Standard.
5448     * <p>
5449     * The following are examples of lowercase characters:
5450     * <blockquote><pre>
5451     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5452     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5453     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5454     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5455     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5456     * </pre></blockquote>
5457     * <p> Many other Unicode characters are lowercase too.
5458     *
5459     * @param   codePoint the character (Unicode code point) to be tested.
5460     * @return  {@code true} if the character is lowercase;
5461     *          {@code false} otherwise.
5462     * @see     Character#isUpperCase(int)
5463     * @see     Character#isTitleCase(int)
5464     * @see     Character#toLowerCase(int)
5465     * @see     Character#getType(int)
5466     * @since   1.5
5467     */
5468    public static boolean isLowerCase(int codePoint) {
5469        return isLowerCaseImpl(codePoint);
5470    }
5471
// Native method without a Java body; isLowerCase(int) returns its result unchanged.
5472    static native boolean isLowerCaseImpl(int codePoint);
5473
5474    /**
5475     * Determines if the specified character is an uppercase character.
5476     * <p>
5477     * A character is uppercase if its general category type, provided by
5478     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5479     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5480     * <p>
5481     * The following are examples of uppercase characters:
5482     * <blockquote><pre>
5483     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5484     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5485     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5486     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5487     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5488     * </pre></blockquote>
5489     * <p> Many other Unicode characters are uppercase too.
5490     *
5491     * <p><b>Note:</b> This method cannot handle <a
5492     * href="#supplementary"> supplementary characters</a>. To support
5493     * all Unicode characters, including supplementary characters, use
5494     * the {@link #isUpperCase(int)} method.
5495     *
5496     * @param   ch   the character to be tested.
5497     * @return  {@code true} if the character is uppercase;
5498     *          {@code false} otherwise.
5499     * @see     Character#isLowerCase(char)
5500     * @see     Character#isTitleCase(char)
5501     * @see     Character#toUpperCase(char)
5502     * @see     Character#getType(char)
5503     * @since   1.0
5504     */
5505    public static boolean isUpperCase(char ch) {
5506        return isUpperCase((int)ch);
5507    }
5508
5509    /**
5510     * Determines if the specified character (Unicode code point) is an uppercase character.
5511     * <p>
5512     * A character is uppercase if its general category type, provided by
5513     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5514     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5515     * <p>
5516     * The following are examples of uppercase characters:
5517     * <blockquote><pre>
5518     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5519     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5520     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5521     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5522     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5523     * </pre></blockquote>
5524     * <p> Many other Unicode characters are uppercase too.
5525     *
5526     * @param   codePoint the character (Unicode code point) to be tested.
5527     * @return  {@code true} if the character is uppercase;
5528     *          {@code false} otherwise.
5529     * @see     Character#isLowerCase(int)
5530     * @see     Character#isTitleCase(int)
5531     * @see     Character#toUpperCase(int)
5532     * @see     Character#getType(int)
5533     * @since   1.5
5534     */
5535    public static boolean isUpperCase(int codePoint) {
5536        return isUpperCaseImpl(codePoint);
5537    }
5538
// Native method without a Java body; isUpperCase(int) returns its result unchanged.
5539    static native boolean isUpperCaseImpl(int codePoint);
5540
5541
5542    /**
5543     * Determines if the specified character is a titlecase character.
5544     * <p>
5545     * A character is a titlecase character if its general
5546     * category type, provided by {@code Character.getType(ch)},
5547     * is {@code TITLECASE_LETTER}.
5548     * <p>
5549     * Some characters look like pairs of Latin letters. For example, there
5550     * is an uppercase letter that looks like "LJ" and has a corresponding
5551     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5552     * is the appropriate form to use when rendering a word in lowercase
5553     * with initial capitals, as for a book title.
5554     * <p>
5555     * These are some of the Unicode characters for which this method returns
5556     * {@code true}:
5557     * <ul>
5558     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5559     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5560     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5561     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5562     * </ul>
5563     * <p> Many other Unicode characters are titlecase too.
5564     *
5565     * <p><b>Note:</b> This method cannot handle <a
5566     * href="#supplementary"> supplementary characters</a>. To support
5567     * all Unicode characters, including supplementary characters, use
5568     * the {@link #isTitleCase(int)} method.
5569     *
5570     * @param   ch   the character to be tested.
5571     * @return  {@code true} if the character is titlecase;
5572     *          {@code false} otherwise.
5573     * @see     Character#isLowerCase(char)
5574     * @see     Character#isUpperCase(char)
5575     * @see     Character#toTitleCase(char)
5576     * @see     Character#getType(char)
5577     * @since   1.0.2
5578     */
5579    public static boolean isTitleCase(char ch) {
5580        return isTitleCase((int)ch);
5581    }
5582
5583    /**
5584     * Determines if the specified character (Unicode code point) is a titlecase character.
5585     * <p>
5586     * A character is a titlecase character if its general
5587     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5588     * is {@code TITLECASE_LETTER}.
5589     * <p>
5590     * Some characters look like pairs of Latin letters. For example, there
5591     * is an uppercase letter that looks like "LJ" and has a corresponding
5592     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5593     * is the appropriate form to use when rendering a word in lowercase
5594     * with initial capitals, as for a book title.
5595     * <p>
5596     * These are some of the Unicode characters for which this method returns
5597     * {@code true}:
5598     * <ul>
5599     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5600     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5601     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5602     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5603     * </ul>
5604     * <p> Many other Unicode characters are titlecase too.
5605     *
5606     * @param   codePoint the character (Unicode code point) to be tested.
5607     * @return  {@code true} if the character is titlecase;
5608     *          {@code false} otherwise.
5609     * @see     Character#isLowerCase(int)
5610     * @see     Character#isUpperCase(int)
5611     * @see     Character#toTitleCase(int)
5612     * @see     Character#getType(int)
5613     * @since   1.5
5614     */
5615    public static boolean isTitleCase(int codePoint) {
5616        return isTitleCaseImpl(codePoint);
5617    }
5618
// Native method without a Java body; isTitleCase(int) returns its result unchanged.
5619    static native boolean isTitleCaseImpl(int codePoint);
5620
5621    /**
5622     * Determines if the specified character is a digit.
5623     * <p>
5624     * A character is a digit if its general category type, provided
5625     * by {@code Character.getType(ch)}, is
5626     * {@code DECIMAL_DIGIT_NUMBER}.
5627     * <p>
5628     * Some Unicode character ranges that contain digits:
5629     * <ul>
5630     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5631     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5632     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5633     *     Arabic-Indic digits
5634     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5635     *     Extended Arabic-Indic digits
5636     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5637     *     Devanagari digits
5638     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5639     *     Fullwidth digits
5640     * </ul>
5641     *
5642     * Many other character ranges contain digits as well.
5643     *
5644     * <p><b>Note:</b> This method cannot handle <a
5645     * href="#supplementary"> supplementary characters</a>. To support
5646     * all Unicode characters, including supplementary characters, use
5647     * the {@link #isDigit(int)} method.
5648     *
5649     * @param   ch   the character to be tested.
5650     * @return  {@code true} if the character is a digit;
5651     *          {@code false} otherwise.
5652     * @see     Character#digit(char, int)
5653     * @see     Character#forDigit(int, int)
5654     * @see     Character#getType(char)
5655     */
5656    public static boolean isDigit(char ch) {
5657        return isDigit((int)ch);
5658    }
5659
5660    /**
5661     * Determines if the specified character (Unicode code point) is a digit.
5662     * <p>
5663     * A character is a digit if its general category type, provided
5664     * by {@link Character#getType(int) getType(codePoint)}, is
5665     * {@code DECIMAL_DIGIT_NUMBER}.
5666     * <p>
5667     * Some Unicode character ranges that contain digits:
5668     * <ul>
5669     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5670     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5671     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5672     *     Arabic-Indic digits
5673     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5674     *     Extended Arabic-Indic digits
5675     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5676     *     Devanagari digits
5677     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5678     *     Fullwidth digits
5679     * </ul>
5680     *
5681     * Many other character ranges contain digits as well.
5682     *
5683     * @param   codePoint the character (Unicode code point) to be tested.
5684     * @return  {@code true} if the character is a digit;
5685     *          {@code false} otherwise.
5686     * @see     Character#forDigit(int, int)
5687     * @see     Character#getType(int)
5688     * @since   1.5
5689     */
5690    public static boolean isDigit(int codePoint) {
5691        return isDigitImpl(codePoint);
5692    }
5693
// Native method without a Java body; isDigit(int) returns its result unchanged.
5694    static native boolean isDigitImpl(int codePoint);
5695
5696    /**
5697     * Determines if a character is defined in Unicode.
5698     * <p>
5699     * A character is defined if at least one of the following is true:
5700     * <ul>
5701     * <li>It has an entry in the UnicodeData file.
5702     * <li>It has a value in a range defined by the UnicodeData file.
5703     * </ul>
5704     *
5705     * <p><b>Note:</b> This method cannot handle <a
5706     * href="#supplementary"> supplementary characters</a>. To support
5707     * all Unicode characters, including supplementary characters, use
5708     * the {@link #isDefined(int)} method.
5709     *
5710     * @param   ch   the character to be tested
5711     * @return  {@code true} if the character has a defined meaning
5712     *          in Unicode; {@code false} otherwise.
5713     * @see     Character#isDigit(char)
5714     * @see     Character#isLetter(char)
5715     * @see     Character#isLetterOrDigit(char)
5716     * @see     Character#isLowerCase(char)
5717     * @see     Character#isTitleCase(char)
5718     * @see     Character#isUpperCase(char)
5719     * @since   1.0.2
5720     */
5721    public static boolean isDefined(char ch) {
5722        return isDefined((int)ch);
5723    }
5724
5725    /**
5726     * Determines if a character (Unicode code point) is defined in Unicode.
5727     * <p>
5728     * A character is defined if at least one of the following is true:
5729     * <ul>
5730     * <li>It has an entry in the UnicodeData file.
5731     * <li>It has a value in a range defined by the UnicodeData file.
5732     * </ul>
5733     *
5734     * @param   codePoint the character (Unicode code point) to be tested.
5735     * @return  {@code true} if the character has a defined meaning
5736     *          in Unicode; {@code false} otherwise.
5737     * @see     Character#isDigit(int)
5738     * @see     Character#isLetter(int)
5739     * @see     Character#isLetterOrDigit(int)
5740     * @see     Character#isLowerCase(int)
5741     * @see     Character#isTitleCase(int)
5742     * @see     Character#isUpperCase(int)
5743     * @since   1.5
5744     */
5745    public static boolean isDefined(int codePoint) {
5746        return isDefinedImpl(codePoint);
5747    }
5748
5749    static native boolean isDefinedImpl(int codePoint);
5750
5751    /**
5752     * Determines if the specified character is a letter.
5753     * <p>
5754     * A character is considered to be a letter if its general
5755     * category type, provided by {@code Character.getType(ch)},
5756     * is any of the following:
5757     * <ul>
5758     * <li> {@code UPPERCASE_LETTER}
5759     * <li> {@code LOWERCASE_LETTER}
5760     * <li> {@code TITLECASE_LETTER}
5761     * <li> {@code MODIFIER_LETTER}
5762     * <li> {@code OTHER_LETTER}
5763     * </ul>
5764     *
5765     * Not all letters have case. Many characters are
5766     * letters but are neither uppercase nor lowercase nor titlecase.
5767     *
5768     * <p><b>Note:</b> This method cannot handle <a
5769     * href="#supplementary"> supplementary characters</a>. To support
5770     * all Unicode characters, including supplementary characters, use
5771     * the {@link #isLetter(int)} method.
5772     *
5773     * @param   ch   the character to be tested.
5774     * @return  {@code true} if the character is a letter;
5775     *          {@code false} otherwise.
5776     * @see     Character#isDigit(char)
5777     * @see     Character#isJavaIdentifierStart(char)
5778     * @see     Character#isJavaLetter(char)
5779     * @see     Character#isJavaLetterOrDigit(char)
5780     * @see     Character#isLetterOrDigit(char)
5781     * @see     Character#isLowerCase(char)
5782     * @see     Character#isTitleCase(char)
5783     * @see     Character#isUnicodeIdentifierStart(char)
5784     * @see     Character#isUpperCase(char)
5785     */
5786    public static boolean isLetter(char ch) {
5787        return isLetter((int)ch);
5788    }
5789
5790    /**
5791     * Determines if the specified character (Unicode code point) is a letter.
5792     * <p>
5793     * A character is considered to be a letter if its general
5794     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5795     * is any of the following:
5796     * <ul>
5797     * <li> {@code UPPERCASE_LETTER}
5798     * <li> {@code LOWERCASE_LETTER}
5799     * <li> {@code TITLECASE_LETTER}
5800     * <li> {@code MODIFIER_LETTER}
5801     * <li> {@code OTHER_LETTER}
5802     * </ul>
5803     *
5804     * Not all letters have case. Many characters are
5805     * letters but are neither uppercase nor lowercase nor titlecase.
5806     *
5807     * @param   codePoint the character (Unicode code point) to be tested.
5808     * @return  {@code true} if the character is a letter;
5809     *          {@code false} otherwise.
5810     * @see     Character#isDigit(int)
5811     * @see     Character#isJavaIdentifierStart(int)
5812     * @see     Character#isLetterOrDigit(int)
5813     * @see     Character#isLowerCase(int)
5814     * @see     Character#isTitleCase(int)
5815     * @see     Character#isUnicodeIdentifierStart(int)
5816     * @see     Character#isUpperCase(int)
5817     * @since   1.5
5818     */
5819    public static boolean isLetter(int codePoint) {
5820        return isLetterImpl(codePoint);
5821    }
5822
5823    static native boolean isLetterImpl(int codePoint);
5824
5825    /**
5826     * Determines if the specified character is a letter or digit.
5827     * <p>
5828     * A character is considered to be a letter or digit if either
5829     * {@code Character.isLetter(char ch)} or
5830     * {@code Character.isDigit(char ch)} returns
5831     * {@code true} for the character.
5832     *
5833     * <p><b>Note:</b> This method cannot handle <a
5834     * href="#supplementary"> supplementary characters</a>. To support
5835     * all Unicode characters, including supplementary characters, use
5836     * the {@link #isLetterOrDigit(int)} method.
5837     *
5838     * @param   ch   the character to be tested.
5839     * @return  {@code true} if the character is a letter or digit;
5840     *          {@code false} otherwise.
5841     * @see     Character#isDigit(char)
5842     * @see     Character#isJavaIdentifierPart(char)
5843     * @see     Character#isJavaLetter(char)
5844     * @see     Character#isJavaLetterOrDigit(char)
5845     * @see     Character#isLetter(char)
5846     * @see     Character#isUnicodeIdentifierPart(char)
5847     * @since   1.0.2
5848     */
5849    public static boolean isLetterOrDigit(char ch) {
5850        return isLetterOrDigit((int)ch);
5851    }
5852
5853    /**
5854     * Determines if the specified character (Unicode code point) is a letter or digit.
5855     * <p>
5856     * A character is considered to be a letter or digit if either
5857     * {@link #isLetter(int) isLetter(codePoint)} or
5858     * {@link #isDigit(int) isDigit(codePoint)} returns
5859     * {@code true} for the character.
5860     *
5861     * @param   codePoint the character (Unicode code point) to be tested.
5862     * @return  {@code true} if the character is a letter or digit;
5863     *          {@code false} otherwise.
5864     * @see     Character#isDigit(int)
5865     * @see     Character#isJavaIdentifierPart(int)
5866     * @see     Character#isLetter(int)
5867     * @see     Character#isUnicodeIdentifierPart(int)
5868     * @since   1.5
5869     */
5870    public static boolean isLetterOrDigit(int codePoint) {
5871        return isLetterOrDigitImpl(codePoint);
5872    }
5873
5874    static native boolean isLetterOrDigitImpl(int codePoint);
5875
5876    /**
5877     * Determines if the specified character is permissible as the first
5878     * character in a Java identifier.
5879     * <p>
5880     * A character may start a Java identifier if and only if
5881     * one of the following is true:
5882     * <ul>
5883     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5884     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5885     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5886     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5887     * </ul>
5888     *
5889     * @param   ch the character to be tested.
5890     * @return  {@code true} if the character may start a Java
5891     *          identifier; {@code false} otherwise.
5892     * @see     Character#isJavaLetterOrDigit(char)
5893     * @see     Character#isJavaIdentifierStart(char)
5894     * @see     Character#isJavaIdentifierPart(char)
5895     * @see     Character#isLetter(char)
5896     * @see     Character#isLetterOrDigit(char)
5897     * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
     * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}.
5900     */
5901    @Deprecated
5902    public static boolean isJavaLetter(char ch) {
5903        return isJavaIdentifierStart(ch);
5904    }
5905
5906    /**
5907     * Determines if the specified character may be part of a Java
5908     * identifier as other than the first character.
5909     * <p>
5910     * A character may be part of a Java identifier if and only if any
5911     * of the following are true:
5912     * <ul>
5913     * <li>  it is a letter
5914     * <li>  it is a currency symbol (such as {@code '$'})
5915     * <li>  it is a connecting punctuation character (such as {@code '_'})
5916     * <li>  it is a digit
5917     * <li>  it is a numeric letter (such as a Roman numeral character)
5918     * <li>  it is a combining mark
5919     * <li>  it is a non-spacing mark
5920     * <li> {@code isIdentifierIgnorable} returns
5921     * {@code true} for the character.
5922     * </ul>
5923     *
5924     * @param   ch the character to be tested.
5925     * @return  {@code true} if the character may be part of a
5926     *          Java identifier; {@code false} otherwise.
5927     * @see     Character#isJavaLetter(char)
5928     * @see     Character#isJavaIdentifierStart(char)
5929     * @see     Character#isJavaIdentifierPart(char)
5930     * @see     Character#isLetter(char)
5931     * @see     Character#isLetterOrDigit(char)
5932     * @see     Character#isUnicodeIdentifierPart(char)
5933     * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
     * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}.
5936     */
5937    @Deprecated
5938    public static boolean isJavaLetterOrDigit(char ch) {
5939        return isJavaIdentifierPart(ch);
5940    }
5941
5942    /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5944     * <p>
5945     * A character is considered to be alphabetic if its general category type,
5946     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5947     * the following:
5948     * <ul>
5949     * <li> <code>UPPERCASE_LETTER</code>
5950     * <li> <code>LOWERCASE_LETTER</code>
5951     * <li> <code>TITLECASE_LETTER</code>
5952     * <li> <code>MODIFIER_LETTER</code>
5953     * <li> <code>OTHER_LETTER</code>
5954     * <li> <code>LETTER_NUMBER</code>
5955     * </ul>
5956     * or it has contributory property Other_Alphabetic as defined by the
5957     * Unicode Standard.
5958     *
5959     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the character is a Unicode alphabetic
     *          character; {@code false} otherwise.
5962     * @since   1.7
5963     */
5964    public static boolean isAlphabetic(int codePoint) {
5965        return isAlphabeticImpl(codePoint);
5966    }
5967
5968    static native boolean isAlphabeticImpl(int codePoint);
5969
5970
5971    /**
5972     * Determines if the specified character (Unicode code point) is a CJKV
5973     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5974     * the Unicode Standard.
5975     *
5976     * @param   codePoint the character (Unicode code point) to be tested.
5977     * @return  <code>true</code> if the character is a Unicode ideograph
5978     *          character, <code>false</code> otherwise.
5979     * @since   1.7
5980     */
5981    public static boolean isIdeographic(int codePoint) {
5982        return isIdeographicImpl(codePoint);
5983    }
5984    static native boolean isIdeographicImpl(int codePoint);
5985
5986    /**
5987     * Determines if the specified character is
5988     * permissible as the first character in a Java identifier.
5989     * <p>
5990     * A character may start a Java identifier if and only if
5991     * one of the following conditions is true:
5992     * <ul>
5993     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5994     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5995     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5996     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5997     * </ul>
5998     *
5999     * <p><b>Note:</b> This method cannot handle <a
6000     * href="#supplementary"> supplementary characters</a>. To support
6001     * all Unicode characters, including supplementary characters, use
6002     * the {@link #isJavaIdentifierStart(int)} method.
6003     *
6004     * @param   ch the character to be tested.
6005     * @return  {@code true} if the character may start a Java identifier;
6006     *          {@code false} otherwise.
6007     * @see     Character#isJavaIdentifierPart(char)
6008     * @see     Character#isLetter(char)
6009     * @see     Character#isUnicodeIdentifierStart(char)
6010     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6011     * @since   1.1
6012     */
6013    public static boolean isJavaIdentifierStart(char ch) {
6014        return isJavaIdentifierStart((int)ch);
6015    }
6016
6017    /**
6018     * Determines if the character (Unicode code point) is
6019     * permissible as the first character in a Java identifier.
6020     * <p>
6021     * A character may start a Java identifier if and only if
6022     * one of the following conditions is true:
6023     * <ul>
6024     * <li> {@link #isLetter(int) isLetter(codePoint)}
6025     *      returns {@code true}
6026     * <li> {@link #getType(int) getType(codePoint)}
6027     *      returns {@code LETTER_NUMBER}
6028     * <li> the referenced character is a currency symbol (such as {@code '$'})
6029     * <li> the referenced character is a connecting punctuation character
6030     *      (such as {@code '_'}).
6031     * </ul>
6032     *
6033     * @param   codePoint the character (Unicode code point) to be tested.
6034     * @return  {@code true} if the character may start a Java identifier;
6035     *          {@code false} otherwise.
6036     * @see     Character#isJavaIdentifierPart(int)
6037     * @see     Character#isLetter(int)
6038     * @see     Character#isUnicodeIdentifierStart(int)
6039     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6040     * @since   1.5
6041     */
6042    public static boolean isJavaIdentifierStart(int codePoint) {
6043        // Use precomputed bitmasks to optimize the ASCII range.
6044        if (codePoint < 64) {
6045            return (codePoint == '$'); // There's only one character in this range.
6046        } else if (codePoint < 128) {
6047            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6048        }
6049        return ((1 << getType(codePoint))
6050                & ((1 << UPPERCASE_LETTER)
6051                   | (1 << LOWERCASE_LETTER)
6052                   | (1  << TITLECASE_LETTER)
6053                   | (1  << MODIFIER_LETTER)
6054                   | (1  << OTHER_LETTER)
6055                   | (1  << CURRENCY_SYMBOL)
6056                   | (1  << CONNECTOR_PUNCTUATION)
6057                   | (1  << LETTER_NUMBER))) != 0;
6058    }
6059
6060    /**
6061     * Determines if the specified character may be part of a Java
6062     * identifier as other than the first character.
6063     * <p>
6064     * A character may be part of a Java identifier if any of the following
6065     * are true:
6066     * <ul>
6067     * <li>  it is a letter
6068     * <li>  it is a currency symbol (such as {@code '$'})
6069     * <li>  it is a connecting punctuation character (such as {@code '_'})
6070     * <li>  it is a digit
6071     * <li>  it is a numeric letter (such as a Roman numeral character)
6072     * <li>  it is a combining mark
6073     * <li>  it is a non-spacing mark
6074     * <li> {@code isIdentifierIgnorable} returns
6075     * {@code true} for the character
6076     * </ul>
6077     *
6078     * <p><b>Note:</b> This method cannot handle <a
6079     * href="#supplementary"> supplementary characters</a>. To support
6080     * all Unicode characters, including supplementary characters, use
6081     * the {@link #isJavaIdentifierPart(int)} method.
6082     *
6083     * @param   ch      the character to be tested.
6084     * @return {@code true} if the character may be part of a
6085     *          Java identifier; {@code false} otherwise.
6086     * @see     Character#isIdentifierIgnorable(char)
6087     * @see     Character#isJavaIdentifierStart(char)
6088     * @see     Character#isLetterOrDigit(char)
6089     * @see     Character#isUnicodeIdentifierPart(char)
6090     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6091     * @since   1.1
6092     */
6093    public static boolean isJavaIdentifierPart(char ch) {
6094        return isJavaIdentifierPart((int)ch);
6095    }
6096
6097    /**
6098     * Determines if the character (Unicode code point) may be part of a Java
6099     * identifier as other than the first character.
6100     * <p>
6101     * A character may be part of a Java identifier if any of the following
6102     * are true:
6103     * <ul>
6104     * <li>  it is a letter
6105     * <li>  it is a currency symbol (such as {@code '$'})
6106     * <li>  it is a connecting punctuation character (such as {@code '_'})
6107     * <li>  it is a digit
6108     * <li>  it is a numeric letter (such as a Roman numeral character)
6109     * <li>  it is a combining mark
6110     * <li>  it is a non-spacing mark
6111     * <li> {@link #isIdentifierIgnorable(int)
6112     * isIdentifierIgnorable(codePoint)} returns {@code true} for
6113     * the character
6114     * </ul>
6115     *
6116     * @param   codePoint the character (Unicode code point) to be tested.
6117     * @return {@code true} if the character may be part of a
6118     *          Java identifier; {@code false} otherwise.
6119     * @see     Character#isIdentifierIgnorable(int)
6120     * @see     Character#isJavaIdentifierStart(int)
6121     * @see     Character#isLetterOrDigit(int)
6122     * @see     Character#isUnicodeIdentifierPart(int)
6123     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6124     * @since   1.5
6125     */
6126    public static boolean isJavaIdentifierPart(int codePoint) {
6127        // Use precomputed bitmasks to optimize the ASCII range.
6128        if (codePoint < 64) {
6129            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
6130        } else if (codePoint < 128) {
6131            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6132        }
6133        return ((1 << getType(codePoint))
6134                & ((1 << UPPERCASE_LETTER)
6135                   | (1 << LOWERCASE_LETTER)
6136                   | (1 << TITLECASE_LETTER)
6137                   | (1 << MODIFIER_LETTER)
6138                   | (1 << OTHER_LETTER)
6139                   | (1 << CURRENCY_SYMBOL)
6140                   | (1 << CONNECTOR_PUNCTUATION)
6141                   | (1 << DECIMAL_DIGIT_NUMBER)
6142                   | (1 << LETTER_NUMBER)
6143                   | (1 << FORMAT)
6144                   | (1 << COMBINING_SPACING_MARK)
6145                   | (1 << NON_SPACING_MARK))) != 0
6146                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
6147                || (codePoint >= 0x7f && codePoint <= 0x9f);
6148    }
6149
6150    /**
6151     * Determines if the specified character is permissible as the
6152     * first character in a Unicode identifier.
6153     * <p>
6154     * A character may start a Unicode identifier if and only if
6155     * one of the following conditions is true:
6156     * <ul>
6157     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6158     * <li> {@link #getType(char) getType(ch)} returns
6159     *      {@code LETTER_NUMBER}.
6160     * </ul>
6161     *
6162     * <p><b>Note:</b> This method cannot handle <a
6163     * href="#supplementary"> supplementary characters</a>. To support
6164     * all Unicode characters, including supplementary characters, use
6165     * the {@link #isUnicodeIdentifierStart(int)} method.
6166     *
6167     * @param   ch      the character to be tested.
6168     * @return  {@code true} if the character may start a Unicode
6169     *          identifier; {@code false} otherwise.
6170     * @see     Character#isJavaIdentifierStart(char)
6171     * @see     Character#isLetter(char)
6172     * @see     Character#isUnicodeIdentifierPart(char)
6173     * @since   1.1
6174     */
6175    public static boolean isUnicodeIdentifierStart(char ch) {
6176        return isUnicodeIdentifierStart((int)ch);
6177    }
6178
6179    /**
6180     * Determines if the specified character (Unicode code point) is permissible as the
6181     * first character in a Unicode identifier.
6182     * <p>
6183     * A character may start a Unicode identifier if and only if
6184     * one of the following conditions is true:
6185     * <ul>
6186     * <li> {@link #isLetter(int) isLetter(codePoint)}
6187     *      returns {@code true}
6188     * <li> {@link #getType(int) getType(codePoint)}
6189     *      returns {@code LETTER_NUMBER}.
6190     * </ul>
6191     * @param   codePoint the character (Unicode code point) to be tested.
6192     * @return  {@code true} if the character may start a Unicode
6193     *          identifier; {@code false} otherwise.
6194     * @see     Character#isJavaIdentifierStart(int)
6195     * @see     Character#isLetter(int)
6196     * @see     Character#isUnicodeIdentifierPart(int)
6197     * @since   1.5
6198     */
6199    public static boolean isUnicodeIdentifierStart(int codePoint) {
6200        return isUnicodeIdentifierStartImpl(codePoint);
6201    }
6202
6203    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6204
6205    /**
6206     * Determines if the specified character may be part of a Unicode
6207     * identifier as other than the first character.
6208     * <p>
6209     * A character may be part of a Unicode identifier if and only if
6210     * one of the following statements is true:
6211     * <ul>
6212     * <li>  it is a letter
6213     * <li>  it is a connecting punctuation character (such as {@code '_'})
6214     * <li>  it is a digit
6215     * <li>  it is a numeric letter (such as a Roman numeral character)
6216     * <li>  it is a combining mark
6217     * <li>  it is a non-spacing mark
6218     * <li> {@code isIdentifierIgnorable} returns
6219     * {@code true} for this character.
6220     * </ul>
6221     *
6222     * <p><b>Note:</b> This method cannot handle <a
6223     * href="#supplementary"> supplementary characters</a>. To support
6224     * all Unicode characters, including supplementary characters, use
6225     * the {@link #isUnicodeIdentifierPart(int)} method.
6226     *
6227     * @param   ch      the character to be tested.
6228     * @return  {@code true} if the character may be part of a
6229     *          Unicode identifier; {@code false} otherwise.
6230     * @see     Character#isIdentifierIgnorable(char)
6231     * @see     Character#isJavaIdentifierPart(char)
6232     * @see     Character#isLetterOrDigit(char)
6233     * @see     Character#isUnicodeIdentifierStart(char)
6234     * @since   1.1
6235     */
6236    public static boolean isUnicodeIdentifierPart(char ch) {
6237        return isUnicodeIdentifierPart((int)ch);
6238    }
6239
6240    /**
6241     * Determines if the specified character (Unicode code point) may be part of a Unicode
6242     * identifier as other than the first character.
6243     * <p>
6244     * A character may be part of a Unicode identifier if and only if
6245     * one of the following statements is true:
6246     * <ul>
6247     * <li>  it is a letter
6248     * <li>  it is a connecting punctuation character (such as {@code '_'})
6249     * <li>  it is a digit
6250     * <li>  it is a numeric letter (such as a Roman numeral character)
6251     * <li>  it is a combining mark
6252     * <li>  it is a non-spacing mark
6253     * <li> {@code isIdentifierIgnorable} returns
6254     * {@code true} for this character.
6255     * </ul>
6256     * @param   codePoint the character (Unicode code point) to be tested.
6257     * @return  {@code true} if the character may be part of a
6258     *          Unicode identifier; {@code false} otherwise.
6259     * @see     Character#isIdentifierIgnorable(int)
6260     * @see     Character#isJavaIdentifierPart(int)
6261     * @see     Character#isLetterOrDigit(int)
6262     * @see     Character#isUnicodeIdentifierStart(int)
6263     * @since   1.5
6264     */
6265    public static boolean isUnicodeIdentifierPart(int codePoint) {
6266        return isUnicodeIdentifierPartImpl(codePoint);
6267    }
6268
6269    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6270
6271    /**
6272     * Determines if the specified character should be regarded as
6273     * an ignorable character in a Java identifier or a Unicode identifier.
6274     * <p>
6275     * The following Unicode characters are ignorable in a Java identifier
6276     * or a Unicode identifier:
6277     * <ul>
6278     * <li>ISO control characters that are not whitespace
6279     * <ul>
6280     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6281     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6282     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6283     * </ul>
6284     *
6285     * <li>all characters that have the {@code FORMAT} general
6286     * category value
6287     * </ul>
6288     *
6289     * <p><b>Note:</b> This method cannot handle <a
6290     * href="#supplementary"> supplementary characters</a>. To support
6291     * all Unicode characters, including supplementary characters, use
6292     * the {@link #isIdentifierIgnorable(int)} method.
6293     *
6294     * @param   ch      the character to be tested.
6295     * @return  {@code true} if the character is an ignorable control
6296     *          character that may be part of a Java or Unicode identifier;
6297     *           {@code false} otherwise.
6298     * @see     Character#isJavaIdentifierPart(char)
6299     * @see     Character#isUnicodeIdentifierPart(char)
6300     * @since   1.1
6301     */
6302    public static boolean isIdentifierIgnorable(char ch) {
6303        return isIdentifierIgnorable((int)ch);
6304    }
6305
6306    /**
6307     * Determines if the specified character (Unicode code point) should be regarded as
6308     * an ignorable character in a Java identifier or a Unicode identifier.
6309     * <p>
6310     * The following Unicode characters are ignorable in a Java identifier
6311     * or a Unicode identifier:
6312     * <ul>
6313     * <li>ISO control characters that are not whitespace
6314     * <ul>
6315     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6316     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6317     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6318     * </ul>
6319     *
6320     * <li>all characters that have the {@code FORMAT} general
6321     * category value
6322     * </ul>
6323     *
6324     * @param   codePoint the character (Unicode code point) to be tested.
6325     * @return  {@code true} if the character is an ignorable control
6326     *          character that may be part of a Java or Unicode identifier;
6327     *          {@code false} otherwise.
6328     * @see     Character#isJavaIdentifierPart(int)
6329     * @see     Character#isUnicodeIdentifierPart(int)
6330     * @since   1.5
6331     */
6332    public static boolean isIdentifierIgnorable(int codePoint) {
6333        return isIdentifierIgnorableImpl(codePoint);
6334    }
6335
6336    static native boolean isIdentifierIgnorableImpl(int codePoint);
6337
6338    /**
6339     * Converts the character argument to lowercase using case
6340     * mapping information from the UnicodeData file.
6341     * <p>
6342     * Note that
6343     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6344     * does not always return {@code true} for some ranges of
6345     * characters, particularly those that are symbols or ideographs.
6346     *
6347     * <p>In general, {@link String#toLowerCase()} should be used to map
6348     * characters to lowercase. {@code String} case mapping methods
6349     * have several benefits over {@code Character} case mapping methods.
6350     * {@code String} case mapping methods can perform locale-sensitive
6351     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6352     * the {@code Character} case mapping methods cannot.
6353     *
6354     * <p><b>Note:</b> This method cannot handle <a
6355     * href="#supplementary"> supplementary characters</a>. To support
6356     * all Unicode characters, including supplementary characters, use
6357     * the {@link #toLowerCase(int)} method.
6358     *
6359     * @param   ch   the character to be converted.
6360     * @return  the lowercase equivalent of the character, if any;
6361     *          otherwise, the character itself.
6362     * @see     Character#isLowerCase(char)
6363     * @see     String#toLowerCase()
6364     */
6365    public static char toLowerCase(char ch) {
6366        return (char)toLowerCase((int)ch);
6367    }
6368
6369    /**
6370     * Converts the character (Unicode code point) argument to
6371     * lowercase using case mapping information from the UnicodeData
6372     * file.
6373     *
6374     * <p> Note that
6375     * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6376     * does not always return {@code true} for some ranges of
6377     * characters, particularly those that are symbols or ideographs.
6378     *
6379     * <p>In general, {@link String#toLowerCase()} should be used to map
6380     * characters to lowercase. {@code String} case mapping methods
6381     * have several benefits over {@code Character} case mapping methods.
6382     * {@code String} case mapping methods can perform locale-sensitive
6383     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6384     * the {@code Character} case mapping methods cannot.
6385     *
6386     * @param   codePoint   the character (Unicode code point) to be converted.
6387     * @return  the lowercase equivalent of the character (Unicode code
6388     *          point), if any; otherwise, the character itself.
6389     * @see     Character#isLowerCase(int)
6390     * @see     String#toLowerCase()
6391     *
6392     * @since   1.5
6393     */
6394    public static int toLowerCase(int codePoint) {
6395        return toLowerCaseImpl(codePoint);
6396    }
6397
6398    static native int toLowerCaseImpl(int codePoint);
6399
6400    /**
6401     * Converts the character argument to uppercase using case mapping
6402     * information from the UnicodeData file.
6403     * <p>
6404     * Note that
6405     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6406     * does not always return {@code true} for some ranges of
6407     * characters, particularly those that are symbols or ideographs.
6408     *
6409     * <p>In general, {@link String#toUpperCase()} should be used to map
6410     * characters to uppercase. {@code String} case mapping methods
6411     * have several benefits over {@code Character} case mapping methods.
6412     * {@code String} case mapping methods can perform locale-sensitive
6413     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6414     * the {@code Character} case mapping methods cannot.
6415     *
6416     * <p><b>Note:</b> This method cannot handle <a
6417     * href="#supplementary"> supplementary characters</a>. To support
6418     * all Unicode characters, including supplementary characters, use
6419     * the {@link #toUpperCase(int)} method.
6420     *
6421     * @param   ch   the character to be converted.
6422     * @return  the uppercase equivalent of the character, if any;
6423     *          otherwise, the character itself.
6424     * @see     Character#isUpperCase(char)
6425     * @see     String#toUpperCase()
6426     */
6427    public static char toUpperCase(char ch) {
6428        return (char)toUpperCase((int)ch);
6429    }
6430
6431    /**
6432     * Converts the character (Unicode code point) argument to
6433     * uppercase using case mapping information from the UnicodeData
6434     * file.
6435     *
6436     * <p>Note that
6437     * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6438     * does not always return {@code true} for some ranges of
6439     * characters, particularly those that are symbols or ideographs.
6440     *
6441     * <p>In general, {@link String#toUpperCase()} should be used to map
6442     * characters to uppercase. {@code String} case mapping methods
6443     * have several benefits over {@code Character} case mapping methods.
6444     * {@code String} case mapping methods can perform locale-sensitive
6445     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6446     * the {@code Character} case mapping methods cannot.
6447     *
6448     * @param   codePoint   the character (Unicode code point) to be converted.
6449     * @return  the uppercase equivalent of the character, if any;
6450     *          otherwise, the character itself.
6451     * @see     Character#isUpperCase(int)
6452     * @see     String#toUpperCase()
6453     *
6454     * @since   1.5
6455     */
6456    public static int toUpperCase(int codePoint) {
6457        return toUpperCaseImpl(codePoint);
6458    }
6459
6460    static native int toUpperCaseImpl(int codePoint);
6461
6462    /**
6463     * Converts the character argument to titlecase using case mapping
6464     * information from the UnicodeData file. If a character has no
6465     * explicit titlecase mapping and is not itself a titlecase char
6466     * according to UnicodeData, then the uppercase mapping is
6467     * returned as an equivalent titlecase mapping. If the
6468     * {@code char} argument is already a titlecase
6469     * {@code char}, the same {@code char} value will be
6470     * returned.
6471     * <p>
6472     * Note that
6473     * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6474     * does not always return {@code true} for some ranges of
6475     * characters.
6476     *
6477     * <p><b>Note:</b> This method cannot handle <a
6478     * href="#supplementary"> supplementary characters</a>. To support
6479     * all Unicode characters, including supplementary characters, use
6480     * the {@link #toTitleCase(int)} method.
6481     *
6482     * @param   ch   the character to be converted.
6483     * @return  the titlecase equivalent of the character, if any;
6484     *          otherwise, the character itself.
6485     * @see     Character#isTitleCase(char)
6486     * @see     Character#toLowerCase(char)
6487     * @see     Character#toUpperCase(char)
6488     * @since   1.0.2
6489     */
6490    public static char toTitleCase(char ch) {
6491        return (char)toTitleCase((int)ch);
6492    }
6493
6494    /**
6495     * Converts the character (Unicode code point) argument to titlecase using case mapping
6496     * information from the UnicodeData file. If a character has no
6497     * explicit titlecase mapping and is not itself a titlecase char
6498     * according to UnicodeData, then the uppercase mapping is
6499     * returned as an equivalent titlecase mapping. If the
6500     * character argument is already a titlecase
6501     * character, the same character value will be
6502     * returned.
6503     *
6504     * <p>Note that
6505     * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6506     * does not always return {@code true} for some ranges of
6507     * characters.
6508     *
6509     * @param   codePoint   the character (Unicode code point) to be converted.
6510     * @return  the titlecase equivalent of the character, if any;
6511     *          otherwise, the character itself.
6512     * @see     Character#isTitleCase(int)
6513     * @see     Character#toLowerCase(int)
6514     * @see     Character#toUpperCase(int)
6515     * @since   1.5
6516     */
6517    public static int toTitleCase(int codePoint) {
6518        return toTitleCaseImpl(codePoint);
6519    }
6520
6521    static native int toTitleCaseImpl(int codePoint);
6522
6523    /**
6524     * Returns the numeric value of the character {@code ch} in the
6525     * specified radix.
6526     * <p>
6527     * If the radix is not in the range {@code MIN_RADIX} &le;
6528     * {@code radix} &le; {@code MAX_RADIX} or if the
6529     * value of {@code ch} is not a valid digit in the specified
6530     * radix, {@code -1} is returned. A character is a valid digit
6531     * if at least one of the following is true:
6532     * <ul>
6533     * <li>The method {@code isDigit} is {@code true} of the character
6534     *     and the Unicode decimal digit value of the character (or its
6535     *     single-character decomposition) is less than the specified radix.
6536     *     In this case the decimal digit value is returned.
6537     * <li>The character is one of the uppercase Latin letters
6538     *     {@code 'A'} through {@code 'Z'} and its code is less than
6539     *     {@code radix + 'A' - 10}.
6540     *     In this case, {@code ch - 'A' + 10}
6541     *     is returned.
6542     * <li>The character is one of the lowercase Latin letters
6543     *     {@code 'a'} through {@code 'z'} and its code is less than
6544     *     {@code radix + 'a' - 10}.
6545     *     In this case, {@code ch - 'a' + 10}
6546     *     is returned.
6547     * <li>The character is one of the fullwidth uppercase Latin letters A
6548     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6549     *     and its code is less than
6550     *     {@code radix + '\u005CuFF21' - 10}.
6551     *     In this case, {@code ch - '\u005CuFF21' + 10}
6552     *     is returned.
6553     * <li>The character is one of the fullwidth lowercase Latin letters a
6554     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6555     *     and its code is less than
6556     *     {@code radix + '\u005CuFF41' - 10}.
6557     *     In this case, {@code ch - '\u005CuFF41' + 10}
6558     *     is returned.
6559     * </ul>
6560     *
6561     * <p><b>Note:</b> This method cannot handle <a
6562     * href="#supplementary"> supplementary characters</a>. To support
6563     * all Unicode characters, including supplementary characters, use
6564     * the {@link #digit(int, int)} method.
6565     *
6566     * @param   ch      the character to be converted.
6567     * @param   radix   the radix.
6568     * @return  the numeric value represented by the character in the
6569     *          specified radix.
6570     * @see     Character#forDigit(int, int)
6571     * @see     Character#isDigit(char)
6572     */
6573    public static int digit(char ch, int radix) {
6574        return digit((int)ch, radix);
6575    }
6576
6577    /**
6578     * Returns the numeric value of the specified character (Unicode
6579     * code point) in the specified radix.
6580     *
6581     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6582     * {@code radix} &le; {@code MAX_RADIX} or if the
6583     * character is not a valid digit in the specified
6584     * radix, {@code -1} is returned. A character is a valid digit
6585     * if at least one of the following is true:
6586     * <ul>
6587     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6588     *     and the Unicode decimal digit value of the character (or its
6589     *     single-character decomposition) is less than the specified radix.
6590     *     In this case the decimal digit value is returned.
6591     * <li>The character is one of the uppercase Latin letters
6592     *     {@code 'A'} through {@code 'Z'} and its code is less than
6593     *     {@code radix + 'A' - 10}.
6594     *     In this case, {@code codePoint - 'A' + 10}
6595     *     is returned.
6596     * <li>The character is one of the lowercase Latin letters
6597     *     {@code 'a'} through {@code 'z'} and its code is less than
6598     *     {@code radix + 'a' - 10}.
6599     *     In this case, {@code codePoint - 'a' + 10}
6600     *     is returned.
6601     * <li>The character is one of the fullwidth uppercase Latin letters A
6602     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6603     *     and its code is less than
6604     *     {@code radix + '\u005CuFF21' - 10}.
6605     *     In this case,
6606     *     {@code codePoint - '\u005CuFF21' + 10}
6607     *     is returned.
6608     * <li>The character is one of the fullwidth lowercase Latin letters a
6609     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6610     *     and its code is less than
6611     *     {@code radix + '\u005CuFF41' - 10}.
6612     *     In this case,
6613     *     {@code codePoint - '\u005CuFF41' + 10}
6614     *     is returned.
6615     * </ul>
6616     *
6617     * @param   codePoint the character (Unicode code point) to be converted.
6618     * @param   radix   the radix.
6619     * @return  the numeric value represented by the character in the
6620     *          specified radix.
6621     * @see     Character#forDigit(int, int)
6622     * @see     Character#isDigit(int)
6623     * @since   1.5
6624     */
6625    public static int digit(int codePoint, int radix) {
6626        if (radix < MIN_RADIX || radix > MAX_RADIX) {
6627            return -1;
6628        }
6629        if (codePoint < 128) {
6630            // Optimized for ASCII
6631            int result = -1;
6632            if ('0' <= codePoint && codePoint <= '9') {
6633                result = codePoint - '0';
6634            } else if ('a' <= codePoint && codePoint <= 'z') {
6635                result = 10 + (codePoint - 'a');
6636            } else if ('A' <= codePoint && codePoint <= 'Z') {
6637                result = 10 + (codePoint - 'A');
6638            }
6639            return result < radix ? result : -1;
6640        }
6641        return digitImpl(codePoint, radix);
6642    }
6643
6644    native static int digitImpl(int codePoint, int radix);
6645
6646    /**
6647     * Returns the {@code int} value that the specified Unicode
6648     * character represents. For example, the character
6649     * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6650     * an int with a value of 50.
6651     * <p>
6652     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6653     * {@code '\u005Cu005A'}), lowercase
6654     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6655     * full width variant ({@code '\u005CuFF21'} through
6656     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6657     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6658     * through 35. This is independent of the Unicode specification,
6659     * which does not assign numeric values to these {@code char}
6660     * values.
6661     * <p>
6662     * If the character does not have a numeric value, then -1 is returned.
6663     * If the character has a numeric value that cannot be represented as a
6664     * nonnegative integer (for example, a fractional value), then -2
6665     * is returned.
6666     *
6667     * <p><b>Note:</b> This method cannot handle <a
6668     * href="#supplementary"> supplementary characters</a>. To support
6669     * all Unicode characters, including supplementary characters, use
6670     * the {@link #getNumericValue(int)} method.
6671     *
6672     * @param   ch      the character to be converted.
6673     * @return  the numeric value of the character, as a nonnegative {@code int}
6674     *           value; -2 if the character has a numeric value that is not a
6675     *          nonnegative integer; -1 if the character has no numeric value.
6676     * @see     Character#forDigit(int, int)
6677     * @see     Character#isDigit(char)
6678     * @since   1.1
6679     */
6680    public static int getNumericValue(char ch) {
6681        return getNumericValue((int)ch);
6682    }
6683
6684    /**
6685     * Returns the {@code int} value that the specified
6686     * character (Unicode code point) represents. For example, the character
6687     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6688     * an {@code int} with a value of 50.
6689     * <p>
6690     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6691     * {@code '\u005Cu005A'}), lowercase
6692     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6693     * full width variant ({@code '\u005CuFF21'} through
6694     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6695     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6696     * through 35. This is independent of the Unicode specification,
6697     * which does not assign numeric values to these {@code char}
6698     * values.
6699     * <p>
6700     * If the character does not have a numeric value, then -1 is returned.
6701     * If the character has a numeric value that cannot be represented as a
6702     * nonnegative integer (for example, a fractional value), then -2
6703     * is returned.
6704     *
6705     * @param   codePoint the character (Unicode code point) to be converted.
6706     * @return  the numeric value of the character, as a nonnegative {@code int}
6707     *          value; -2 if the character has a numeric value that is not a
6708     *          nonnegative integer; -1 if the character has no numeric value.
6709     * @see     Character#forDigit(int, int)
6710     * @see     Character#isDigit(int)
6711     * @since   1.5
6712     */
6713    public static int getNumericValue(int codePoint) {
6714        // This is both an optimization and papers over differences between Java and ICU.
6715        if (codePoint < 128) {
6716            if (codePoint >= '0' && codePoint <= '9') {
6717                return codePoint - '0';
6718            }
6719            if (codePoint >= 'a' && codePoint <= 'z') {
6720                return codePoint - ('a' - 10);
6721            }
6722            if (codePoint >= 'A' && codePoint <= 'Z') {
6723                return codePoint - ('A' - 10);
6724            }
6725            return -1;
6726        }
6727        // Full-width uppercase A-Z.
6728        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6729            return codePoint - 0xff17;
6730        }
6731        // Full-width lowercase a-z.
6732        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6733            return codePoint - 0xff37;
6734        }
6735        return getNumericValueImpl(codePoint);
6736    }
6737
6738    native static int getNumericValueImpl(int codePoint);
6739
6740    /**
6741     * Determines if the specified character is ISO-LATIN-1 white space.
6742     * This method returns {@code true} for the following five
6743     * characters only:
6744     * <table summary="truechars">
6745     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6746     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6747     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6748     *     <td>{@code NEW LINE}</td></tr>
6749     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6750     *     <td>{@code FORM FEED}</td></tr>
6751     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6752     *     <td>{@code CARRIAGE RETURN}</td></tr>
6753     * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
6754     *     <td>{@code SPACE}</td></tr>
6755     * </table>
6756     *
6757     * @param      ch   the character to be tested.
6758     * @return     {@code true} if the character is ISO-LATIN-1 white
6759     *             space; {@code false} otherwise.
6760     * @see        Character#isSpaceChar(char)
6761     * @see        Character#isWhitespace(char)
6762     * @deprecated Replaced by {@link #isWhitespace(char)}.
6763     */
6764    @Deprecated
6765    public static boolean isSpace(char ch) {
6766        return (ch <= 0x0020) &&
6767            (((((1L << 0x0009) |
6768            (1L << 0x000A) |
6769            (1L << 0x000C) |
6770            (1L << 0x000D) |
6771            (1L << 0x0020)) >> ch) & 1L) != 0);
6772    }
6773
6774
6775    /**
6776     * Determines if the specified character is a Unicode space character.
6777     * A character is considered to be a space character if and only if
6778     * it is specified to be a space character by the Unicode Standard. This
6779     * method returns true if the character's general category type is any of
6780     * the following:
6781     * <ul>
6782     * <li> {@code SPACE_SEPARATOR}
6783     * <li> {@code LINE_SEPARATOR}
6784     * <li> {@code PARAGRAPH_SEPARATOR}
6785     * </ul>
6786     *
6787     * <p><b>Note:</b> This method cannot handle <a
6788     * href="#supplementary"> supplementary characters</a>. To support
6789     * all Unicode characters, including supplementary characters, use
6790     * the {@link #isSpaceChar(int)} method.
6791     *
6792     * @param   ch      the character to be tested.
6793     * @return  {@code true} if the character is a space character;
6794     *          {@code false} otherwise.
6795     * @see     Character#isWhitespace(char)
6796     * @since   1.1
6797     */
6798    public static boolean isSpaceChar(char ch) {
6799        return isSpaceChar((int)ch);
6800    }
6801
6802    /**
6803     * Determines if the specified character (Unicode code point) is a
6804     * Unicode space character.  A character is considered to be a
6805     * space character if and only if it is specified to be a space
6806     * character by the Unicode Standard. This method returns true if
6807     * the character's general category type is any of the following:
6808     *
6809     * <ul>
6810     * <li> {@link #SPACE_SEPARATOR}
6811     * <li> {@link #LINE_SEPARATOR}
6812     * <li> {@link #PARAGRAPH_SEPARATOR}
6813     * </ul>
6814     *
6815     * @param   codePoint the character (Unicode code point) to be tested.
6816     * @return  {@code true} if the character is a space character;
6817     *          {@code false} otherwise.
6818     * @see     Character#isWhitespace(int)
6819     * @since   1.5
6820     */
6821    public static boolean isSpaceChar(int codePoint) {
6822        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6823        // SPACE or NO-BREAK SPACE?
6824        if (codePoint == 0x20 || codePoint == 0xa0) {
6825            return true;
6826        }
6827        if (codePoint < 0x1000) {
6828            return false;
6829        }
6830        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6831        if (codePoint == 0x1680 || codePoint == 0x180e) {
6832            return true;
6833        }
6834        if (codePoint < 0x2000) {
6835            return false;
6836        }
6837        if (codePoint <= 0xffff) {
6838            // Other whitespace from General Punctuation...
6839            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
6840                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6841        }
6842        // Let icu4c worry about non-BMP code points.
6843        return isSpaceCharImpl(codePoint);
6844    }
6845
6846    static native boolean isSpaceCharImpl(int codePoint);
6847
6848    /**
6849     * Determines if the specified character is white space according to Java.
6850     * A character is a Java whitespace character if and only if it satisfies
6851     * one of the following criteria:
6852     * <ul>
6853     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6854     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6855     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6856     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6857     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6858     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6859     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6860     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6861     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6862     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6863     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6864     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6865     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6866     * </ul>
6867     *
6868     * <p><b>Note:</b> This method cannot handle <a
6869     * href="#supplementary"> supplementary characters</a>. To support
6870     * all Unicode characters, including supplementary characters, use
6871     * the {@link #isWhitespace(int)} method.
6872     *
6873     * @param   ch the character to be tested.
6874     * @return  {@code true} if the character is a Java whitespace
6875     *          character; {@code false} otherwise.
6876     * @see     Character#isSpaceChar(char)
6877     * @since   1.1
6878     */
6879    public static boolean isWhitespace(char ch) {
6880        return isWhitespace((int)ch);
6881    }
6882
6883    /**
6884     * Determines if the specified character (Unicode code point) is
6885     * white space according to Java.  A character is a Java
6886     * whitespace character if and only if it satisfies one of the
6887     * following criteria:
6888     * <ul>
6889     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6890     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6891     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6892     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6893     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6894     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6895     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6896     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6897     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6898     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6899     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6900     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6901     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6902     * </ul>
6903     *
6904     *
6905     * @param   codePoint the character (Unicode code point) to be tested.
6906     * @return  {@code true} if the character is a Java whitespace
6907     *          character; {@code false} otherwise.
6908     * @see     Character#isSpaceChar(int)
6909     * @since   1.5
6910     */
6911    public static boolean isWhitespace(int codePoint) {
6912        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6913        // Any ASCII whitespace character?
6914        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
6915            return true;
6916        }
6917        if (codePoint < 0x1000) {
6918            return false;
6919        }
6920        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6921        if (codePoint == 0x1680 || codePoint == 0x180e) {
6922            return true;
6923        }
6924        if (codePoint < 0x2000) {
6925            return false;
6926        }
6927        // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
6928        if (codePoint == 0x2007 || codePoint == 0x202f) {
6929            return false;
6930        }
6931        if (codePoint <= 0xffff) {
6932            // Other whitespace from General Punctuation...
6933            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
6934                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6935        }
6936        // Let icu4c worry about non-BMP code points.
6937        return isWhitespaceImpl(codePoint);
6938    }
6939
6940    native static boolean isWhitespaceImpl(int codePoint);
6941
6942    /**
6943     * Determines if the specified character is an ISO control
6944     * character.  A character is considered to be an ISO control
6945     * character if its code is in the range {@code '\u005Cu0000'}
6946     * through {@code '\u005Cu001F'} or in the range
6947     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6948     *
6949     * <p><b>Note:</b> This method cannot handle <a
6950     * href="#supplementary"> supplementary characters</a>. To support
6951     * all Unicode characters, including supplementary characters, use
6952     * the {@link #isISOControl(int)} method.
6953     *
6954     * @param   ch      the character to be tested.
6955     * @return  {@code true} if the character is an ISO control character;
6956     *          {@code false} otherwise.
6957     *
6958     * @see     Character#isSpaceChar(char)
6959     * @see     Character#isWhitespace(char)
6960     * @since   1.1
6961     */
6962    public static boolean isISOControl(char ch) {
6963        return isISOControl((int)ch);
6964    }
6965
6966    /**
6967     * Determines if the referenced character (Unicode code point) is an ISO control
6968     * character.  A character is considered to be an ISO control
6969     * character if its code is in the range {@code '\u005Cu0000'}
6970     * through {@code '\u005Cu001F'} or in the range
6971     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6972     *
6973     * @param   codePoint the character (Unicode code point) to be tested.
6974     * @return  {@code true} if the character is an ISO control character;
6975     *          {@code false} otherwise.
6976     * @see     Character#isSpaceChar(int)
6977     * @see     Character#isWhitespace(int)
6978     * @since   1.5
6979     */
6980    public static boolean isISOControl(int codePoint) {
6981        // Optimized form of:
6982        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6983        //     (codePoint >= 0x7F && codePoint <= 0x9F);
6984        return codePoint <= 0x9F &&
6985            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6986    }
6987
6988    /**
6989     * Returns a value indicating a character's general category.
6990     *
6991     * <p><b>Note:</b> This method cannot handle <a
6992     * href="#supplementary"> supplementary characters</a>. To support
6993     * all Unicode characters, including supplementary characters, use
6994     * the {@link #getType(int)} method.
6995     *
6996     * @param   ch      the character to be tested.
6997     * @return  a value of type {@code int} representing the
6998     *          character's general category.
6999     * @see     Character#COMBINING_SPACING_MARK
7000     * @see     Character#CONNECTOR_PUNCTUATION
7001     * @see     Character#CONTROL
7002     * @see     Character#CURRENCY_SYMBOL
7003     * @see     Character#DASH_PUNCTUATION
7004     * @see     Character#DECIMAL_DIGIT_NUMBER
7005     * @see     Character#ENCLOSING_MARK
7006     * @see     Character#END_PUNCTUATION
7007     * @see     Character#FINAL_QUOTE_PUNCTUATION
7008     * @see     Character#FORMAT
7009     * @see     Character#INITIAL_QUOTE_PUNCTUATION
7010     * @see     Character#LETTER_NUMBER
7011     * @see     Character#LINE_SEPARATOR
7012     * @see     Character#LOWERCASE_LETTER
7013     * @see     Character#MATH_SYMBOL
7014     * @see     Character#MODIFIER_LETTER
7015     * @see     Character#MODIFIER_SYMBOL
7016     * @see     Character#NON_SPACING_MARK
7017     * @see     Character#OTHER_LETTER
7018     * @see     Character#OTHER_NUMBER
7019     * @see     Character#OTHER_PUNCTUATION
7020     * @see     Character#OTHER_SYMBOL
7021     * @see     Character#PARAGRAPH_SEPARATOR
7022     * @see     Character#PRIVATE_USE
7023     * @see     Character#SPACE_SEPARATOR
7024     * @see     Character#START_PUNCTUATION
7025     * @see     Character#SURROGATE
7026     * @see     Character#TITLECASE_LETTER
7027     * @see     Character#UNASSIGNED
7028     * @see     Character#UPPERCASE_LETTER
7029     * @since   1.1
7030     */
7031    public static int getType(char ch) {
7032        return getType((int)ch);
7033    }
7034
7035    /**
7036     * Returns a value indicating a character's general category.
7037     *
7038     * @param   codePoint the character (Unicode code point) to be tested.
7039     * @return  a value of type {@code int} representing the
7040     *          character's general category.
7041     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
7042     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
7043     * @see     Character#CONTROL CONTROL
7044     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
7045     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
7046     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
7047     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
7048     * @see     Character#END_PUNCTUATION END_PUNCTUATION
7049     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
7050     * @see     Character#FORMAT FORMAT
7051     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
7052     * @see     Character#LETTER_NUMBER LETTER_NUMBER
7053     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
7054     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
7055     * @see     Character#MATH_SYMBOL MATH_SYMBOL
7056     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
7057     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
7058     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
7059     * @see     Character#OTHER_LETTER OTHER_LETTER
7060     * @see     Character#OTHER_NUMBER OTHER_NUMBER
7061     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
7062     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
7063     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
7064     * @see     Character#PRIVATE_USE PRIVATE_USE
7065     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
7066     * @see     Character#START_PUNCTUATION START_PUNCTUATION
7067     * @see     Character#SURROGATE SURROGATE
7068     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
7069     * @see     Character#UNASSIGNED UNASSIGNED
7070     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
7071     * @since   1.5
7072     */
7073    public static int getType(int codePoint) {
7074        int type = getTypeImpl(codePoint);
7075        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
7076        if (type <= Character.FORMAT) {
7077            return type;
7078        }
7079        return (type + 1);
7080    }
7081
7082    static native int getTypeImpl(int codePoint);
7083
7084    /**
7085     * Determines the character representation for a specific digit in
7086     * the specified radix. If the value of {@code radix} is not a
7087     * valid radix, or the value of {@code digit} is not a valid
7088     * digit in the specified radix, the null character
7089     * ({@code '\u005Cu0000'}) is returned.
7090     * <p>
7091     * The {@code radix} argument is valid if it is greater than or
7092     * equal to {@code MIN_RADIX} and less than or equal to
7093     * {@code MAX_RADIX}. The {@code digit} argument is valid if
7094     * {@code 0 <= digit < radix}.
7095     * <p>
7096     * If the digit is less than 10, then
7097     * {@code '0' + digit} is returned. Otherwise, the value
7098     * {@code 'a' + digit - 10} is returned.
7099     *
7100     * @param   digit   the number to convert to a character.
7101     * @param   radix   the radix.
7102     * @return  the {@code char} representation of the specified digit
7103     *          in the specified radix.
7104     * @see     Character#MIN_RADIX
7105     * @see     Character#MAX_RADIX
7106     * @see     Character#digit(char, int)
7107     */
7108    public static char forDigit(int digit, int radix) {
7109        if ((digit >= radix) || (digit < 0)) {
7110            return '\0';
7111        }
7112        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
7113            return '\0';
7114        }
7115        if (digit < 10) {
7116            return (char)('0' + digit);
7117        }
7118        return (char)('a' - 10 + digit);
7119    }
7120
7121    /**
7122     * Returns the Unicode directionality property for the given
7123     * character.  Character directionality is used to calculate the
7124     * visual ordering of text. The directionality value of undefined
7125     * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7126     *
7127     * <p><b>Note:</b> This method cannot handle <a
7128     * href="#supplementary"> supplementary characters</a>. To support
7129     * all Unicode characters, including supplementary characters, use
7130     * the {@link #getDirectionality(int)} method.
7131     *
7132     * @param  ch {@code char} for which the directionality property
7133     *            is requested.
7134     * @return the directionality property of the {@code char} value.
7135     *
7136     * @see Character#DIRECTIONALITY_UNDEFINED
7137     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7138     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7139     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7140     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7141     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7142     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7143     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7144     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7145     * @see Character#DIRECTIONALITY_NONSPACING_MARK
7146     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7147     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7148     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7149     * @see Character#DIRECTIONALITY_WHITESPACE
7150     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7151     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7152     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7153     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7154     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7155     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7156     * @since 1.4
7157     */
7158    public static byte getDirectionality(char ch) {
7159        return getDirectionality((int)ch);
7160    }
7161
7162    /**
7163     * Returns the Unicode directionality property for the given
7164     * character (Unicode code point).  Character directionality is
7165     * used to calculate the visual ordering of text. The
7166     * directionality value of undefined character is {@link
7167     * #DIRECTIONALITY_UNDEFINED}.
7168     *
7169     * @param   codePoint the character (Unicode code point) for which
7170     *          the directionality property is requested.
7171     * @return the directionality property of the character.
7172     *
7173     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7174     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7175     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7176     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7177     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7178     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7179     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7180     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7181     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7182     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7183     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7184     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7185     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7186     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7187     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7188     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7189     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7190     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7191     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7192     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7193     * @since    1.5
7194     */
7195    public static byte getDirectionality(int codePoint) {
7196        if (getType(codePoint) == Character.UNASSIGNED) {
7197            return Character.DIRECTIONALITY_UNDEFINED;
7198        }
7199
7200        byte directionality = getDirectionalityImpl(codePoint);
7201        if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7202            return DIRECTIONALITY[directionality];
7203        }
7204        return Character.DIRECTIONALITY_UNDEFINED;
7205    }
7206
    /**
     * Native lookup backing {@link #getDirectionality(int)}. That caller
     * uses the returned value as an index into the {@code DIRECTIONALITY}
     * table and maps any value outside the table to
     * {@code DIRECTIONALITY_UNDEFINED}.
     */
    native static byte getDirectionalityImpl(int codePoint);
7208    /**
7209     * Determines whether the character is mirrored according to the
7210     * Unicode specification.  Mirrored characters should have their
7211     * glyphs horizontally mirrored when displayed in text that is
7212     * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7213     * PARENTHESIS is semantically defined to be an <i>opening
7214     * parenthesis</i>.  This will appear as a "(" in text that is
7215     * left-to-right but as a ")" in text that is right-to-left.
7216     *
7217     * <p><b>Note:</b> This method cannot handle <a
7218     * href="#supplementary"> supplementary characters</a>. To support
7219     * all Unicode characters, including supplementary characters, use
7220     * the {@link #isMirrored(int)} method.
7221     *
7222     * @param  ch {@code char} for which the mirrored property is requested
7223     * @return {@code true} if the char is mirrored, {@code false}
7224     *         if the {@code char} is not mirrored or is not defined.
7225     * @since 1.4
7226     */
7227    public static boolean isMirrored(char ch) {
7228        return isMirrored((int)ch);
7229    }
7230
7231    /**
7232     * Determines whether the specified character (Unicode code point)
7233     * is mirrored according to the Unicode specification.  Mirrored
7234     * characters should have their glyphs horizontally mirrored when
7235     * displayed in text that is right-to-left.  For example,
7236     * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7237     * defined to be an <i>opening parenthesis</i>.  This will appear
7238     * as a "(" in text that is left-to-right but as a ")" in text
7239     * that is right-to-left.
7240     *
7241     * @param   codePoint the character (Unicode code point) to be tested.
7242     * @return  {@code true} if the character is mirrored, {@code false}
7243     *          if the character is not mirrored or is not defined.
7244     * @since   1.5
7245     */
7246    public static boolean isMirrored(int codePoint) {
7247        return isMirroredImpl(codePoint);
7248    }
7249
    /**
     * Native implementation behind {@link #isMirrored(int)}, which returns
     * this method's result unchanged.
     */
    native static boolean isMirroredImpl(int codePoint);
7251    /**
7252     * Compares two {@code Character} objects numerically.
7253     *
7254     * @param   anotherCharacter   the {@code Character} to be compared.
7255
7256     * @return  the value {@code 0} if the argument {@code Character}
7257     *          is equal to this {@code Character}; a value less than
7258     *          {@code 0} if this {@code Character} is numerically less
7259     *          than the {@code Character} argument; and a value greater than
7260     *          {@code 0} if this {@code Character} is numerically greater
7261     *          than the {@code Character} argument (unsigned comparison).
7262     *          Note that this is strictly a numerical comparison; it is not
7263     *          locale-dependent.
7264     * @since   1.2
7265     */
7266    public int compareTo(Character anotherCharacter) {
7267        return compare(this.value, anotherCharacter.value);
7268    }
7269
7270    /**
7271     * Compares two {@code char} values numerically.
7272     * The value returned is identical to what would be returned by:
7273     * <pre>
7274     *    Character.valueOf(x).compareTo(Character.valueOf(y))
7275     * </pre>
7276     *
7277     * @param  x the first {@code char} to compare
7278     * @param  y the second {@code char} to compare
7279     * @return the value {@code 0} if {@code x == y};
7280     *         a value less than {@code 0} if {@code x < y}; and
7281     *         a value greater than {@code 0} if {@code x > y}
7282     * @since 1.7
7283     */
7284    public static int compare(char x, char y) {
7285        return x - y;
7286    }
7287
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7295
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, computed as {@code SIZE / Byte.SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7303
7304    /**
7305     * Returns the value obtained by reversing the order of the bytes in the
7306     * specified <tt>char</tt> value.
7307     *
7308     * @param ch The {@code char} of which to reverse the byte order.
7309     * @return the value obtained by reversing (or, equivalently, swapping)
7310     *     the bytes in the specified <tt>char</tt> value.
7311     * @since 1.5
7312     */
7313    public static char reverseBytes(char ch) {
7314        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7315    }
7316
7317    /**
7318     * Returns the Unicode name of the specified character
7319     * {@code codePoint}, or null if the code point is
7320     * {@link #UNASSIGNED unassigned}.
7321     * <p>
7322     * Note: if the specified character is not assigned a name by
7323     * the <i>UnicodeData</i> file (part of the Unicode Character
7324     * Database maintained by the Unicode Consortium), the returned
7325     * name is the same as the result of expression.
7326     *
7327     * <blockquote>{@code
7328     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7329     *     + " "
7330     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7331     *
7332     * }</blockquote>
7333     *
7334     * @param  codePoint the character (Unicode code point)
7335     *
7336     * @return the Unicode name of the specified character, or null if
7337     *         the code point is unassigned.
7338     *
7339     * @exception IllegalArgumentException if the specified
7340     *            {@code codePoint} is not a valid Unicode
7341     *            code point.
7342     *
7343     * @since 1.7
7344     */
7345    public static String getName(int codePoint) {
7346        if (!isValidCodePoint(codePoint)) {
7347            throw new IllegalArgumentException();
7348        }
7349        String name = getNameImpl(codePoint);
7350        if (name != null)
7351            return name;
7352        if (getType(codePoint) == UNASSIGNED)
7353            return null;
7354        UnicodeBlock block = UnicodeBlock.of(codePoint);
7355        if (block != null)
7356            return block.toString().replace('_', ' ') + " "
7357                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7358        // should never come here
7359        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7360    }
7361
    /**
     * Native name lookup used by {@link #getName(int)}, which treats a
     * {@code null} result as "no name available" and then falls back to a
     * name built from the code point's Unicode block.
     */
    private static native String getNameImpl(int codePoint);
7363}
7364