1/*
2 * Copyright (C) 2014 The Android Open Source Project
3 * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.  Oracle designates this
9 * particular file as subject to the "Classpath" exception as provided
10 * by Oracle in the LICENSE file that accompanied this code.
11 *
12 * This code is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 * version 2 for more details (a copy is included in the LICENSE file that
16 * accompanied this code).
17 *
18 * You should have received a copy of the GNU General Public License version
19 * 2 along with this work; if not, write to the Free Software Foundation,
20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 *
22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23 * or visit www.oracle.com if you need additional information or have any
24 * questions.
25 */
26
27package java.lang;
28
29import java.util.Arrays;
30import java.util.HashMap;
31import java.util.Locale;
32import java.util.Map;
33
34/**
35 * The {@code Character} class wraps a value of the primitive
36 * type {@code char} in an object. An object of type
37 * {@code Character} contains a single field whose type is
38 * {@code char}.
39 * <p>
40 * In addition, this class provides several methods for determining
41 * a character's category (lowercase letter, digit, etc.) and for converting
42 * characters from uppercase to lowercase and vice versa.
43 * <p>
44 * Character information is based on the Unicode Standard, version 6.0.0.
45 * <p>
46 * The methods and data of class {@code Character} are defined by
47 * the information in the <i>UnicodeData</i> file that is part of the
48 * Unicode Character Database maintained by the Unicode
49 * Consortium. This file specifies various properties including name
50 * and general category for every defined Unicode code point or
51 * character range.
52 * <p>
53 * The file and its description are available from the Unicode Consortium at:
54 * <ul>
55 * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
56 * </ul>
57 *
58 * <h4><a name="unicode">Unicode Character Representations</a></h4>
59 *
60 * <p>The {@code char} data type (and therefore the value that a
61 * {@code Character} object encapsulates) are based on the
62 * original Unicode specification, which defined characters as
63 * fixed-width 16-bit entities. The Unicode Standard has since been
64 * changed to allow for characters whose representation requires more
65 * than 16 bits.  The range of legal <em>code point</em>s is now
66 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
67 * (Refer to the <a
68 * href="http://www.unicode.org/reports/tr27/#notation"><i>
69 * definition</i></a> of the U+<i>n</i> notation in the Unicode
70 * Standard.)
71 *
 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
74 * <a name="supplementary">Characters</a> whose code points are greater
75 * than U+FFFF are called <em>supplementary character</em>s.  The Java
76 * platform uses the UTF-16 representation in {@code char} arrays and
77 * in the {@code String} and {@code StringBuffer} classes. In
78 * this representation, supplementary characters are represented as a pair
79 * of {@code char} values, the first from the <em>high-surrogates</em>
 * range (&#92;uD800-&#92;uDBFF), the second from the
81 * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
82 *
83 * <p>A {@code char} value, therefore, represents Basic
84 * Multilingual Plane (BMP) code points, including the surrogate
85 * code points, or code units of the UTF-16 encoding. An
86 * {@code int} value represents all Unicode code points,
87 * including supplementary code points. The lower (least significant)
88 * 21 bits of {@code int} are used to represent Unicode code
89 * points and the upper (most significant) 11 bits must be zero.
90 * Unless otherwise specified, the behavior with respect to
91 * supplementary characters and surrogate {@code char} values is
92 * as follows:
93 *
94 * <ul>
95 * <li>The methods that only accept a {@code char} value cannot support
96 * supplementary characters. They treat {@code char} values from the
97 * surrogate ranges as undefined characters. For example,
98 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
99 * this specific value if followed by any low-surrogate value in a string
100 * would represent a letter.
101 *
102 * <li>The methods that accept an {@code int} value support all
103 * Unicode characters, including supplementary characters. For
104 * example, {@code Character.isLetter(0x2F81A)} returns
105 * {@code true} because the code point value represents a letter
106 * (a CJK ideograph).
107 * </ul>
108 *
109 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
110 * used for character values in the range between U+0000 and U+10FFFF,
111 * and <em>Unicode code unit</em> is used for 16-bit
112 * {@code char} values that are code units of the <em>UTF-16</em>
113 * encoding. For more information on Unicode terminology, refer to the
114 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
115 *
116 * @author  Lee Boynton
117 * @author  Guy Steele
118 * @author  Akira Tanaka
119 * @author  Martin Buchholz
120 * @author  Ulf Zibis
121 * @since   1.0
122 */
123public final
124class Character implements java.io.Serializable, Comparable<Character> {
125    /**
126     * The minimum radix available for conversion to and from strings.
127     * The constant value of this field is the smallest value permitted
128     * for the radix argument in radix-conversion methods such as the
129     * {@code digit} method, the {@code forDigit} method, and the
130     * {@code toString} method of class {@code Integer}.
131     *
132     * @see     Character#digit(char, int)
133     * @see     Character#forDigit(int, int)
134     * @see     Integer#toString(int, int)
135     * @see     Integer#valueOf(String)
136     */
137    public static final int MIN_RADIX = 2;
138
139    /**
140     * The maximum radix available for conversion to and from strings.
141     * The constant value of this field is the largest value permitted
142     * for the radix argument in radix-conversion methods such as the
143     * {@code digit} method, the {@code forDigit} method, and the
144     * {@code toString} method of class {@code Integer}.
145     *
146     * @see     Character#digit(char, int)
147     * @see     Character#forDigit(int, int)
148     * @see     Integer#toString(int, int)
149     * @see     Integer#valueOf(String)
150     */
151    public static final int MAX_RADIX = 36;
152
153    /**
154     * The constant value of this field is the smallest value of type
155     * {@code char}, {@code '\u005Cu0000'}.
156     *
157     * @since   1.0.2
158     */
159    public static final char MIN_VALUE = '\u0000';
160
161    /**
162     * The constant value of this field is the largest value of type
163     * {@code char}, {@code '\u005CuFFFF'}.
164     *
165     * @since   1.0.2
166     */
167    public static final char MAX_VALUE = '\uFFFF';
168
169    /**
170     * The {@code Class} instance representing the primitive type
171     * {@code char}.
172     *
173     * @since   1.1
174     */
175    @SuppressWarnings("unchecked")
176    /* ----- BEGIN android -----
177    public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
178    */
179    public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
180    // ----- END android -----
181
182    /*
183     * Normative general types
184     */
185
186    /*
187     * General character types
188     */
189
190    /**
191     * General category "Cn" in the Unicode specification.
192     * @since   1.1
193     */
194    public static final byte UNASSIGNED = 0;
195
196    /**
197     * General category "Lu" in the Unicode specification.
198     * @since   1.1
199     */
200    public static final byte UPPERCASE_LETTER = 1;
201
202    /**
203     * General category "Ll" in the Unicode specification.
204     * @since   1.1
205     */
206    public static final byte LOWERCASE_LETTER = 2;
207
208    /**
209     * General category "Lt" in the Unicode specification.
210     * @since   1.1
211     */
212    public static final byte TITLECASE_LETTER = 3;
213
214    /**
215     * General category "Lm" in the Unicode specification.
216     * @since   1.1
217     */
218    public static final byte MODIFIER_LETTER = 4;
219
220    /**
221     * General category "Lo" in the Unicode specification.
222     * @since   1.1
223     */
224    public static final byte OTHER_LETTER = 5;
225
226    /**
227     * General category "Mn" in the Unicode specification.
228     * @since   1.1
229     */
230    public static final byte NON_SPACING_MARK = 6;
231
232    /**
233     * General category "Me" in the Unicode specification.
234     * @since   1.1
235     */
236    public static final byte ENCLOSING_MARK = 7;
237
238    /**
239     * General category "Mc" in the Unicode specification.
240     * @since   1.1
241     */
242    public static final byte COMBINING_SPACING_MARK = 8;
243
244    /**
245     * General category "Nd" in the Unicode specification.
246     * @since   1.1
247     */
248    public static final byte DECIMAL_DIGIT_NUMBER        = 9;
249
250    /**
251     * General category "Nl" in the Unicode specification.
252     * @since   1.1
253     */
254    public static final byte LETTER_NUMBER = 10;
255
256    /**
257     * General category "No" in the Unicode specification.
258     * @since   1.1
259     */
260    public static final byte OTHER_NUMBER = 11;
261
262    /**
263     * General category "Zs" in the Unicode specification.
264     * @since   1.1
265     */
266    public static final byte SPACE_SEPARATOR = 12;
267
268    /**
269     * General category "Zl" in the Unicode specification.
270     * @since   1.1
271     */
272    public static final byte LINE_SEPARATOR = 13;
273
274    /**
275     * General category "Zp" in the Unicode specification.
276     * @since   1.1
277     */
278    public static final byte PARAGRAPH_SEPARATOR = 14;
279
280    /**
281     * General category "Cc" in the Unicode specification.
282     * @since   1.1
283     */
284    public static final byte CONTROL = 15;
285
286    /**
287     * General category "Cf" in the Unicode specification.
288     * @since   1.1
289     */
290    public static final byte FORMAT = 16;
291
292    /**
293     * General category "Co" in the Unicode specification.
294     * @since   1.1
295     */
296    public static final byte PRIVATE_USE = 18;
297
298    /**
299     * General category "Cs" in the Unicode specification.
300     * @since   1.1
301     */
302    public static final byte SURROGATE = 19;
303
304    /**
305     * General category "Pd" in the Unicode specification.
306     * @since   1.1
307     */
308    public static final byte DASH_PUNCTUATION = 20;
309
310    /**
311     * General category "Ps" in the Unicode specification.
312     * @since   1.1
313     */
314    public static final byte START_PUNCTUATION = 21;
315
316    /**
317     * General category "Pe" in the Unicode specification.
318     * @since   1.1
319     */
320    public static final byte END_PUNCTUATION = 22;
321
322    /**
323     * General category "Pc" in the Unicode specification.
324     * @since   1.1
325     */
326    public static final byte CONNECTOR_PUNCTUATION = 23;
327
328    /**
329     * General category "Po" in the Unicode specification.
330     * @since   1.1
331     */
332    public static final byte OTHER_PUNCTUATION = 24;
333
334    /**
335     * General category "Sm" in the Unicode specification.
336     * @since   1.1
337     */
338    public static final byte MATH_SYMBOL = 25;
339
340    /**
341     * General category "Sc" in the Unicode specification.
342     * @since   1.1
343     */
344    public static final byte CURRENCY_SYMBOL = 26;
345
346    /**
347     * General category "Sk" in the Unicode specification.
348     * @since   1.1
349     */
350    public static final byte MODIFIER_SYMBOL = 27;
351
352    /**
353     * General category "So" in the Unicode specification.
354     * @since   1.1
355     */
356    public static final byte OTHER_SYMBOL = 28;
357
358    /**
359     * General category "Pi" in the Unicode specification.
360     * @since   1.4
361     */
362    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
363
364    /**
365     * General category "Pf" in the Unicode specification.
366     * @since   1.4
367     */
368    public static final byte FINAL_QUOTE_PUNCTUATION = 30;
369
370    /**
371     * Error flag. Use int (code point) to avoid confusion with U+FFFF.
372     */
373    static final int ERROR = 0xFFFFFFFF;
374
375
376    /**
377     * Undefined bidirectional character type. Undefined {@code char}
378     * values have undefined directionality in the Unicode specification.
379     * @since 1.4
380     */
381    public static final byte DIRECTIONALITY_UNDEFINED = -1;
382
383    /**
384     * Strong bidirectional character type "L" in the Unicode specification.
385     * @since 1.4
386     */
387    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
388
389    /**
390     * Strong bidirectional character type "R" in the Unicode specification.
391     * @since 1.4
392     */
393    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
394
395    /**
396    * Strong bidirectional character type "AL" in the Unicode specification.
397     * @since 1.4
398     */
399    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
400
401    /**
402     * Weak bidirectional character type "EN" in the Unicode specification.
403     * @since 1.4
404     */
405    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
406
407    /**
408     * Weak bidirectional character type "ES" in the Unicode specification.
409     * @since 1.4
410     */
411    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
412
413    /**
414     * Weak bidirectional character type "ET" in the Unicode specification.
415     * @since 1.4
416     */
417    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
418
419    /**
420     * Weak bidirectional character type "AN" in the Unicode specification.
421     * @since 1.4
422     */
423    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
424
425    /**
426     * Weak bidirectional character type "CS" in the Unicode specification.
427     * @since 1.4
428     */
429    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
430
431    /**
432     * Weak bidirectional character type "NSM" in the Unicode specification.
433     * @since 1.4
434     */
435    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
436
437    /**
438     * Weak bidirectional character type "BN" in the Unicode specification.
439     * @since 1.4
440     */
441    public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
442
443    /**
444     * Neutral bidirectional character type "B" in the Unicode specification.
445     * @since 1.4
446     */
447    public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
448
449    /**
450     * Neutral bidirectional character type "S" in the Unicode specification.
451     * @since 1.4
452     */
453    public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
454
455    /**
456     * Neutral bidirectional character type "WS" in the Unicode specification.
457     * @since 1.4
458     */
459    public static final byte DIRECTIONALITY_WHITESPACE = 12;
460
461    /**
462     * Neutral bidirectional character type "ON" in the Unicode specification.
463     * @since 1.4
464     */
465    public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
466
467    /**
468     * Strong bidirectional character type "LRE" in the Unicode specification.
469     * @since 1.4
470     */
471    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
472
473    /**
474     * Strong bidirectional character type "LRO" in the Unicode specification.
475     * @since 1.4
476     */
477    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
478
479    /**
480     * Strong bidirectional character type "RLE" in the Unicode specification.
481     * @since 1.4
482     */
483    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
484
485    /**
486     * Strong bidirectional character type "RLO" in the Unicode specification.
487     * @since 1.4
488     */
489    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
490
491    /**
492     * Weak bidirectional character type "PDF" in the Unicode specification.
493     * @since 1.4
494     */
495    public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
496
497    /**
498     * The minimum value of a
499     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
500     * Unicode high-surrogate code unit</a>
501     * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
502     * A high-surrogate is also known as a <i>leading-surrogate</i>.
503     *
504     * @since 1.5
505     */
506    public static final char MIN_HIGH_SURROGATE = '\uD800';
507
508    /**
509     * The maximum value of a
510     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
511     * Unicode high-surrogate code unit</a>
512     * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
513     * A high-surrogate is also known as a <i>leading-surrogate</i>.
514     *
515     * @since 1.5
516     */
517    public static final char MAX_HIGH_SURROGATE = '\uDBFF';
518
519    /**
520     * The minimum value of a
521     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
522     * Unicode low-surrogate code unit</a>
523     * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
524     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
525     *
526     * @since 1.5
527     */
528    public static final char MIN_LOW_SURROGATE  = '\uDC00';
529
530    /**
531     * The maximum value of a
532     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
533     * Unicode low-surrogate code unit</a>
534     * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
535     * A low-surrogate is also known as a <i>trailing-surrogate</i>.
536     *
537     * @since 1.5
538     */
539    public static final char MAX_LOW_SURROGATE  = '\uDFFF';
540
541    /**
542     * The minimum value of a Unicode surrogate code unit in the
543     * UTF-16 encoding, constant {@code '\u005CuD800'}.
544     *
545     * @since 1.5
546     */
547    public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
548
549    /**
550     * The maximum value of a Unicode surrogate code unit in the
551     * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
552     *
553     * @since 1.5
554     */
555    public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
556
557    /**
558     * The minimum value of a
559     * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
560     * Unicode supplementary code point</a>, constant {@code U+10000}.
561     *
562     * @since 1.5
563     */
564    public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
565
566    /**
567     * The minimum value of a
568     * <a href="http://www.unicode.org/glossary/#code_point">
569     * Unicode code point</a>, constant {@code U+0000}.
570     *
571     * @since 1.5
572     */
573    public static final int MIN_CODE_POINT = 0x000000;
574
575    /**
576     * The maximum value of a
577     * <a href="http://www.unicode.org/glossary/#code_point">
578     * Unicode code point</a>, constant {@code U+10FFFF}.
579     *
580     * @since 1.5
581     */
582    public static final int MAX_CODE_POINT = 0X10FFFF;
583
584    private static final byte[] DIRECTIONALITY = new byte[] {
585            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
586            DIRECTIONALITY_EUROPEAN_NUMBER,
587            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
588            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
589            DIRECTIONALITY_ARABIC_NUMBER,
590            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
591            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
592            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
593            DIRECTIONALITY_OTHER_NEUTRALS,
594            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
595            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
596            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
597            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
598            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
599            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
600            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
601
602    /**
603     * Instances of this class represent particular subsets of the Unicode
604     * character set.  The only family of subsets defined in the
605     * {@code Character} class is {@link Character.UnicodeBlock}.
606     * Other portions of the Java API may define other subsets for their
607     * own purposes.
608     *
609     * @since 1.2
610     */
611    public static class Subset  {
612
613        private String name;
614
615        /**
616         * Constructs a new {@code Subset} instance.
617         *
618         * @param  name  The name of this subset
619         * @exception NullPointerException if name is {@code null}
620         */
621        protected Subset(String name) {
622            if (name == null) {
623                throw new NullPointerException("name");
624            }
625            this.name = name;
626        }
627
628        /**
629         * Compares two {@code Subset} objects for equality.
630         * This method returns {@code true} if and only if
631         * {@code this} and the argument refer to the same
632         * object; since this method is {@code final}, this
633         * guarantee holds for all subclasses.
634         */
635        public final boolean equals(Object obj) {
636            return (this == obj);
637        }
638
639        /**
640         * Returns the standard hash code as defined by the
641         * {@link Object#hashCode} method.  This method
642         * is {@code final} in order to ensure that the
643         * {@code equals} and {@code hashCode} methods will
644         * be consistent in all subclasses.
645         */
646        public final int hashCode() {
647            return super.hashCode();
648        }
649
650        /**
651         * Returns the name of this subset.
652         */
653        public final String toString() {
654            return name;
655        }
656    }
657
658    // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
659    // for the latest specification of Unicode Blocks.
660
661    /**
662     * A family of character subsets representing the character blocks in the
663     * Unicode specification. Character blocks generally define characters
664     * used for a specific script or purpose. A character is contained by
665     * at most one Unicode block.
666     *
667     * @since 1.2
668     */
669    public static final class UnicodeBlock extends Subset {
670
671        private static Map<String, UnicodeBlock> map = new HashMap<>(256);
672
673        /**
674         * Creates a UnicodeBlock with the given identifier name.
675         * This name must be the same as the block identifier.
676         */
677        private UnicodeBlock(String idName) {
678            this(idName, true);
679        }
680
681        private UnicodeBlock(String idName, boolean isMap) {
682            super(idName);
683            if (isMap) {
684                map.put(idName, this);
685            }
686        }
687
688        /**
689         * Creates a UnicodeBlock with the given identifier name and
690         * alias name.
691         */
692        private UnicodeBlock(String idName, String alias) {
693            this(idName, true);
694            map.put(alias, this);
695        }
696
697        /**
698         * Creates a UnicodeBlock with the given identifier name and
699         * alias names.
700         */
701        private UnicodeBlock(String idName, String... aliases) {
702            this(idName, true);
703            for (String alias : aliases)
704                map.put(alias, this);
705        }
706
707        /**
708         * Constant for the "Basic Latin" Unicode character block.
709         * @since 1.2
710         */
711        public static final UnicodeBlock  BASIC_LATIN =
712            new UnicodeBlock("BASIC_LATIN",
713                             "BASIC LATIN",
714                             "BASICLATIN");
715
716        /**
717         * Constant for the "Latin-1 Supplement" Unicode character block.
718         * @since 1.2
719         */
720        public static final UnicodeBlock LATIN_1_SUPPLEMENT =
721            new UnicodeBlock("LATIN_1_SUPPLEMENT",
722                             "LATIN-1 SUPPLEMENT",
723                             "LATIN-1SUPPLEMENT");
724
725        /**
726         * Constant for the "Latin Extended-A" Unicode character block.
727         * @since 1.2
728         */
729        public static final UnicodeBlock LATIN_EXTENDED_A =
730            new UnicodeBlock("LATIN_EXTENDED_A",
731                             "LATIN EXTENDED-A",
732                             "LATINEXTENDED-A");
733
734        /**
735         * Constant for the "Latin Extended-B" Unicode character block.
736         * @since 1.2
737         */
738        public static final UnicodeBlock LATIN_EXTENDED_B =
739            new UnicodeBlock("LATIN_EXTENDED_B",
740                             "LATIN EXTENDED-B",
741                             "LATINEXTENDED-B");
742
743        /**
744         * Constant for the "IPA Extensions" Unicode character block.
745         * @since 1.2
746         */
747        public static final UnicodeBlock IPA_EXTENSIONS =
748            new UnicodeBlock("IPA_EXTENSIONS",
749                             "IPA EXTENSIONS",
750                             "IPAEXTENSIONS");
751
752        /**
753         * Constant for the "Spacing Modifier Letters" Unicode character block.
754         * @since 1.2
755         */
756        public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
757            new UnicodeBlock("SPACING_MODIFIER_LETTERS",
758                             "SPACING MODIFIER LETTERS",
759                             "SPACINGMODIFIERLETTERS");
760
761        /**
762         * Constant for the "Combining Diacritical Marks" Unicode character block.
763         * @since 1.2
764         */
765        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
766            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
767                             "COMBINING DIACRITICAL MARKS",
768                             "COMBININGDIACRITICALMARKS");
769
770        /**
771         * Constant for the "Greek and Coptic" Unicode character block.
772         * <p>
773         * This block was previously known as the "Greek" block.
774         *
775         * @since 1.2
776         */
777        public static final UnicodeBlock GREEK =
778            new UnicodeBlock("GREEK",
779                             "GREEK AND COPTIC",
780                             "GREEKANDCOPTIC");
781
782        /**
783         * Constant for the "Cyrillic" Unicode character block.
784         * @since 1.2
785         */
786        public static final UnicodeBlock CYRILLIC =
787            new UnicodeBlock("CYRILLIC");
788
789        /**
790         * Constant for the "Armenian" Unicode character block.
791         * @since 1.2
792         */
793        public static final UnicodeBlock ARMENIAN =
794            new UnicodeBlock("ARMENIAN");
795
796        /**
797         * Constant for the "Hebrew" Unicode character block.
798         * @since 1.2
799         */
800        public static final UnicodeBlock HEBREW =
801            new UnicodeBlock("HEBREW");
802
803        /**
804         * Constant for the "Arabic" Unicode character block.
805         * @since 1.2
806         */
807        public static final UnicodeBlock ARABIC =
808            new UnicodeBlock("ARABIC");
809
810        /**
811         * Constant for the "Devanagari" Unicode character block.
812         * @since 1.2
813         */
814        public static final UnicodeBlock DEVANAGARI =
815            new UnicodeBlock("DEVANAGARI");
816
817        /**
818         * Constant for the "Bengali" Unicode character block.
819         * @since 1.2
820         */
821        public static final UnicodeBlock BENGALI =
822            new UnicodeBlock("BENGALI");
823
824        /**
825         * Constant for the "Gurmukhi" Unicode character block.
826         * @since 1.2
827         */
828        public static final UnicodeBlock GURMUKHI =
829            new UnicodeBlock("GURMUKHI");
830
831        /**
832         * Constant for the "Gujarati" Unicode character block.
833         * @since 1.2
834         */
835        public static final UnicodeBlock GUJARATI =
836            new UnicodeBlock("GUJARATI");
837
838        /**
839         * Constant for the "Oriya" Unicode character block.
840         * @since 1.2
841         */
842        public static final UnicodeBlock ORIYA =
843            new UnicodeBlock("ORIYA");
844
845        /**
846         * Constant for the "Tamil" Unicode character block.
847         * @since 1.2
848         */
849        public static final UnicodeBlock TAMIL =
850            new UnicodeBlock("TAMIL");
851
852        /**
853         * Constant for the "Telugu" Unicode character block.
854         * @since 1.2
855         */
856        public static final UnicodeBlock TELUGU =
857            new UnicodeBlock("TELUGU");
858
859        /**
860         * Constant for the "Kannada" Unicode character block.
861         * @since 1.2
862         */
863        public static final UnicodeBlock KANNADA =
864            new UnicodeBlock("KANNADA");
865
866        /**
867         * Constant for the "Malayalam" Unicode character block.
868         * @since 1.2
869         */
870        public static final UnicodeBlock MALAYALAM =
871            new UnicodeBlock("MALAYALAM");
872
873        /**
874         * Constant for the "Thai" Unicode character block.
875         * @since 1.2
876         */
877        public static final UnicodeBlock THAI =
878            new UnicodeBlock("THAI");
879
880        /**
881         * Constant for the "Lao" Unicode character block.
882         * @since 1.2
883         */
884        public static final UnicodeBlock LAO =
885            new UnicodeBlock("LAO");
886
887        /**
888         * Constant for the "Tibetan" Unicode character block.
889         * @since 1.2
890         */
891        public static final UnicodeBlock TIBETAN =
892            new UnicodeBlock("TIBETAN");
893
894        /**
895         * Constant for the "Georgian" Unicode character block.
896         * @since 1.2
897         */
898        public static final UnicodeBlock GEORGIAN =
899            new UnicodeBlock("GEORGIAN");
900
901        /**
902         * Constant for the "Hangul Jamo" Unicode character block.
903         * @since 1.2
904         */
905        public static final UnicodeBlock HANGUL_JAMO =
906            new UnicodeBlock("HANGUL_JAMO",
907                             "HANGUL JAMO",
908                             "HANGULJAMO");
909
910        /**
911         * Constant for the "Latin Extended Additional" Unicode character block.
912         * @since 1.2
913         */
914        public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
915            new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
916                             "LATIN EXTENDED ADDITIONAL",
917                             "LATINEXTENDEDADDITIONAL");
918
919        /**
920         * Constant for the "Greek Extended" Unicode character block.
921         * @since 1.2
922         */
923        public static final UnicodeBlock GREEK_EXTENDED =
924            new UnicodeBlock("GREEK_EXTENDED",
925                             "GREEK EXTENDED",
926                             "GREEKEXTENDED");
927
928        /**
929         * Constant for the "General Punctuation" Unicode character block.
930         * @since 1.2
931         */
932        public static final UnicodeBlock GENERAL_PUNCTUATION =
933            new UnicodeBlock("GENERAL_PUNCTUATION",
934                             "GENERAL PUNCTUATION",
935                             "GENERALPUNCTUATION");
936
937        /**
938         * Constant for the "Superscripts and Subscripts" Unicode character
939         * block.
940         * @since 1.2
941         */
942        public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
943            new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
944                             "SUPERSCRIPTS AND SUBSCRIPTS",
945                             "SUPERSCRIPTSANDSUBSCRIPTS");
946
947        /**
948         * Constant for the "Currency Symbols" Unicode character block.
949         * @since 1.2
950         */
951        public static final UnicodeBlock CURRENCY_SYMBOLS =
952            new UnicodeBlock("CURRENCY_SYMBOLS",
953                             "CURRENCY SYMBOLS",
954                             "CURRENCYSYMBOLS");
955
956        /**
957         * Constant for the "Combining Diacritical Marks for Symbols" Unicode
958         * character block.
959         * <p>
960         * This block was previously known as "Combining Marks for Symbols".
961         * @since 1.2
962         */
963        public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
964            new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
965                             "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
966                             "COMBININGDIACRITICALMARKSFORSYMBOLS",
967                             "COMBINING MARKS FOR SYMBOLS",
968                             "COMBININGMARKSFORSYMBOLS");
969
970        /**
971         * Constant for the "Letterlike Symbols" Unicode character block.
972         * @since 1.2
973         */
974        public static final UnicodeBlock LETTERLIKE_SYMBOLS =
975            new UnicodeBlock("LETTERLIKE_SYMBOLS",
976                             "LETTERLIKE SYMBOLS",
977                             "LETTERLIKESYMBOLS");
978
979        /**
980         * Constant for the "Number Forms" Unicode character block.
981         * @since 1.2
982         */
983        public static final UnicodeBlock NUMBER_FORMS =
984            new UnicodeBlock("NUMBER_FORMS",
985                             "NUMBER FORMS",
986                             "NUMBERFORMS");
987
988        /**
989         * Constant for the "Arrows" Unicode character block.
990         * @since 1.2
991         */
992        public static final UnicodeBlock ARROWS =
993            new UnicodeBlock("ARROWS");
994
995        /**
996         * Constant for the "Mathematical Operators" Unicode character block.
997         * @since 1.2
998         */
999        public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1000            new UnicodeBlock("MATHEMATICAL_OPERATORS",
1001                             "MATHEMATICAL OPERATORS",
1002                             "MATHEMATICALOPERATORS");
1003
1004        /**
1005         * Constant for the "Miscellaneous Technical" Unicode character block.
1006         * @since 1.2
1007         */
1008        public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1009            new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1010                             "MISCELLANEOUS TECHNICAL",
1011                             "MISCELLANEOUSTECHNICAL");
1012
1013        /**
1014         * Constant for the "Control Pictures" Unicode character block.
1015         * @since 1.2
1016         */
1017        public static final UnicodeBlock CONTROL_PICTURES =
1018            new UnicodeBlock("CONTROL_PICTURES",
1019                             "CONTROL PICTURES",
1020                             "CONTROLPICTURES");
1021
1022        /**
1023         * Constant for the "Optical Character Recognition" Unicode character block.
1024         * @since 1.2
1025         */
1026        public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1027            new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1028                             "OPTICAL CHARACTER RECOGNITION",
1029                             "OPTICALCHARACTERRECOGNITION");
1030
1031        /**
1032         * Constant for the "Enclosed Alphanumerics" Unicode character block.
1033         * @since 1.2
1034         */
1035        public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1036            new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1037                             "ENCLOSED ALPHANUMERICS",
1038                             "ENCLOSEDALPHANUMERICS");
1039
1040        /**
1041         * Constant for the "Box Drawing" Unicode character block.
1042         * @since 1.2
1043         */
1044        public static final UnicodeBlock BOX_DRAWING =
1045            new UnicodeBlock("BOX_DRAWING",
1046                             "BOX DRAWING",
1047                             "BOXDRAWING");
1048
1049        /**
1050         * Constant for the "Block Elements" Unicode character block.
1051         * @since 1.2
1052         */
1053        public static final UnicodeBlock BLOCK_ELEMENTS =
1054            new UnicodeBlock("BLOCK_ELEMENTS",
1055                             "BLOCK ELEMENTS",
1056                             "BLOCKELEMENTS");
1057
1058        /**
1059         * Constant for the "Geometric Shapes" Unicode character block.
1060         * @since 1.2
1061         */
1062        public static final UnicodeBlock GEOMETRIC_SHAPES =
1063            new UnicodeBlock("GEOMETRIC_SHAPES",
1064                             "GEOMETRIC SHAPES",
1065                             "GEOMETRICSHAPES");
1066
1067        /**
1068         * Constant for the "Miscellaneous Symbols" Unicode character block.
1069         * @since 1.2
1070         */
1071        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1072            new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1073                             "MISCELLANEOUS SYMBOLS",
1074                             "MISCELLANEOUSSYMBOLS");
1075
1076        /**
1077         * Constant for the "Dingbats" Unicode character block.
1078         * @since 1.2
1079         */
1080        public static final UnicodeBlock DINGBATS =
1081            new UnicodeBlock("DINGBATS");
1082
1083        /**
1084         * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1085         * @since 1.2
1086         */
1087        public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1088            new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1089                             "CJK SYMBOLS AND PUNCTUATION",
1090                             "CJKSYMBOLSANDPUNCTUATION");
1091
1092        /**
1093         * Constant for the "Hiragana" Unicode character block.
1094         * @since 1.2
1095         */
1096        public static final UnicodeBlock HIRAGANA =
1097            new UnicodeBlock("HIRAGANA");
1098
1099        /**
1100         * Constant for the "Katakana" Unicode character block.
1101         * @since 1.2
1102         */
1103        public static final UnicodeBlock KATAKANA =
1104            new UnicodeBlock("KATAKANA");
1105
1106        /**
1107         * Constant for the "Bopomofo" Unicode character block.
1108         * @since 1.2
1109         */
1110        public static final UnicodeBlock BOPOMOFO =
1111            new UnicodeBlock("BOPOMOFO");
1112
1113        /**
1114         * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1115         * @since 1.2
1116         */
1117        public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1118            new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1119                             "HANGUL COMPATIBILITY JAMO",
1120                             "HANGULCOMPATIBILITYJAMO");
1121
1122        /**
1123         * Constant for the "Kanbun" Unicode character block.
1124         * @since 1.2
1125         */
1126        public static final UnicodeBlock KANBUN =
1127            new UnicodeBlock("KANBUN");
1128
1129        /**
1130         * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1131         * @since 1.2
1132         */
1133        public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1134            new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1135                             "ENCLOSED CJK LETTERS AND MONTHS",
1136                             "ENCLOSEDCJKLETTERSANDMONTHS");
1137
1138        /**
1139         * Constant for the "CJK Compatibility" Unicode character block.
1140         * @since 1.2
1141         */
1142        public static final UnicodeBlock CJK_COMPATIBILITY =
1143            new UnicodeBlock("CJK_COMPATIBILITY",
1144                             "CJK COMPATIBILITY",
1145                             "CJKCOMPATIBILITY");
1146
1147        /**
1148         * Constant for the "CJK Unified Ideographs" Unicode character block.
1149         * @since 1.2
1150         */
1151        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1152            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1153                             "CJK UNIFIED IDEOGRAPHS",
1154                             "CJKUNIFIEDIDEOGRAPHS");
1155
1156        /**
1157         * Constant for the "Hangul Syllables" Unicode character block.
1158         * @since 1.2
1159         */
1160        public static final UnicodeBlock HANGUL_SYLLABLES =
1161            new UnicodeBlock("HANGUL_SYLLABLES",
1162                             "HANGUL SYLLABLES",
1163                             "HANGULSYLLABLES");
1164
1165        /**
1166         * Constant for the "Private Use Area" Unicode character block.
1167         * @since 1.2
1168         */
1169        public static final UnicodeBlock PRIVATE_USE_AREA =
1170            new UnicodeBlock("PRIVATE_USE_AREA",
1171                             "PRIVATE USE AREA",
1172                             "PRIVATEUSEAREA");
1173
1174        /**
1175         * Constant for the "CJK Compatibility Ideographs" Unicode character
1176         * block.
1177         * @since 1.2
1178         */
1179        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1180            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1181                             "CJK COMPATIBILITY IDEOGRAPHS",
1182                             "CJKCOMPATIBILITYIDEOGRAPHS");
1183
1184        /**
1185         * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1186         * @since 1.2
1187         */
1188        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1189            new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1190                             "ALPHABETIC PRESENTATION FORMS",
1191                             "ALPHABETICPRESENTATIONFORMS");
1192
1193        /**
1194         * Constant for the "Arabic Presentation Forms-A" Unicode character
1195         * block.
1196         * @since 1.2
1197         */
1198        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1199            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1200                             "ARABIC PRESENTATION FORMS-A",
1201                             "ARABICPRESENTATIONFORMS-A");
1202
1203        /**
1204         * Constant for the "Combining Half Marks" Unicode character block.
1205         * @since 1.2
1206         */
1207        public static final UnicodeBlock COMBINING_HALF_MARKS =
1208            new UnicodeBlock("COMBINING_HALF_MARKS",
1209                             "COMBINING HALF MARKS",
1210                             "COMBININGHALFMARKS");
1211
1212        /**
1213         * Constant for the "CJK Compatibility Forms" Unicode character block.
1214         * @since 1.2
1215         */
1216        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1217            new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1218                             "CJK COMPATIBILITY FORMS",
1219                             "CJKCOMPATIBILITYFORMS");
1220
1221        /**
1222         * Constant for the "Small Form Variants" Unicode character block.
1223         * @since 1.2
1224         */
1225        public static final UnicodeBlock SMALL_FORM_VARIANTS =
1226            new UnicodeBlock("SMALL_FORM_VARIANTS",
1227                             "SMALL FORM VARIANTS",
1228                             "SMALLFORMVARIANTS");
1229
1230        /**
1231         * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1232         * @since 1.2
1233         */
1234        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1235            new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1236                             "ARABIC PRESENTATION FORMS-B",
1237                             "ARABICPRESENTATIONFORMS-B");
1238
1239        /**
1240         * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1241         * block.
1242         * @since 1.2
1243         */
1244        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1245            new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1246                             "HALFWIDTH AND FULLWIDTH FORMS",
1247                             "HALFWIDTHANDFULLWIDTHFORMS");
1248
1249        /**
1250         * Constant for the "Specials" Unicode character block.
1251         * @since 1.2
1252         */
1253        public static final UnicodeBlock SPECIALS =
1254            new UnicodeBlock("SPECIALS");
1255
1256        /**
             * Constant for the legacy "Surrogates Area" character block.
             *
1257         * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1258         *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1259         *             {@link #LOW_SURROGATES}. These new constants match
1260         *             the block definitions of the Unicode Standard.
1261         *             The {@link #of(char)} and {@link #of(int)} methods
1262         *             return the new constants, not SURROGATES_AREA.
1263         */
1264        @Deprecated
1265        public static final UnicodeBlock SURROGATES_AREA =
                // NOTE(review): unlike the neighbouring constants, this one is
                // created through a (String, boolean) constructor call with
                // false; presumably the flag controls whether the name is
                // registered for lookup by name -- confirm against the
                // constructor definition.
1266            new UnicodeBlock("SURROGATES_AREA", false);
1267
1268        /**
1269         * Constant for the "Syriac" Unicode character block.
1270         * @since 1.4
1271         */
1272        public static final UnicodeBlock SYRIAC =
1273            new UnicodeBlock("SYRIAC");
1274
1275        /**
1276         * Constant for the "Thaana" Unicode character block.
1277         * @since 1.4
1278         */
1279        public static final UnicodeBlock THAANA =
1280            new UnicodeBlock("THAANA");
1281
1282        /**
1283         * Constant for the "Sinhala" Unicode character block.
1284         * @since 1.4
1285         */
1286        public static final UnicodeBlock SINHALA =
1287            new UnicodeBlock("SINHALA");
1288
1289        /**
1290         * Constant for the "Myanmar" Unicode character block.
1291         * @since 1.4
1292         */
1293        public static final UnicodeBlock MYANMAR =
1294            new UnicodeBlock("MYANMAR");
1295
1296        /**
1297         * Constant for the "Ethiopic" Unicode character block.
1298         * @since 1.4
1299         */
1300        public static final UnicodeBlock ETHIOPIC =
1301            new UnicodeBlock("ETHIOPIC");
1302
1303        /**
1304         * Constant for the "Cherokee" Unicode character block.
1305         * @since 1.4
1306         */
1307        public static final UnicodeBlock CHEROKEE =
1308            new UnicodeBlock("CHEROKEE");
1309
1310        /**
1311         * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1312         * @since 1.4
1313         */
1314        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1315            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1316                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1317                             "UNIFIEDCANADIANABORIGINALSYLLABICS");
1318
1319        /**
1320         * Constant for the "Ogham" Unicode character block.
1321         * @since 1.4
1322         */
1323        public static final UnicodeBlock OGHAM =
1324            new UnicodeBlock("OGHAM");
1325
1326        /**
1327         * Constant for the "Runic" Unicode character block.
1328         * @since 1.4
1329         */
1330        public static final UnicodeBlock RUNIC =
1331            new UnicodeBlock("RUNIC");
1332
1333        /**
1334         * Constant for the "Khmer" Unicode character block.
1335         * @since 1.4
1336         */
1337        public static final UnicodeBlock KHMER =
1338            new UnicodeBlock("KHMER");
1339
1340        /**
1341         * Constant for the "Mongolian" Unicode character block.
1342         * @since 1.4
1343         */
1344        public static final UnicodeBlock MONGOLIAN =
1345            new UnicodeBlock("MONGOLIAN");
1346
1347        /**
1348         * Constant for the "Braille Patterns" Unicode character block.
1349         * @since 1.4
1350         */
1351        public static final UnicodeBlock BRAILLE_PATTERNS =
1352            new UnicodeBlock("BRAILLE_PATTERNS",
1353                             "BRAILLE PATTERNS",
1354                             "BRAILLEPATTERNS");
1355
1356        /**
1357         * Constant for the "CJK Radicals Supplement" Unicode character block.
1358         * @since 1.4
1359         */
1360        public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1361            new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1362                             "CJK RADICALS SUPPLEMENT",
1363                             "CJKRADICALSSUPPLEMENT");
1364
1365        /**
1366         * Constant for the "Kangxi Radicals" Unicode character block.
1367         * @since 1.4
1368         */
1369        public static final UnicodeBlock KANGXI_RADICALS =
1370            new UnicodeBlock("KANGXI_RADICALS",
1371                             "KANGXI RADICALS",
1372                             "KANGXIRADICALS");
1373
1374        /**
1375         * Constant for the "Ideographic Description Characters" Unicode character block.
1376         * @since 1.4
1377         */
1378        public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1379            new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1380                             "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1381                             "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1382
1383        /**
1384         * Constant for the "Bopomofo Extended" Unicode character block.
1385         * @since 1.4
1386         */
1387        public static final UnicodeBlock BOPOMOFO_EXTENDED =
1388            new UnicodeBlock("BOPOMOFO_EXTENDED",
1389                             "BOPOMOFO EXTENDED",
1390                             "BOPOMOFOEXTENDED");
1391
1392        /**
1393         * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1394         * @since 1.4
1395         */
1396        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1397            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1398                             "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1399                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1400
1401        /**
1402         * Constant for the "Yi Syllables" Unicode character block.
1403         * @since 1.4
1404         */
1405        public static final UnicodeBlock YI_SYLLABLES =
1406            new UnicodeBlock("YI_SYLLABLES",
1407                             "YI SYLLABLES",
1408                             "YISYLLABLES");
1409
1410        /**
1411         * Constant for the "Yi Radicals" Unicode character block.
1412         * @since 1.4
1413         */
1414        public static final UnicodeBlock YI_RADICALS =
1415            new UnicodeBlock("YI_RADICALS",
1416                             "YI RADICALS",
1417                             "YIRADICALS");
1418
1419        /**
1420         * Constant for the "Cyrillic Supplementary" Unicode character block.
             * <p>
             * This block is also known as "Cyrillic Supplement"; both spellings
             * appear among the names passed to the constructor below.
1421         * @since 1.5
1422         */
1423        public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1424            new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1425                             "CYRILLIC SUPPLEMENTARY",
1426                             "CYRILLICSUPPLEMENTARY",
1427                             "CYRILLIC SUPPLEMENT",
1428                             "CYRILLICSUPPLEMENT");
1429
1430        /**
1431         * Constant for the "Tagalog" Unicode character block.
1432         * @since 1.5
1433         */
1434        public static final UnicodeBlock TAGALOG =
1435            new UnicodeBlock("TAGALOG");
1436
1437        /**
1438         * Constant for the "Hanunoo" Unicode character block.
1439         * @since 1.5
1440         */
1441        public static final UnicodeBlock HANUNOO =
1442            new UnicodeBlock("HANUNOO");
1443
1444        /**
1445         * Constant for the "Buhid" Unicode character block.
1446         * @since 1.5
1447         */
1448        public static final UnicodeBlock BUHID =
1449            new UnicodeBlock("BUHID");
1450
1451        /**
1452         * Constant for the "Tagbanwa" Unicode character block.
1453         * @since 1.5
1454         */
1455        public static final UnicodeBlock TAGBANWA =
1456            new UnicodeBlock("TAGBANWA");
1457
1458        /**
1459         * Constant for the "Limbu" Unicode character block.
1460         * @since 1.5
1461         */
1462        public static final UnicodeBlock LIMBU =
1463            new UnicodeBlock("LIMBU");
1464
1465        /**
1466         * Constant for the "Tai Le" Unicode character block.
1467         * @since 1.5
1468         */
1469        public static final UnicodeBlock TAI_LE =
1470            new UnicodeBlock("TAI_LE",
1471                             "TAI LE",
1472                             "TAILE");
1473
1474        /**
1475         * Constant for the "Khmer Symbols" Unicode character block.
1476         * @since 1.5
1477         */
1478        public static final UnicodeBlock KHMER_SYMBOLS =
1479            new UnicodeBlock("KHMER_SYMBOLS",
1480                             "KHMER SYMBOLS",
1481                             "KHMERSYMBOLS");
1482
1483        /**
1484         * Constant for the "Phonetic Extensions" Unicode character block.
1485         * @since 1.5
1486         */
1487        public static final UnicodeBlock PHONETIC_EXTENSIONS =
1488            new UnicodeBlock("PHONETIC_EXTENSIONS",
1489                             "PHONETIC EXTENSIONS",
1490                             "PHONETICEXTENSIONS");
1491
1492        /**
1493         * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1494         * @since 1.5
1495         */
1496        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1497            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1498                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1499                             "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1500
1501        /**
1502         * Constant for the "Supplemental Arrows-A" Unicode character block.
1503         * @since 1.5
1504         */
1505        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1506            new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1507                             "SUPPLEMENTAL ARROWS-A",
1508                             "SUPPLEMENTALARROWS-A");
1509
1510        /**
1511         * Constant for the "Supplemental Arrows-B" Unicode character block.
1512         * @since 1.5
1513         */
1514        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1515            new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1516                             "SUPPLEMENTAL ARROWS-B",
1517                             "SUPPLEMENTALARROWS-B");
1518
1519        /**
1520         * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1521         * character block.
1522         * @since 1.5
1523         */
1524        public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1525            new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1526                             "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1527                             "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1528
1529        /**
1530         * Constant for the "Supplemental Mathematical Operators" Unicode
1531         * character block.
1532         * @since 1.5
1533         */
1534        public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1535            new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1536                             "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1537                             "SUPPLEMENTALMATHEMATICALOPERATORS");
1538
1539        /**
1540         * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1541         * block.
1542         * @since 1.5
1543         */
1544        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1545            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1546                             "MISCELLANEOUS SYMBOLS AND ARROWS",
1547                             "MISCELLANEOUSSYMBOLSANDARROWS");
1548
1549        /**
1550         * Constant for the "Katakana Phonetic Extensions" Unicode character
1551         * block.
1552         * @since 1.5
1553         */
1554        public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1555            new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1556                             "KATAKANA PHONETIC EXTENSIONS",
1557                             "KATAKANAPHONETICEXTENSIONS");
1558
1559        /**
1560         * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1561         * @since 1.5
1562         */
1563        public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1564            new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1565                             "YIJING HEXAGRAM SYMBOLS",
1566                             "YIJINGHEXAGRAMSYMBOLS");
1567
1568        /**
1569         * Constant for the "Variation Selectors" Unicode character block.
1570         * @since 1.5
1571         */
1572        public static final UnicodeBlock VARIATION_SELECTORS =
1573            new UnicodeBlock("VARIATION_SELECTORS",
1574                             "VARIATION SELECTORS",
1575                             "VARIATIONSELECTORS");
1576
1577        /**
1578         * Constant for the "Linear B Syllabary" Unicode character block.
1579         * @since 1.5
1580         */
1581        public static final UnicodeBlock LINEAR_B_SYLLABARY =
1582            new UnicodeBlock("LINEAR_B_SYLLABARY",
1583                             "LINEAR B SYLLABARY",
1584                             "LINEARBSYLLABARY");
1585
1586        /**
1587         * Constant for the "Linear B Ideograms" Unicode character block.
1588         * @since 1.5
1589         */
1590        public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1591            new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1592                             "LINEAR B IDEOGRAMS",
1593                             "LINEARBIDEOGRAMS");
1594
1595        /**
1596         * Constant for the "Aegean Numbers" Unicode character block.
1597         * @since 1.5
1598         */
1599        public static final UnicodeBlock AEGEAN_NUMBERS =
1600            new UnicodeBlock("AEGEAN_NUMBERS",
1601                             "AEGEAN NUMBERS",
1602                             "AEGEANNUMBERS");
1603
1604        /**
1605         * Constant for the "Old Italic" Unicode character block.
1606         * @since 1.5
1607         */
1608        public static final UnicodeBlock OLD_ITALIC =
1609            new UnicodeBlock("OLD_ITALIC",
1610                             "OLD ITALIC",
1611                             "OLDITALIC");
1612
1613        /**
1614         * Constant for the "Gothic" Unicode character block.
1615         * @since 1.5
1616         */
1617        public static final UnicodeBlock GOTHIC =
1618            new UnicodeBlock("GOTHIC");
1619
1620        /**
1621         * Constant for the "Ugaritic" Unicode character block.
1622         * @since 1.5
1623         */
1624        public static final UnicodeBlock UGARITIC =
1625            new UnicodeBlock("UGARITIC");
1626
1627        /**
1628         * Constant for the "Deseret" Unicode character block.
1629         * @since 1.5
1630         */
1631        public static final UnicodeBlock DESERET =
1632            new UnicodeBlock("DESERET");
1633
1634        /**
1635         * Constant for the "Shavian" Unicode character block.
1636         * @since 1.5
1637         */
1638        public static final UnicodeBlock SHAVIAN =
1639            new UnicodeBlock("SHAVIAN");
1640
1641        /**
1642         * Constant for the "Osmanya" Unicode character block.
1643         * @since 1.5
1644         */
1645        public static final UnicodeBlock OSMANYA =
1646            new UnicodeBlock("OSMANYA");
1647
1648        /**
1649         * Constant for the "Cypriot Syllabary" Unicode character block.
1650         * @since 1.5
1651         */
1652        public static final UnicodeBlock CYPRIOT_SYLLABARY =
1653            new UnicodeBlock("CYPRIOT_SYLLABARY",
1654                             "CYPRIOT SYLLABARY",
1655                             "CYPRIOTSYLLABARY");
1656
1657        /**
1658         * Constant for the "Byzantine Musical Symbols" Unicode character block.
1659         * @since 1.5
1660         */
1661        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1662            new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1663                             "BYZANTINE MUSICAL SYMBOLS",
1664                             "BYZANTINEMUSICALSYMBOLS");
1665
1666        /**
1667         * Constant for the "Musical Symbols" Unicode character block.
1668         * @since 1.5
1669         */
1670        public static final UnicodeBlock MUSICAL_SYMBOLS =
1671            new UnicodeBlock("MUSICAL_SYMBOLS",
1672                             "MUSICAL SYMBOLS",
1673                             "MUSICALSYMBOLS");
1674
1675        /**
1676         * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1677         * @since 1.5
1678         */
1679        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1680            new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1681                             "TAI XUAN JING SYMBOLS",
1682                             "TAIXUANJINGSYMBOLS");
1683
1684        /**
1685         * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1686         * character block.
1687         * @since 1.5
1688         */
1689        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1690            new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1691                             "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1692                             "MATHEMATICALALPHANUMERICSYMBOLS");
1693
1694        /**
1695         * Constant for the "CJK Unified Ideographs Extension B" Unicode
1696         * character block.
1697         * @since 1.5
1698         */
1699        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1700            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1701                             "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1702                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1703
1704        /**
1705         * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1706         * @since 1.5
1707         */
1708        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1709            new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1710                             "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1711                             "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1712
1713        /**
1714         * Constant for the "Tags" Unicode character block.
1715         * @since 1.5
1716         */
1717        public static final UnicodeBlock TAGS =
1718            new UnicodeBlock("TAGS");
1719
1720        /**
1721         * Constant for the "Variation Selectors Supplement" Unicode character
1722         * block.
1723         * @since 1.5
1724         */
1725        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1726            new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1727                             "VARIATION SELECTORS SUPPLEMENT",
1728                             "VARIATIONSELECTORSSUPPLEMENT");
1729
1730        /**
1731         * Constant for the "Supplementary Private Use Area-A" Unicode character
1732         * block.
1733         * @since 1.5
1734         */
1735        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1736            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1737                             "SUPPLEMENTARY PRIVATE USE AREA-A",
1738                             "SUPPLEMENTARYPRIVATEUSEAREA-A");
1739
1740        /**
1741         * Constant for the "Supplementary Private Use Area-B" Unicode character
1742         * block.
1743         * @since 1.5
1744         */
1745        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1746            new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1747                             "SUPPLEMENTARY PRIVATE USE AREA-B",
1748                             "SUPPLEMENTARYPRIVATEUSEAREA-B");
1749
1750        /**
1751         * Constant for the "High Surrogates" Unicode character block.
1752         * This block represents codepoint values in the high surrogate
1753         * range: U+D800 through U+DB7F
1754         *
1755         * @since 1.5
1756         */
1757        public static final UnicodeBlock HIGH_SURROGATES =
1758            new UnicodeBlock("HIGH_SURROGATES",
1759                             "HIGH SURROGATES",
1760                             "HIGHSURROGATES");
1761
1762        /**
1763         * Constant for the "High Private Use Surrogates" Unicode character
1764         * block.
1765         * This block represents codepoint values in the private use high
1766         * surrogate range: U+DB80 through U+DBFF
1767         *
1768         * @since 1.5
1769         */
1770        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1771            new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1772                             "HIGH PRIVATE USE SURROGATES",
1773                             "HIGHPRIVATEUSESURROGATES");
1774
1775        /**
1776         * Constant for the "Low Surrogates" Unicode character block.
1777         * This block represents codepoint values in the low surrogate
1778         * range: U+DC00 through U+DFFF
1779         *
1780         * @since 1.5
1781         */
1782        public static final UnicodeBlock LOW_SURROGATES =
1783            new UnicodeBlock("LOW_SURROGATES",
1784                             "LOW SURROGATES",
1785                             "LOWSURROGATES");
1786
1787        /**
1788         * Constant for the "Arabic Supplement" Unicode character block.
1789         * @since 1.7
1790         */
1791        public static final UnicodeBlock ARABIC_SUPPLEMENT =
1792            new UnicodeBlock("ARABIC_SUPPLEMENT",
1793                             "ARABIC SUPPLEMENT",
1794                             "ARABICSUPPLEMENT");
1795
1796        /**
1797         * Constant for the "NKo" Unicode character block.
1798         * @since 1.7
1799         */
1800        public static final UnicodeBlock NKO =
1801            new UnicodeBlock("NKO");
1802
1803        /**
1804         * Constant for the "Samaritan" Unicode character block.
1805         * @since 1.7
1806         */
1807        public static final UnicodeBlock SAMARITAN =
1808            new UnicodeBlock("SAMARITAN");
1809
1810        /**
1811         * Constant for the "Mandaic" Unicode character block.
1812         * @since 1.7
1813         */
1814        public static final UnicodeBlock MANDAIC =
1815            new UnicodeBlock("MANDAIC");
1816
1817        /**
1818         * Constant for the "Ethiopic Supplement" Unicode character block.
1819         * @since 1.7
1820         */
1821        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1822            new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1823                             "ETHIOPIC SUPPLEMENT",
1824                             "ETHIOPICSUPPLEMENT");
1825
1826        /**
1827         * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1828         * Unicode character block.
1829         * @since 1.7
1830         */
1831        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1832            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1833                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1834                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1835
1836        /**
1837         * Constant for the "New Tai Lue" Unicode character block.
1838         * @since 1.7
1839         */
1840        public static final UnicodeBlock NEW_TAI_LUE =
1841            new UnicodeBlock("NEW_TAI_LUE",
1842                             "NEW TAI LUE",
1843                             "NEWTAILUE");
1844
1845        /**
1846         * Constant for the "Buginese" Unicode character block.
1847         * @since 1.7
1848         */
1849        public static final UnicodeBlock BUGINESE =
1850            new UnicodeBlock("BUGINESE");
1851
1852        /**
1853         * Constant for the "Tai Tham" Unicode character block.
1854         * @since 1.7
1855         */
1856        public static final UnicodeBlock TAI_THAM =
1857            new UnicodeBlock("TAI_THAM",
1858                             "TAI THAM",
1859                             "TAITHAM");
1860
1861        /**
1862         * Constant for the "Balinese" Unicode character block.
1863         * @since 1.7
1864         */
1865        public static final UnicodeBlock BALINESE =
1866            new UnicodeBlock("BALINESE");
1867
1868        /**
1869         * Constant for the "Sundanese" Unicode character block.
1870         * @since 1.7
1871         */
1872        public static final UnicodeBlock SUNDANESE =
1873            new UnicodeBlock("SUNDANESE");
1874
1875        /**
1876         * Constant for the "Batak" Unicode character block.
1877         * @since 1.7
1878         */
1879        public static final UnicodeBlock BATAK =
1880            new UnicodeBlock("BATAK");
1881
1882        /**
1883         * Constant for the "Lepcha" Unicode character block.
1884         * @since 1.7
1885         */
1886        public static final UnicodeBlock LEPCHA =
1887            new UnicodeBlock("LEPCHA");
1888
1889        /**
1890         * Constant for the "Ol Chiki" Unicode character block.
1891         * @since 1.7
1892         */
1893        public static final UnicodeBlock OL_CHIKI =
1894            new UnicodeBlock("OL_CHIKI",
1895                             "OL CHIKI",
1896                             "OLCHIKI");
1897
1898        /**
1899         * Constant for the "Vedic Extensions" Unicode character block.
1900         * @since 1.7
1901         */
1902        public static final UnicodeBlock VEDIC_EXTENSIONS =
1903            new UnicodeBlock("VEDIC_EXTENSIONS",
1904                             "VEDIC EXTENSIONS",
1905                             "VEDICEXTENSIONS");
1906
1907        /**
1908         * Constant for the "Phonetic Extensions Supplement" Unicode character
1909         * block.
1910         * @since 1.7
1911         */
1912        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1913            new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1914                             "PHONETIC EXTENSIONS SUPPLEMENT",
1915                             "PHONETICEXTENSIONSSUPPLEMENT");
1916
1917        /**
1918         * Constant for the "Combining Diacritical Marks Supplement" Unicode
1919         * character block.
1920         * @since 1.7
1921         */
1922        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1923            new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1924                             "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1925                             "COMBININGDIACRITICALMARKSSUPPLEMENT");
1926
1927        /**
1928         * Constant for the "Glagolitic" Unicode character block.
1929         * @since 1.7
1930         */
1931        public static final UnicodeBlock GLAGOLITIC =
1932            new UnicodeBlock("GLAGOLITIC");
1933
1934        /**
1935         * Constant for the "Latin Extended-C" Unicode character block.
1936         * @since 1.7
1937         */
1938        public static final UnicodeBlock LATIN_EXTENDED_C =
1939            new UnicodeBlock("LATIN_EXTENDED_C",
1940                             "LATIN EXTENDED-C",
1941                             "LATINEXTENDED-C");
1942
1943        /**
1944         * Constant for the "Coptic" Unicode character block.
1945         * @since 1.7
1946         */
1947        public static final UnicodeBlock COPTIC =
1948            new UnicodeBlock("COPTIC");
1949
1950        /**
1951         * Constant for the "Georgian Supplement" Unicode character block.
1952         * @since 1.7
1953         */
1954        public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1955            new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1956                             "GEORGIAN SUPPLEMENT",
1957                             "GEORGIANSUPPLEMENT");
1958
1959        /**
1960         * Constant for the "Tifinagh" Unicode character block.
1961         * @since 1.7
1962         */
1963        public static final UnicodeBlock TIFINAGH =
1964            new UnicodeBlock("TIFINAGH");
1965
1966        /**
1967         * Constant for the "Ethiopic Extended" Unicode character block.
1968         * @since 1.7
1969         */
1970        public static final UnicodeBlock ETHIOPIC_EXTENDED =
1971            new UnicodeBlock("ETHIOPIC_EXTENDED",
1972                             "ETHIOPIC EXTENDED",
1973                             "ETHIOPICEXTENDED");
1974
1975        /**
1976         * Constant for the "Cyrillic Extended-A" Unicode character block.
1977         * @since 1.7
1978         */
1979        public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1980            new UnicodeBlock("CYRILLIC_EXTENDED_A",
1981                             "CYRILLIC EXTENDED-A",
1982                             "CYRILLICEXTENDED-A");
1983
1984        /**
1985         * Constant for the "Supplemental Punctuation" Unicode character block.
1986         * @since 1.7
1987         */
1988        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1989            new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
1990                             "SUPPLEMENTAL PUNCTUATION",
1991                             "SUPPLEMENTALPUNCTUATION");
1992
1993        /**
1994         * Constant for the "CJK Strokes" Unicode character block.
1995         * @since 1.7
1996         */
1997        public static final UnicodeBlock CJK_STROKES =
1998            new UnicodeBlock("CJK_STROKES",
1999                             "CJK STROKES",
2000                             "CJKSTROKES");
2001
2002        /**
2003         * Constant for the "Lisu" Unicode character block.
2004         * @since 1.7
2005         */
2006        public static final UnicodeBlock LISU =
2007            new UnicodeBlock("LISU");
2008
2009        /**
2010         * Constant for the "Vai" Unicode character block.
2011         * @since 1.7
2012         */
2013        public static final UnicodeBlock VAI =
2014            new UnicodeBlock("VAI");
2015
2016        /**
2017         * Constant for the "Cyrillic Extended-B" Unicode character block.
2018         * @since 1.7
2019         */
2020        public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2021            new UnicodeBlock("CYRILLIC_EXTENDED_B",
2022                             "CYRILLIC EXTENDED-B",
2023                             "CYRILLICEXTENDED-B");
2024
2025        /**
2026         * Constant for the "Bamum" Unicode character block.
2027         * @since 1.7
2028         */
2029        public static final UnicodeBlock BAMUM =
2030            new UnicodeBlock("BAMUM");
2031
2032        /**
2033         * Constant for the "Modifier Tone Letters" Unicode character block.
2034         * @since 1.7
2035         */
2036        public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2037            new UnicodeBlock("MODIFIER_TONE_LETTERS",
2038                             "MODIFIER TONE LETTERS",
2039                             "MODIFIERTONELETTERS");
2040
2041        /**
2042         * Constant for the "Latin Extended-D" Unicode character block.
2043         * @since 1.7
2044         */
2045        public static final UnicodeBlock LATIN_EXTENDED_D =
2046            new UnicodeBlock("LATIN_EXTENDED_D",
2047                             "LATIN EXTENDED-D",
2048                             "LATINEXTENDED-D");
2049
2050        /**
2051         * Constant for the "Syloti Nagri" Unicode character block.
2052         * @since 1.7
2053         */
2054        public static final UnicodeBlock SYLOTI_NAGRI =
2055            new UnicodeBlock("SYLOTI_NAGRI",
2056                             "SYLOTI NAGRI",
2057                             "SYLOTINAGRI");
2058
2059        /**
2060         * Constant for the "Common Indic Number Forms" Unicode character block.
2061         * @since 1.7
2062         */
2063        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2064            new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2065                             "COMMON INDIC NUMBER FORMS",
2066                             "COMMONINDICNUMBERFORMS");
2067
2068        /**
2069         * Constant for the "Phags-pa" Unicode character block.
2070         * @since 1.7
2071         */
2072        public static final UnicodeBlock PHAGS_PA =
2073            new UnicodeBlock("PHAGS_PA",
2074                             "PHAGS-PA");
2075
2076        /**
2077         * Constant for the "Saurashtra" Unicode character block.
2078         * @since 1.7
2079         */
2080        public static final UnicodeBlock SAURASHTRA =
2081            new UnicodeBlock("SAURASHTRA");
2082
2083        /**
2084         * Constant for the "Devanagari Extended" Unicode character block.
2085         * @since 1.7
2086         */
2087        public static final UnicodeBlock DEVANAGARI_EXTENDED =
2088            new UnicodeBlock("DEVANAGARI_EXTENDED",
2089                             "DEVANAGARI EXTENDED",
2090                             "DEVANAGARIEXTENDED");
2091
2092        /**
2093         * Constant for the "Kayah Li" Unicode character block.
2094         * @since 1.7
2095         */
2096        public static final UnicodeBlock KAYAH_LI =
2097            new UnicodeBlock("KAYAH_LI",
2098                             "KAYAH LI",
2099                             "KAYAHLI");
2100
2101        /**
2102         * Constant for the "Rejang" Unicode character block.
2103         * @since 1.7
2104         */
2105        public static final UnicodeBlock REJANG =
2106            new UnicodeBlock("REJANG");
2107
2108        /**
2109         * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2110         * @since 1.7
2111         */
2112        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2113            new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2114                             "HANGUL JAMO EXTENDED-A",
2115                             "HANGULJAMOEXTENDED-A");
2116
2117        /**
2118         * Constant for the "Javanese" Unicode character block.
2119         * @since 1.7
2120         */
2121        public static final UnicodeBlock JAVANESE =
2122            new UnicodeBlock("JAVANESE");
2123
2124        /**
2125         * Constant for the "Cham" Unicode character block.
2126         * @since 1.7
2127         */
2128        public static final UnicodeBlock CHAM =
2129            new UnicodeBlock("CHAM");
2130
2131        /**
2132         * Constant for the "Myanmar Extended-A" Unicode character block.
2133         * @since 1.7
2134         */
2135        public static final UnicodeBlock MYANMAR_EXTENDED_A =
2136            new UnicodeBlock("MYANMAR_EXTENDED_A",
2137                             "MYANMAR EXTENDED-A",
2138                             "MYANMAREXTENDED-A");
2139
2140        /**
2141         * Constant for the "Tai Viet" Unicode character block.
2142         * @since 1.7
2143         */
2144        public static final UnicodeBlock TAI_VIET =
2145            new UnicodeBlock("TAI_VIET",
2146                             "TAI VIET",
2147                             "TAIVIET");
2148
2149        /**
2150         * Constant for the "Ethiopic Extended-A" Unicode character block.
2151         * @since 1.7
2152         */
2153        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2154            new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2155                             "ETHIOPIC EXTENDED-A",
2156                             "ETHIOPICEXTENDED-A");
2157
2158        /**
2159         * Constant for the "Meetei Mayek" Unicode character block.
2160         * @since 1.7
2161         */
2162        public static final UnicodeBlock MEETEI_MAYEK =
2163            new UnicodeBlock("MEETEI_MAYEK",
2164                             "MEETEI MAYEK",
2165                             "MEETEIMAYEK");
2166
2167        /**
2168         * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2169         * @since 1.7
2170         */
2171        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2172            new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2173                             "HANGUL JAMO EXTENDED-B",
2174                             "HANGULJAMOEXTENDED-B");
2175
2176        /**
2177         * Constant for the "Vertical Forms" Unicode character block.
2178         * @since 1.7
2179         */
2180        public static final UnicodeBlock VERTICAL_FORMS =
2181            new UnicodeBlock("VERTICAL_FORMS",
2182                             "VERTICAL FORMS",
2183                             "VERTICALFORMS");
2184
2185        /**
2186         * Constant for the "Ancient Greek Numbers" Unicode character block.
2187         * @since 1.7
2188         */
2189        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2190            new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2191                             "ANCIENT GREEK NUMBERS",
2192                             "ANCIENTGREEKNUMBERS");
2193
2194        /**
2195         * Constant for the "Ancient Symbols" Unicode character block.
2196         * @since 1.7
2197         */
2198        public static final UnicodeBlock ANCIENT_SYMBOLS =
2199            new UnicodeBlock("ANCIENT_SYMBOLS",
2200                             "ANCIENT SYMBOLS",
2201                             "ANCIENTSYMBOLS");
2202
2203        /**
2204         * Constant for the "Phaistos Disc" Unicode character block.
2205         * @since 1.7
2206         */
2207        public static final UnicodeBlock PHAISTOS_DISC =
2208            new UnicodeBlock("PHAISTOS_DISC",
2209                             "PHAISTOS DISC",
2210                             "PHAISTOSDISC");
2211
2212        /**
2213         * Constant for the "Lycian" Unicode character block.
2214         * @since 1.7
2215         */
2216        public static final UnicodeBlock LYCIAN =
2217            new UnicodeBlock("LYCIAN");
2218
2219        /**
2220         * Constant for the "Carian" Unicode character block.
2221         * @since 1.7
2222         */
2223        public static final UnicodeBlock CARIAN =
2224            new UnicodeBlock("CARIAN");
2225
2226        /**
2227         * Constant for the "Old Persian" Unicode character block.
2228         * @since 1.7
2229         */
2230        public static final UnicodeBlock OLD_PERSIAN =
2231            new UnicodeBlock("OLD_PERSIAN",
2232                             "OLD PERSIAN",
2233                             "OLDPERSIAN");
2234
2235        /**
2236         * Constant for the "Imperial Aramaic" Unicode character block.
2237         * @since 1.7
2238         */
2239        public static final UnicodeBlock IMPERIAL_ARAMAIC =
2240            new UnicodeBlock("IMPERIAL_ARAMAIC",
2241                             "IMPERIAL ARAMAIC",
2242                             "IMPERIALARAMAIC");
2243
2244        /**
2245         * Constant for the "Phoenician" Unicode character block.
2246         * @since 1.7
2247         */
2248        public static final UnicodeBlock PHOENICIAN =
2249            new UnicodeBlock("PHOENICIAN");
2250
2251        /**
2252         * Constant for the "Lydian" Unicode character block.
2253         * @since 1.7
2254         */
2255        public static final UnicodeBlock LYDIAN =
2256            new UnicodeBlock("LYDIAN");
2257
2258        /**
2259         * Constant for the "Kharoshthi" Unicode character block.
2260         * @since 1.7
2261         */
2262        public static final UnicodeBlock KHAROSHTHI =
2263            new UnicodeBlock("KHAROSHTHI");
2264
2265        /**
2266         * Constant for the "Old South Arabian" Unicode character block.
2267         * @since 1.7
2268         */
2269        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2270            new UnicodeBlock("OLD_SOUTH_ARABIAN",
2271                             "OLD SOUTH ARABIAN",
2272                             "OLDSOUTHARABIAN");
2273
2274        /**
2275         * Constant for the "Avestan" Unicode character block.
2276         * @since 1.7
2277         */
2278        public static final UnicodeBlock AVESTAN =
2279            new UnicodeBlock("AVESTAN");
2280
2281        /**
2282         * Constant for the "Inscriptional Parthian" Unicode character block.
2283         * @since 1.7
2284         */
2285        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2286            new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2287                             "INSCRIPTIONAL PARTHIAN",
2288                             "INSCRIPTIONALPARTHIAN");
2289
2290        /**
2291         * Constant for the "Inscriptional Pahlavi" Unicode character block.
2292         * @since 1.7
2293         */
2294        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2295            new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2296                             "INSCRIPTIONAL PAHLAVI",
2297                             "INSCRIPTIONALPAHLAVI");
2298
2299        /**
2300         * Constant for the "Old Turkic" Unicode character block.
2301         * @since 1.7
2302         */
2303        public static final UnicodeBlock OLD_TURKIC =
2304            new UnicodeBlock("OLD_TURKIC",
2305                             "OLD TURKIC",
2306                             "OLDTURKIC");
2307
2308        /**
2309         * Constant for the "Rumi Numeral Symbols" Unicode character block.
2310         * @since 1.7
2311         */
2312        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2313            new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2314                             "RUMI NUMERAL SYMBOLS",
2315                             "RUMINUMERALSYMBOLS");
2316
2317        /**
2318         * Constant for the "Brahmi" Unicode character block.
2319         * @since 1.7
2320         */
2321        public static final UnicodeBlock BRAHMI =
2322            new UnicodeBlock("BRAHMI");
2323
2324        /**
2325         * Constant for the "Kaithi" Unicode character block.
2326         * @since 1.7
2327         */
2328        public static final UnicodeBlock KAITHI =
2329            new UnicodeBlock("KAITHI");
2330
2331        /**
2332         * Constant for the "Cuneiform" Unicode character block.
2333         * @since 1.7
2334         */
2335        public static final UnicodeBlock CUNEIFORM =
2336            new UnicodeBlock("CUNEIFORM");
2337
2338        /**
2339         * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2340         * character block.
2341         * @since 1.7
2342         */
2343        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2344            new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2345                             "CUNEIFORM NUMBERS AND PUNCTUATION",
2346                             "CUNEIFORMNUMBERSANDPUNCTUATION");
2347
2348        /**
2349         * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2350         * @since 1.7
2351         */
2352        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2353            new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2354                             "EGYPTIAN HIEROGLYPHS",
2355                             "EGYPTIANHIEROGLYPHS");
2356
2357        /**
2358         * Constant for the "Bamum Supplement" Unicode character block.
2359         * @since 1.7
2360         */
2361        public static final UnicodeBlock BAMUM_SUPPLEMENT =
2362            new UnicodeBlock("BAMUM_SUPPLEMENT",
2363                             "BAMUM SUPPLEMENT",
2364                             "BAMUMSUPPLEMENT");
2365
2366        /**
2367         * Constant for the "Kana Supplement" Unicode character block.
2368         * @since 1.7
2369         */
2370        public static final UnicodeBlock KANA_SUPPLEMENT =
2371            new UnicodeBlock("KANA_SUPPLEMENT",
2372                             "KANA SUPPLEMENT",
2373                             "KANASUPPLEMENT");
2374
2375        /**
2376         * Constant for the "Ancient Greek Musical Notation" Unicode character
2377         * block.
2378         * @since 1.7
2379         */
2380        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2381            new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2382                             "ANCIENT GREEK MUSICAL NOTATION",
2383                             "ANCIENTGREEKMUSICALNOTATION");
2384
2385        /**
2386         * Constant for the "Counting Rod Numerals" Unicode character block.
2387         * @since 1.7
2388         */
2389        public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2390            new UnicodeBlock("COUNTING_ROD_NUMERALS",
2391                             "COUNTING ROD NUMERALS",
2392                             "COUNTINGRODNUMERALS");
2393
2394        /**
2395         * Constant for the "Mahjong Tiles" Unicode character block.
2396         * @since 1.7
2397         */
2398        public static final UnicodeBlock MAHJONG_TILES =
2399            new UnicodeBlock("MAHJONG_TILES",
2400                             "MAHJONG TILES",
2401                             "MAHJONGTILES");
2402
2403        /**
2404         * Constant for the "Domino Tiles" Unicode character block.
2405         * @since 1.7
2406         */
2407        public static final UnicodeBlock DOMINO_TILES =
2408            new UnicodeBlock("DOMINO_TILES",
2409                             "DOMINO TILES",
2410                             "DOMINOTILES");
2411
2412        /**
2413         * Constant for the "Playing Cards" Unicode character block.
2414         * @since 1.7
2415         */
2416        public static final UnicodeBlock PLAYING_CARDS =
2417            new UnicodeBlock("PLAYING_CARDS",
2418                             "PLAYING CARDS",
2419                             "PLAYINGCARDS");
2420
2421        /**
2422         * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2423         * block.
2424         * @since 1.7
2425         */
2426        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2427            new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2428                             "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2429                             "ENCLOSEDALPHANUMERICSUPPLEMENT");
2430
2431        /**
2432         * Constant for the "Enclosed Ideographic Supplement" Unicode character
2433         * block.
2434         * @since 1.7
2435         */
2436        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2437            new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2438                             "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2439                             "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2440
2441        /**
2442         * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2443         * character block.
2444         * @since 1.7
2445         */
2446        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2447            new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2448                             "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2449                             "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2450
2451        /**
2452         * Constant for the "Emoticons" Unicode character block.
2453         * @since 1.7
2454         */
2455        public static final UnicodeBlock EMOTICONS =
2456            new UnicodeBlock("EMOTICONS");
2457
2458        /**
2459         * Constant for the "Transport And Map Symbols" Unicode character block.
2460         * @since 1.7
2461         */
2462        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2463            new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2464                             "TRANSPORT AND MAP SYMBOLS",
2465                             "TRANSPORTANDMAPSYMBOLS");
2466
2467        /**
2468         * Constant for the "Alchemical Symbols" Unicode character block.
2469         * @since 1.7
2470         */
2471        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2472            new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2473                             "ALCHEMICAL SYMBOLS",
2474                             "ALCHEMICALSYMBOLS");
2475
2476        /**
2477         * Constant for the "CJK Unified Ideographs Extension C" Unicode
2478         * character block.
2479         * @since 1.7
2480         */
2481        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2482            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2483                             "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2484                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2485
2486        /**
2487         * Constant for the "CJK Unified Ideographs Extension D" Unicode
2488         * character block.
2489         * @since 1.7
2490         */
2491        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2492            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2493                             "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2494                             "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2495
2496        private static final int blockStarts[] = {
            // First code point of every block and of every unassigned gap
            // between blocks, in ascending order. Entry i pairs with blocks[i];
            // of(int) binary-searches this table, so the order and the
            // one-to-one alignment with blocks[] must be preserved.
2497            0x0000,   // 0000..007F; Basic Latin
2498            0x0080,   // 0080..00FF; Latin-1 Supplement
2499            0x0100,   // 0100..017F; Latin Extended-A
2500            0x0180,   // 0180..024F; Latin Extended-B
2501            0x0250,   // 0250..02AF; IPA Extensions
2502            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2503            0x0300,   // 0300..036F; Combining Diacritical Marks
2504            0x0370,   // 0370..03FF; Greek and Coptic
2505            0x0400,   // 0400..04FF; Cyrillic
2506            0x0500,   // 0500..052F; Cyrillic Supplement
2507            0x0530,   // 0530..058F; Armenian
2508            0x0590,   // 0590..05FF; Hebrew
2509            0x0600,   // 0600..06FF; Arabic
2510            0x0700,   // 0700..074F; Syriac
2511            0x0750,   // 0750..077F; Arabic Supplement
2512            0x0780,   // 0780..07BF; Thaana
2513            0x07C0,   // 07C0..07FF; NKo
2514            0x0800,   // 0800..083F; Samaritan
2515            0x0840,   // 0840..085F; Mandaic
2516            0x0860,   // 0860..08FF; unassigned
2517            0x0900,   // 0900..097F; Devanagari
2518            0x0980,   // 0980..09FF; Bengali
2519            0x0A00,   // 0A00..0A7F; Gurmukhi
2520            0x0A80,   // 0A80..0AFF; Gujarati
2521            0x0B00,   // 0B00..0B7F; Oriya
2522            0x0B80,   // 0B80..0BFF; Tamil
2523            0x0C00,   // 0C00..0C7F; Telugu
2524            0x0C80,   // 0C80..0CFF; Kannada
2525            0x0D00,   // 0D00..0D7F; Malayalam
2526            0x0D80,   // 0D80..0DFF; Sinhala
2527            0x0E00,   // 0E00..0E7F; Thai
2528            0x0E80,   // 0E80..0EFF; Lao
2529            0x0F00,   // 0F00..0FFF; Tibetan
2530            0x1000,   // 1000..109F; Myanmar
2531            0x10A0,   // 10A0..10FF; Georgian
2532            0x1100,   // 1100..11FF; Hangul Jamo
2533            0x1200,   // 1200..137F; Ethiopic
2534            0x1380,   // 1380..139F; Ethiopic Supplement
2535            0x13A0,   // 13A0..13FF; Cherokee
2536            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2537            0x1680,   // 1680..169F; Ogham
2538            0x16A0,   // 16A0..16FF; Runic
2539            0x1700,   // 1700..171F; Tagalog
2540            0x1720,   // 1720..173F; Hanunoo
2541            0x1740,   // 1740..175F; Buhid
2542            0x1760,   // 1760..177F; Tagbanwa
2543            0x1780,   // 1780..17FF; Khmer
2544            0x1800,   // 1800..18AF; Mongolian
2545            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2546            0x1900,   // 1900..194F; Limbu
2547            0x1950,   // 1950..197F; Tai Le
2548            0x1980,   // 1980..19DF; New Tai Lue
2549            0x19E0,   // 19E0..19FF; Khmer Symbols
2550            0x1A00,   // 1A00..1A1F; Buginese
2551            0x1A20,   // 1A20..1AAF; Tai Tham
2552            0x1AB0,   // 1AB0..1AFF; unassigned
2553            0x1B00,   // 1B00..1B7F; Balinese
2554            0x1B80,   // 1B80..1BBF; Sundanese
2555            0x1BC0,   // 1BC0..1BFF; Batak
2556            0x1C00,   // 1C00..1C4F; Lepcha
2557            0x1C50,   // 1C50..1C7F; Ol Chiki
2558            0x1C80,   // 1C80..1CCF; unassigned
2559            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2560            0x1D00,   // 1D00..1D7F; Phonetic Extensions
2561            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2562            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2563            0x1E00,   // 1E00..1EFF; Latin Extended Additional
2564            0x1F00,   // 1F00..1FFF; Greek Extended
2565            0x2000,   // 2000..206F; General Punctuation
2566            0x2070,   // 2070..209F; Superscripts and Subscripts
2567            0x20A0,   // 20A0..20CF; Currency Symbols
2568            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2569            0x2100,   // 2100..214F; Letterlike Symbols
2570            0x2150,   // 2150..218F; Number Forms
2571            0x2190,   // 2190..21FF; Arrows
2572            0x2200,   // 2200..22FF; Mathematical Operators
2573            0x2300,   // 2300..23FF; Miscellaneous Technical
2574            0x2400,   // 2400..243F; Control Pictures
2575            0x2440,   // 2440..245F; Optical Character Recognition
2576            0x2460,   // 2460..24FF; Enclosed Alphanumerics
2577            0x2500,   // 2500..257F; Box Drawing
2578            0x2580,   // 2580..259F; Block Elements
2579            0x25A0,   // 25A0..25FF; Geometric Shapes
2580            0x2600,   // 2600..26FF; Miscellaneous Symbols
2581            0x2700,   // 2700..27BF; Dingbats
2582            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2583            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2584            0x2800,   // 2800..28FF; Braille Patterns
2585            0x2900,   // 2900..297F; Supplemental Arrows-B
2586            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2587            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2588            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2589            0x2C00,   // 2C00..2C5F; Glagolitic
2590            0x2C60,   // 2C60..2C7F; Latin Extended-C
2591            0x2C80,   // 2C80..2CFF; Coptic
2592            0x2D00,   // 2D00..2D2F; Georgian Supplement
2593            0x2D30,   // 2D30..2D7F; Tifinagh
2594            0x2D80,   // 2D80..2DDF; Ethiopic Extended
2595            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2596            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2597            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2598            0x2F00,   // 2F00..2FDF; Kangxi Radicals
2599            0x2FE0,   // 2FE0..2FEF; unassigned
2600            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2601            0x3000,   // 3000..303F; CJK Symbols and Punctuation
2602            0x3040,   // 3040..309F; Hiragana
2603            0x30A0,   // 30A0..30FF; Katakana
2604            0x3100,   // 3100..312F; Bopomofo
2605            0x3130,   // 3130..318F; Hangul Compatibility Jamo
2606            0x3190,   // 3190..319F; Kanbun
2607            0x31A0,   // 31A0..31BF; Bopomofo Extended
2608            0x31C0,   // 31C0..31EF; CJK Strokes
2609            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2610            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2611            0x3300,   // 3300..33FF; CJK Compatibility
2612            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2613            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2614            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2615            0xA000,   // A000..A48F; Yi Syllables
2616            0xA490,   // A490..A4CF; Yi Radicals
2617            0xA4D0,   // A4D0..A4FF; Lisu
2618            0xA500,   // A500..A63F; Vai
2619            0xA640,   // A640..A69F; Cyrillic Extended-B
2620            0xA6A0,   // A6A0..A6FF; Bamum
2621            0xA700,   // A700..A71F; Modifier Tone Letters
2622            0xA720,   // A720..A7FF; Latin Extended-D
2623            0xA800,   // A800..A82F; Syloti Nagri
2624            0xA830,   // A830..A83F; Common Indic Number Forms
2625            0xA840,   // A840..A87F; Phags-pa
2626            0xA880,   // A880..A8DF; Saurashtra
2627            0xA8E0,   // A8E0..A8FF; Devanagari Extended
2628            0xA900,   // A900..A92F; Kayah Li
2629            0xA930,   // A930..A95F; Rejang
2630            0xA960,   // A960..A97F; Hangul Jamo Extended-A
2631            0xA980,   // A980..A9DF; Javanese
2632            0xA9E0,   // A9E0..A9FF; unassigned
2633            0xAA00,   // AA00..AA5F; Cham
2634            0xAA60,   // AA60..AA7F; Myanmar Extended-A
2635            0xAA80,   // AA80..AADF; Tai Viet
2636            0xAAE0,   // AAE0..AAFF; unassigned
2637            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2638            0xAB30,   // AB30..ABBF; unassigned
2639            0xABC0,   // ABC0..ABFF; Meetei Mayek
2640            0xAC00,   // AC00..D7AF; Hangul Syllables
2641            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2642            0xD800,   // D800..DB7F; High Surrogates
2643            0xDB80,   // DB80..DBFF; High Private Use Surrogates
2644            0xDC00,   // DC00..DFFF; Low Surrogates
2645            0xE000,   // E000..F8FF; Private Use Area
2646            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2647            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2648            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2649            0xFE00,   // FE00..FE0F; Variation Selectors
2650            0xFE10,   // FE10..FE1F; Vertical Forms
2651            0xFE20,   // FE20..FE2F; Combining Half Marks
2652            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2653            0xFE50,   // FE50..FE6F; Small Form Variants
2654            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2655            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2656            0xFFF0,   // FFF0..FFFF; Specials
2657            0x10000,  // 10000..1007F; Linear B Syllabary
2658            0x10080,  // 10080..100FF; Linear B Ideograms
2659            0x10100,  // 10100..1013F; Aegean Numbers
2660            0x10140,  // 10140..1018F; Ancient Greek Numbers
2661            0x10190,  // 10190..101CF; Ancient Symbols
2662            0x101D0,  // 101D0..101FF; Phaistos Disc
2663            0x10200,  // 10200..1027F; unassigned
2664            0x10280,  // 10280..1029F; Lycian
2665            0x102A0,  // 102A0..102DF; Carian
2666            0x102E0,  // 102E0..102FF; unassigned
2667            0x10300,  // 10300..1032F; Old Italic
2668            0x10330,  // 10330..1034F; Gothic
2669            0x10350,  // 10350..1037F; unassigned
2670            0x10380,  // 10380..1039F; Ugaritic
2671            0x103A0,  // 103A0..103DF; Old Persian
2672            0x103E0,  // 103E0..103FF; unassigned
2673            0x10400,  // 10400..1044F; Deseret
2674            0x10450,  // 10450..1047F; Shavian
2675            0x10480,  // 10480..104AF; Osmanya
2676            0x104B0,  // 104B0..107FF; unassigned
2677            0x10800,  // 10800..1083F; Cypriot Syllabary
2678            0x10840,  // 10840..1085F; Imperial Aramaic
2679            0x10860,  // 10860..108FF; unassigned
2680            0x10900,  // 10900..1091F; Phoenician
2681            0x10920,  // 10920..1093F; Lydian
2682            0x10940,  // 10940..109FF; unassigned
2683            0x10A00,  // 10A00..10A5F; Kharoshthi
2684            0x10A60,  // 10A60..10A7F; Old South Arabian
2685            0x10A80,  // 10A80..10AFF; unassigned
2686            0x10B00,  // 10B00..10B3F; Avestan
2687            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2688            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2689            0x10B80,  // 10B80..10BFF; unassigned
2690            0x10C00,  // 10C00..10C4F; Old Turkic
2691            0x10C50,  // 10C50..10E5F; unassigned
2692            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2693            0x10E80,  // 10E80..10FFF; unassigned
2694            0x11000,  // 11000..1107F; Brahmi
2695            0x11080,  // 11080..110CF; Kaithi
2696            0x110D0,  // 110D0..11FFF; unassigned
2697            0x12000,  // 12000..123FF; Cuneiform
2698            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2699            0x12480,  // 12480..12FFF; unassigned
2700            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2701            0x13430,  // 13430..167FF; unassigned
2702            0x16800,  // 16800..16A3F; Bamum Supplement
2703            0x16A40,  // 16A40..1AFFF; unassigned
2704            0x1B000,  // 1B000..1B0FF; Kana Supplement
2705            0x1B100,  // 1B100..1CFFF; unassigned
2706            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2707            0x1D100,  // 1D100..1D1FF; Musical Symbols
2708            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2709            0x1D250,  // 1D250..1D2FF; unassigned
2710            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2711            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2712            0x1D380,  // 1D380..1D3FF; unassigned
2713            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2714            0x1D800,  // 1D800..1EFFF; unassigned
2715            0x1F000,  // 1F000..1F02F; Mahjong Tiles
2716            0x1F030,  // 1F030..1F09F; Domino Tiles
2717            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2718            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2719            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2720            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2721            0x1F600,  // 1F600..1F64F; Emoticons
2722            0x1F650,  // 1F650..1F67F; unassigned
2723            0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2724            0x1F700,  // 1F700..1F77F; Alchemical Symbols
2725            0x1F780,  // 1F780..1FFFF; unassigned
2726            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2727            0x2A6E0,  // 2A6E0..2A6FF; unassigned
2728            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2729            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2730            0x2B820,  // 2B820..2F7FF; unassigned
2731            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2732            0x2FA20,  // 2FA20..DFFFF; unassigned
2733            0xE0000,  // E0000..E007F; Tags
2734            0xE0080,  // E0080..E00FF; unassigned
2735            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2736            0xE01F0,  // E01F0..EFFFF; unassigned
2737            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2738            0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2739        };
2740
2741        private static final UnicodeBlock[] blocks = {
            // Entry i is the block whose range starts at blockStarts[i].
            // A null entry marks a range that belongs to no block; of(int)
            // returns it unchanged, i.e. null, for code points in that range.
            // The two tables must keep the same length and ordering.
2742            BASIC_LATIN,
2743            LATIN_1_SUPPLEMENT,
2744            LATIN_EXTENDED_A,
2745            LATIN_EXTENDED_B,
2746            IPA_EXTENSIONS,
2747            SPACING_MODIFIER_LETTERS,
2748            COMBINING_DIACRITICAL_MARKS,
2749            GREEK, // 0370..03FF; Greek and Coptic
2750            CYRILLIC,
2751            CYRILLIC_SUPPLEMENTARY, // 0500..052F; Cyrillic Supplement
2752            ARMENIAN,
2753            HEBREW,
2754            ARABIC,
2755            SYRIAC,
2756            ARABIC_SUPPLEMENT,
2757            THAANA,
2758            NKO,
2759            SAMARITAN,
2760            MANDAIC,
2761            null, // 0860..08FF; unassigned
2762            DEVANAGARI,
2763            BENGALI,
2764            GURMUKHI,
2765            GUJARATI,
2766            ORIYA,
2767            TAMIL,
2768            TELUGU,
2769            KANNADA,
2770            MALAYALAM,
2771            SINHALA,
2772            THAI,
2773            LAO,
2774            TIBETAN,
2775            MYANMAR,
2776            GEORGIAN,
2777            HANGUL_JAMO,
2778            ETHIOPIC,
2779            ETHIOPIC_SUPPLEMENT,
2780            CHEROKEE,
2781            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2782            OGHAM,
2783            RUNIC,
2784            TAGALOG,
2785            HANUNOO,
2786            BUHID,
2787            TAGBANWA,
2788            KHMER,
2789            MONGOLIAN,
2790            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2791            LIMBU,
2792            TAI_LE,
2793            NEW_TAI_LUE,
2794            KHMER_SYMBOLS,
2795            BUGINESE,
2796            TAI_THAM,
2797            null, // 1AB0..1AFF; unassigned
2798            BALINESE,
2799            SUNDANESE,
2800            BATAK,
2801            LEPCHA,
2802            OL_CHIKI,
2803            null, // 1C80..1CCF; unassigned
2804            VEDIC_EXTENSIONS,
2805            PHONETIC_EXTENSIONS,
2806            PHONETIC_EXTENSIONS_SUPPLEMENT,
2807            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2808            LATIN_EXTENDED_ADDITIONAL,
2809            GREEK_EXTENDED,
2810            GENERAL_PUNCTUATION,
2811            SUPERSCRIPTS_AND_SUBSCRIPTS,
2812            CURRENCY_SYMBOLS,
2813            COMBINING_MARKS_FOR_SYMBOLS, // 20D0..20FF; Combining Diacritical Marks for Symbols
2814            LETTERLIKE_SYMBOLS,
2815            NUMBER_FORMS,
2816            ARROWS,
2817            MATHEMATICAL_OPERATORS,
2818            MISCELLANEOUS_TECHNICAL,
2819            CONTROL_PICTURES,
2820            OPTICAL_CHARACTER_RECOGNITION,
2821            ENCLOSED_ALPHANUMERICS,
2822            BOX_DRAWING,
2823            BLOCK_ELEMENTS,
2824            GEOMETRIC_SHAPES,
2825            MISCELLANEOUS_SYMBOLS,
2826            DINGBATS,
2827            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2828            SUPPLEMENTAL_ARROWS_A,
2829            BRAILLE_PATTERNS,
2830            SUPPLEMENTAL_ARROWS_B,
2831            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2832            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2833            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2834            GLAGOLITIC,
2835            LATIN_EXTENDED_C,
2836            COPTIC,
2837            GEORGIAN_SUPPLEMENT,
2838            TIFINAGH,
2839            ETHIOPIC_EXTENDED,
2840            CYRILLIC_EXTENDED_A,
2841            SUPPLEMENTAL_PUNCTUATION,
2842            CJK_RADICALS_SUPPLEMENT,
2843            KANGXI_RADICALS,
2844            null, // 2FE0..2FEF; unassigned
2845            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2846            CJK_SYMBOLS_AND_PUNCTUATION,
2847            HIRAGANA,
2848            KATAKANA,
2849            BOPOMOFO,
2850            HANGUL_COMPATIBILITY_JAMO,
2851            KANBUN,
2852            BOPOMOFO_EXTENDED,
2853            CJK_STROKES,
2854            KATAKANA_PHONETIC_EXTENSIONS,
2855            ENCLOSED_CJK_LETTERS_AND_MONTHS,
2856            CJK_COMPATIBILITY,
2857            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2858            YIJING_HEXAGRAM_SYMBOLS,
2859            CJK_UNIFIED_IDEOGRAPHS,
2860            YI_SYLLABLES,
2861            YI_RADICALS,
2862            LISU,
2863            VAI,
2864            CYRILLIC_EXTENDED_B,
2865            BAMUM,
2866            MODIFIER_TONE_LETTERS,
2867            LATIN_EXTENDED_D,
2868            SYLOTI_NAGRI,
2869            COMMON_INDIC_NUMBER_FORMS,
2870            PHAGS_PA,
2871            SAURASHTRA,
2872            DEVANAGARI_EXTENDED,
2873            KAYAH_LI,
2874            REJANG,
2875            HANGUL_JAMO_EXTENDED_A,
2876            JAVANESE,
2877            null, // A9E0..A9FF; unassigned
2878            CHAM,
2879            MYANMAR_EXTENDED_A,
2880            TAI_VIET,
2881            null, // AAE0..AAFF; unassigned
2882            ETHIOPIC_EXTENDED_A,
2883            null, // AB30..ABBF; unassigned
2884            MEETEI_MAYEK,
2885            HANGUL_SYLLABLES,
2886            HANGUL_JAMO_EXTENDED_B,
2887            HIGH_SURROGATES,
2888            HIGH_PRIVATE_USE_SURROGATES,
2889            LOW_SURROGATES,
2890            PRIVATE_USE_AREA,
2891            CJK_COMPATIBILITY_IDEOGRAPHS,
2892            ALPHABETIC_PRESENTATION_FORMS,
2893            ARABIC_PRESENTATION_FORMS_A,
2894            VARIATION_SELECTORS,
2895            VERTICAL_FORMS,
2896            COMBINING_HALF_MARKS,
2897            CJK_COMPATIBILITY_FORMS,
2898            SMALL_FORM_VARIANTS,
2899            ARABIC_PRESENTATION_FORMS_B,
2900            HALFWIDTH_AND_FULLWIDTH_FORMS,
2901            SPECIALS,
2902            LINEAR_B_SYLLABARY,
2903            LINEAR_B_IDEOGRAMS,
2904            AEGEAN_NUMBERS,
2905            ANCIENT_GREEK_NUMBERS,
2906            ANCIENT_SYMBOLS,
2907            PHAISTOS_DISC,
2908            null, // 10200..1027F; unassigned
2909            LYCIAN,
2910            CARIAN,
2911            null, // 102E0..102FF; unassigned
2912            OLD_ITALIC,
2913            GOTHIC,
2914            null, // 10350..1037F; unassigned
2915            UGARITIC,
2916            OLD_PERSIAN,
2917            null, // 103E0..103FF; unassigned
2918            DESERET,
2919            SHAVIAN,
2920            OSMANYA,
2921            null, // 104B0..107FF; unassigned
2922            CYPRIOT_SYLLABARY,
2923            IMPERIAL_ARAMAIC,
2924            null, // 10860..108FF; unassigned
2925            PHOENICIAN,
2926            LYDIAN,
2927            null, // 10940..109FF; unassigned
2928            KHAROSHTHI,
2929            OLD_SOUTH_ARABIAN,
2930            null, // 10A80..10AFF; unassigned
2931            AVESTAN,
2932            INSCRIPTIONAL_PARTHIAN,
2933            INSCRIPTIONAL_PAHLAVI,
2934            null, // 10B80..10BFF; unassigned
2935            OLD_TURKIC,
2936            null, // 10C50..10E5F; unassigned
2937            RUMI_NUMERAL_SYMBOLS,
2938            null, // 10E80..10FFF; unassigned
2939            BRAHMI,
2940            KAITHI,
2941            null, // 110D0..11FFF; unassigned
2942            CUNEIFORM,
2943            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2944            null, // 12480..12FFF; unassigned
2945            EGYPTIAN_HIEROGLYPHS,
2946            null, // 13430..167FF; unassigned
2947            BAMUM_SUPPLEMENT,
2948            null, // 16A40..1AFFF; unassigned
2949            KANA_SUPPLEMENT,
2950            null, // 1B100..1CFFF; unassigned
2951            BYZANTINE_MUSICAL_SYMBOLS,
2952            MUSICAL_SYMBOLS,
2953            ANCIENT_GREEK_MUSICAL_NOTATION,
2954            null, // 1D250..1D2FF; unassigned
2955            TAI_XUAN_JING_SYMBOLS,
2956            COUNTING_ROD_NUMERALS,
2957            null, // 1D380..1D3FF; unassigned
2958            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2959            null, // 1D800..1EFFF; unassigned
2960            MAHJONG_TILES,
2961            DOMINO_TILES,
2962            PLAYING_CARDS,
2963            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2964            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2965            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2966            EMOTICONS,
2967            null, // 1F650..1F67F; unassigned
2968            TRANSPORT_AND_MAP_SYMBOLS,
2969            ALCHEMICAL_SYMBOLS,
2970            null, // 1F780..1FFFF; unassigned
2971            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2972            null, // 2A6E0..2A6FF; unassigned
2973            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2974            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2975            null, // 2B820..2F7FF; unassigned
2976            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2977            null, // 2FA20..DFFFF; unassigned
2978            TAGS,
2979            null, // E0080..E00FF; unassigned
2980            VARIATION_SELECTORS_SUPPLEMENT,
2981            null, // E01F0..EFFFF; unassigned
2982            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2983            SUPPLEMENTARY_PRIVATE_USE_AREA_B
2984        };
2985
2986
2987        /**
2988         * Returns the object representing the Unicode block containing the
2989         * given character, or {@code null} if the character is not a
2990         * member of a defined block.
2991         *
2992         * <p><b>Note:</b> This method cannot handle
2993         * <a href="Character.html#supplementary"> supplementary
2994         * characters</a>.  To support all Unicode characters, including
2995         * supplementary characters, use the {@link #of(int)} method.
2996         *
2997         * @param   c  The character in question
2998         * @return  The {@code UnicodeBlock} instance representing the
2999         *          Unicode block of which this character is a member, or
3000         *          {@code null} if the character is not a member of any
3001         *          Unicode block
3002         */
3003        public static UnicodeBlock of(char c) {
3004            return of((int)c);
3005        }
3006
3007        /**
3008         * Returns the object representing the Unicode block
3009         * containing the given character (Unicode code point), or
3010         * {@code null} if the character is not a member of a
3011         * defined block.
3012         *
3013         * @param   codePoint the character (Unicode code point) in question.
3014         * @return  The {@code UnicodeBlock} instance representing the
3015         *          Unicode block of which this character is a member, or
3016         *          {@code null} if the character is not a member of any
3017         *          Unicode block
3018         * @exception IllegalArgumentException if the specified
3019         * {@code codePoint} is an invalid Unicode code point.
3020         * @see Character#isValidCodePoint(int)
3021         * @since   1.5
3022         */
3023        public static UnicodeBlock of(int codePoint) {
3024            if (!isValidCodePoint(codePoint)) {
3025                throw new IllegalArgumentException();
3026            }
3027
3028            int top, bottom, current;
3029            bottom = 0;
3030            top = blockStarts.length;
3031            current = top/2;
3032
3033            // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3034            while (top - bottom > 1) {
3035                if (codePoint >= blockStarts[current]) {
3036                    bottom = current;
3037                } else {
3038                    top = current;
3039                }
3040                current = (top + bottom) / 2;
3041            }
3042            return blocks[current];
3043        }
3044
3045        /**
3046         * Returns the UnicodeBlock with the given name. Block
3047         * names are determined by The Unicode Standard. The file
3048         * Blocks-&lt;version&gt;.txt defines blocks for a particular
3049         * version of the standard. The {@link Character} class specifies
3050         * the version of the standard that it supports.
3051         * <p>
3052         * This method accepts block names in the following forms:
3053         * <ol>
3054         * <li> Canonical block names as defined by the Unicode Standard.
3055         * For example, the standard defines a "Basic Latin" block. Therefore, this
3056         * method accepts "Basic Latin" as a valid block name. The documentation of
3057         * each UnicodeBlock provides the canonical name.
3058         * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3059         * is a valid block name for the "Basic Latin" block.
3060         * <li>The text representation of each constant UnicodeBlock identifier.
3061         * For example, this method will return the {@link #BASIC_LATIN} block if
3062         * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3063         * hyphens in the canonical name with underscores.
3064         * </ol>
3065         * Finally, character case is ignored for all of the valid block name forms.
3066         * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3067         * The en_US locale's case mapping rules are used to provide case-insensitive
3068         * string comparisons for block name validation.
3069         * <p>
3070         * If the Unicode Standard changes block names, both the previous and
3071         * current names will be accepted.
3072         *
3073         * @param blockName A {@code UnicodeBlock} name.
3074         * @return The {@code UnicodeBlock} instance identified
3075         *         by {@code blockName}
3076         * @throws IllegalArgumentException if {@code blockName} is an
3077         *         invalid name
3078         * @throws NullPointerException if {@code blockName} is null
3079         * @since 1.5
3080         */
3081        public static final UnicodeBlock forName(String blockName) {
3082            UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3083            if (block == null) {
3084                throw new IllegalArgumentException();
3085            }
3086            return block;
3087        }
3088    }
3089
3090
3091    /**
3092     * A family of character subsets representing the character scripts
3093     * defined in the <a href="http://www.unicode.org/reports/tr24/">
3094     * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3095     * character is assigned to a single Unicode script, either a specific
3096     * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3097     * one of the following three special values,
3098     * {@link Character.UnicodeScript#INHERITED Inherited},
3099     * {@link Character.UnicodeScript#COMMON Common} or
3100     * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3101     *
3102     * @since 1.7
3103     */
3104    public static enum UnicodeScript {
3105        /**
3106         * Unicode script "Common".
3107         */
3108        COMMON,
3109
3110        /**
3111         * Unicode script "Latin".
3112         */
3113        LATIN,
3114
3115        /**
3116         * Unicode script "Greek".
3117         */
3118        GREEK,
3119
3120        /**
3121         * Unicode script "Cyrillic".
3122         */
3123        CYRILLIC,
3124
3125        /**
3126         * Unicode script "Armenian".
3127         */
3128        ARMENIAN,
3129
3130        /**
3131         * Unicode script "Hebrew".
3132         */
3133        HEBREW,
3134
3135        /**
3136         * Unicode script "Arabic".
3137         */
3138        ARABIC,
3139
3140        /**
3141         * Unicode script "Syriac".
3142         */
3143        SYRIAC,
3144
3145        /**
3146         * Unicode script "Thaana".
3147         */
3148        THAANA,
3149
3150        /**
3151         * Unicode script "Devanagari".
3152         */
3153        DEVANAGARI,
3154
3155        /**
3156         * Unicode script "Bengali".
3157         */
3158        BENGALI,
3159
3160        /**
3161         * Unicode script "Gurmukhi".
3162         */
3163        GURMUKHI,
3164
3165        /**
3166         * Unicode script "Gujarati".
3167         */
3168        GUJARATI,
3169
3170        /**
3171         * Unicode script "Oriya".
3172         */
3173        ORIYA,
3174
3175        /**
3176         * Unicode script "Tamil".
3177         */
3178        TAMIL,
3179
3180        /**
3181         * Unicode script "Telugu".
3182         */
3183        TELUGU,
3184
3185        /**
3186         * Unicode script "Kannada".
3187         */
3188        KANNADA,
3189
3190        /**
3191         * Unicode script "Malayalam".
3192         */
3193        MALAYALAM,
3194
3195        /**
3196         * Unicode script "Sinhala".
3197         */
3198        SINHALA,
3199
3200        /**
3201         * Unicode script "Thai".
3202         */
3203        THAI,
3204
3205        /**
3206         * Unicode script "Lao".
3207         */
3208        LAO,
3209
3210        /**
3211         * Unicode script "Tibetan".
3212         */
3213        TIBETAN,
3214
3215        /**
3216         * Unicode script "Myanmar".
3217         */
3218        MYANMAR,
3219
3220        /**
3221         * Unicode script "Georgian".
3222         */
3223        GEORGIAN,
3224
3225        /**
3226         * Unicode script "Hangul".
3227         */
3228        HANGUL,
3229
3230        /**
3231         * Unicode script "Ethiopic".
3232         */
3233        ETHIOPIC,
3234
3235        /**
3236         * Unicode script "Cherokee".
3237         */
3238        CHEROKEE,
3239
3240        /**
3241         * Unicode script "Canadian_Aboriginal".
3242         */
3243        CANADIAN_ABORIGINAL,
3244
3245        /**
3246         * Unicode script "Ogham".
3247         */
3248        OGHAM,
3249
3250        /**
3251         * Unicode script "Runic".
3252         */
3253        RUNIC,
3254
3255        /**
3256         * Unicode script "Khmer".
3257         */
3258        KHMER,
3259
3260        /**
3261         * Unicode script "Mongolian".
3262         */
3263        MONGOLIAN,
3264
3265        /**
3266         * Unicode script "Hiragana".
3267         */
3268        HIRAGANA,
3269
3270        /**
3271         * Unicode script "Katakana".
3272         */
3273        KATAKANA,
3274
3275        /**
3276         * Unicode script "Bopomofo".
3277         */
3278        BOPOMOFO,
3279
3280        /**
3281         * Unicode script "Han".
3282         */
3283        HAN,
3284
3285        /**
3286         * Unicode script "Yi".
3287         */
3288        YI,
3289
3290        /**
3291         * Unicode script "Old_Italic".
3292         */
3293        OLD_ITALIC,
3294
3295        /**
3296         * Unicode script "Gothic".
3297         */
3298        GOTHIC,
3299
3300        /**
3301         * Unicode script "Deseret".
3302         */
3303        DESERET,
3304
3305        /**
3306         * Unicode script "Inherited".
3307         */
3308        INHERITED,
3309
3310        /**
3311         * Unicode script "Tagalog".
3312         */
3313        TAGALOG,
3314
3315        /**
3316         * Unicode script "Hanunoo".
3317         */
3318        HANUNOO,
3319
3320        /**
3321         * Unicode script "Buhid".
3322         */
3323        BUHID,
3324
3325        /**
3326         * Unicode script "Tagbanwa".
3327         */
3328        TAGBANWA,
3329
3330        /**
3331         * Unicode script "Limbu".
3332         */
3333        LIMBU,
3334
3335        /**
3336         * Unicode script "Tai_Le".
3337         */
3338        TAI_LE,
3339
3340        /**
3341         * Unicode script "Linear_B".
3342         */
3343        LINEAR_B,
3344
3345        /**
3346         * Unicode script "Ugaritic".
3347         */
3348        UGARITIC,
3349
3350        /**
3351         * Unicode script "Shavian".
3352         */
3353        SHAVIAN,
3354
3355        /**
3356         * Unicode script "Osmanya".
3357         */
3358        OSMANYA,
3359
3360        /**
3361         * Unicode script "Cypriot".
3362         */
3363        CYPRIOT,
3364
3365        /**
3366         * Unicode script "Braille".
3367         */
3368        BRAILLE,
3369
3370        /**
3371         * Unicode script "Buginese".
3372         */
3373        BUGINESE,
3374
3375        /**
3376         * Unicode script "Coptic".
3377         */
3378        COPTIC,
3379
3380        /**
3381         * Unicode script "New_Tai_Lue".
3382         */
3383        NEW_TAI_LUE,
3384
3385        /**
3386         * Unicode script "Glagolitic".
3387         */
3388        GLAGOLITIC,
3389
3390        /**
3391         * Unicode script "Tifinagh".
3392         */
3393        TIFINAGH,
3394
3395        /**
3396         * Unicode script "Syloti_Nagri".
3397         */
3398        SYLOTI_NAGRI,
3399
3400        /**
3401         * Unicode script "Old_Persian".
3402         */
3403        OLD_PERSIAN,
3404
3405        /**
3406         * Unicode script "Kharoshthi".
3407         */
3408        KHAROSHTHI,
3409
3410        /**
3411         * Unicode script "Balinese".
3412         */
3413        BALINESE,
3414
3415        /**
3416         * Unicode script "Cuneiform".
3417         */
3418        CUNEIFORM,
3419
3420        /**
3421         * Unicode script "Phoenician".
3422         */
3423        PHOENICIAN,
3424
3425        /**
3426         * Unicode script "Phags_Pa".
3427         */
3428        PHAGS_PA,
3429
3430        /**
3431         * Unicode script "Nko".
3432         */
3433        NKO,
3434
3435        /**
3436         * Unicode script "Sundanese".
3437         */
3438        SUNDANESE,
3439
3440        /**
3441         * Unicode script "Batak".
3442         */
3443        BATAK,
3444
3445        /**
3446         * Unicode script "Lepcha".
3447         */
3448        LEPCHA,
3449
3450        /**
3451         * Unicode script "Ol_Chiki".
3452         */
3453        OL_CHIKI,
3454
3455        /**
3456         * Unicode script "Vai".
3457         */
3458        VAI,
3459
3460        /**
3461         * Unicode script "Saurashtra".
3462         */
3463        SAURASHTRA,
3464
3465        /**
3466         * Unicode script "Kayah_Li".
3467         */
3468        KAYAH_LI,
3469
3470        /**
3471         * Unicode script "Rejang".
3472         */
3473        REJANG,
3474
3475        /**
3476         * Unicode script "Lycian".
3477         */
3478        LYCIAN,
3479
3480        /**
3481         * Unicode script "Carian".
3482         */
3483        CARIAN,
3484
3485        /**
3486         * Unicode script "Lydian".
3487         */
3488        LYDIAN,
3489
3490        /**
3491         * Unicode script "Cham".
3492         */
3493        CHAM,
3494
3495        /**
3496         * Unicode script "Tai_Tham".
3497         */
3498        TAI_THAM,
3499
3500        /**
3501         * Unicode script "Tai_Viet".
3502         */
3503        TAI_VIET,
3504
3505        /**
3506         * Unicode script "Avestan".
3507         */
3508        AVESTAN,
3509
3510        /**
3511         * Unicode script "Egyptian_Hieroglyphs".
3512         */
3513        EGYPTIAN_HIEROGLYPHS,
3514
3515        /**
3516         * Unicode script "Samaritan".
3517         */
3518        SAMARITAN,
3519
3520        /**
3521         * Unicode script "Mandaic".
3522         */
3523        MANDAIC,
3524
3525        /**
3526         * Unicode script "Lisu".
3527         */
3528        LISU,
3529
3530        /**
3531         * Unicode script "Bamum".
3532         */
3533        BAMUM,
3534
3535        /**
3536         * Unicode script "Javanese".
3537         */
3538        JAVANESE,
3539
3540        /**
3541         * Unicode script "Meetei_Mayek".
3542         */
3543        MEETEI_MAYEK,
3544
3545        /**
3546         * Unicode script "Imperial_Aramaic".
3547         */
3548        IMPERIAL_ARAMAIC,
3549
3550        /**
3551         * Unicode script "Old_South_Arabian".
3552         */
3553        OLD_SOUTH_ARABIAN,
3554
3555        /**
3556         * Unicode script "Inscriptional_Parthian".
3557         */
3558        INSCRIPTIONAL_PARTHIAN,
3559
3560        /**
3561         * Unicode script "Inscriptional_Pahlavi".
3562         */
3563        INSCRIPTIONAL_PAHLAVI,
3564
3565        /**
3566         * Unicode script "Old_Turkic".
3567         */
3568        OLD_TURKIC,
3569
3570        /**
3571         * Unicode script "Brahmi".
3572         */
3573        BRAHMI,
3574
3575        /**
3576         * Unicode script "Kaithi".
3577         */
3578        KAITHI,
3579
3580        /**
3581         * Unicode script "Unknown".
3582         */
3583        UNKNOWN;
3584
        /*
         * Ascending table of range start code points. Entry i pairs with
         * scripts[i]: of(int) binary-searches this array and picks the entry
         * with the greatest start that is <= the code point, so for lookup
         * purposes every range runs up to the next entry's start minus one.
         * The trailing comment on each entry shows the nominal range and the
         * script stored at the same index in scripts.
         */
        private static final int[] scriptStarts = {
            0x0000,   // 0000..0040; COMMON
            0x0041,   // 0041..005A; LATIN
            0x005B,   // 005B..0060; COMMON
            0x0061,   // 0061..007A; LATIN
            0x007B,   // 007B..00A9; COMMON
            0x00AA,   // 00AA..00AA; LATIN
            0x00AB,   // 00AB..00B9; COMMON
            0x00BA,   // 00BA..00BA; LATIN
            0x00BB,   // 00BB..00BF; COMMON
            0x00C0,   // 00C0..00D6; LATIN
            0x00D7,   // 00D7..00D7; COMMON
            0x00D8,   // 00D8..00F6; LATIN
            0x00F7,   // 00F7..00F7; COMMON
            0x00F8,   // 00F8..02B8; LATIN
            0x02B9,   // 02B9..02DF; COMMON
            0x02E0,   // 02E0..02E4; LATIN
            0x02E5,   // 02E5..02E9; COMMON
            0x02EA,   // 02EA..02EB; BOPOMOFO
            0x02EC,   // 02EC..02FF; COMMON
            0x0300,   // 0300..036F; INHERITED
            0x0370,   // 0370..0373; GREEK
            0x0374,   // 0374..0374; COMMON
            0x0375,   // 0375..037D; GREEK
            0x037E,   // 037E..0383; COMMON
            0x0384,   // 0384..0384; GREEK
            0x0385,   // 0385..0385; COMMON
            0x0386,   // 0386..0386; GREEK
            0x0387,   // 0387..0387; COMMON
            0x0388,   // 0388..03E1; GREEK
            0x03E2,   // 03E2..03EF; COPTIC
            0x03F0,   // 03F0..03FF; GREEK
            0x0400,   // 0400..0484; CYRILLIC
            0x0485,   // 0485..0486; INHERITED
            0x0487,   // 0487..0530; CYRILLIC
            0x0531,   // 0531..0588; ARMENIAN
            0x0589,   // 0589..0589; COMMON
            0x058A,   // 058A..0590; ARMENIAN
            0x0591,   // 0591..05FF; HEBREW
            0x0600,   // 0600..060B; ARABIC
            0x060C,   // 060C..060C; COMMON
            0x060D,   // 060D..061A; ARABIC
            0x061B,   // 061B..061D; COMMON
            0x061E,   // 061E..061E; ARABIC
            0x061F,   // 061F..061F; COMMON
            0x0620,   // 0620..063F; ARABIC
            0x0640,   // 0640..0640; COMMON
            0x0641,   // 0641..064A; ARABIC
            0x064B,   // 064B..0655; INHERITED
            0x0656,   // 0656..065E; ARABIC
            0x065F,   // 065F..065F; INHERITED
            0x0660,   // 0660..0669; COMMON
            0x066A,   // 066A..066F; ARABIC
            0x0670,   // 0670..0670; INHERITED
            0x0671,   // 0671..06DC; ARABIC
            0x06DD,   // 06DD..06DD; COMMON
            0x06DE,   // 06DE..06FF; ARABIC
            0x0700,   // 0700..074F; SYRIAC
            0x0750,   // 0750..077F; ARABIC
            0x0780,   // 0780..07BF; THAANA
            0x07C0,   // 07C0..07FF; NKO
            0x0800,   // 0800..083F; SAMARITAN
            0x0840,   // 0840..08FF; MANDAIC
            0x0900,   // 0900..0950; DEVANAGARI
            0x0951,   // 0951..0952; INHERITED
            0x0953,   // 0953..0963; DEVANAGARI
            0x0964,   // 0964..0965; COMMON
            0x0966,   // 0966..096F; DEVANAGARI
            0x0970,   // 0970..0970; COMMON
            0x0971,   // 0971..0980; DEVANAGARI
            0x0981,   // 0981..0A00; BENGALI
            0x0A01,   // 0A01..0A80; GURMUKHI
            0x0A81,   // 0A81..0B00; GUJARATI
            0x0B01,   // 0B01..0B81; ORIYA
            0x0B82,   // 0B82..0C00; TAMIL
            0x0C01,   // 0C01..0C81; TELUGU
            0x0C82,   // 0C82..0CF0; KANNADA
            0x0D02,   // 0D02..0D81; MALAYALAM
            0x0D82,   // 0D82..0E00; SINHALA
            0x0E01,   // 0E01..0E3E; THAI
            0x0E3F,   // 0E3F..0E3F; COMMON
            0x0E40,   // 0E40..0E80; THAI
            0x0E81,   // 0E81..0EFF; LAO
            0x0F00,   // 0F00..0FD4; TIBETAN
            0x0FD5,   // 0FD5..0FD8; COMMON
            0x0FD9,   // 0FD9..0FFF; TIBETAN
            0x1000,   // 1000..109F; MYANMAR
            0x10A0,   // 10A0..10FA; GEORGIAN
            0x10FB,   // 10FB..10FB; COMMON
            0x10FC,   // 10FC..10FF; GEORGIAN
            0x1100,   // 1100..11FF; HANGUL
            0x1200,   // 1200..139F; ETHIOPIC
            0x13A0,   // 13A0..13FF; CHEROKEE
            0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
            0x1680,   // 1680..169F; OGHAM
            0x16A0,   // 16A0..16EA; RUNIC
            0x16EB,   // 16EB..16ED; COMMON
            0x16EE,   // 16EE..16FF; RUNIC
            0x1700,   // 1700..171F; TAGALOG
            0x1720,   // 1720..1734; HANUNOO
            0x1735,   // 1735..173F; COMMON
            0x1740,   // 1740..175F; BUHID
            0x1760,   // 1760..177F; TAGBANWA
            0x1780,   // 1780..17FF; KHMER
            0x1800,   // 1800..1801; MONGOLIAN
            0x1802,   // 1802..1803; COMMON
            0x1804,   // 1804..1804; MONGOLIAN
            0x1805,   // 1805..1805; COMMON
            0x1806,   // 1806..18AF; MONGOLIAN
            0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
            0x1900,   // 1900..194F; LIMBU
            0x1950,   // 1950..197F; TAI_LE
            0x1980,   // 1980..19DF; NEW_TAI_LUE
            0x19E0,   // 19E0..19FF; KHMER
            0x1A00,   // 1A00..1A1F; BUGINESE
            0x1A20,   // 1A20..1AFF; TAI_THAM
            0x1B00,   // 1B00..1B7F; BALINESE
            0x1B80,   // 1B80..1BBF; SUNDANESE
            0x1BC0,   // 1BC0..1BFF; BATAK
            0x1C00,   // 1C00..1C4F; LEPCHA
            0x1C50,   // 1C50..1CCF; OL_CHIKI
            0x1CD0,   // 1CD0..1CD2; INHERITED
            0x1CD3,   // 1CD3..1CD3; COMMON
            0x1CD4,   // 1CD4..1CE0; INHERITED
            0x1CE1,   // 1CE1..1CE1; COMMON
            0x1CE2,   // 1CE2..1CE8; INHERITED
            0x1CE9,   // 1CE9..1CEC; COMMON
            0x1CED,   // 1CED..1CED; INHERITED
            0x1CEE,   // 1CEE..1CFF; COMMON
            0x1D00,   // 1D00..1D25; LATIN
            0x1D26,   // 1D26..1D2A; GREEK
            0x1D2B,   // 1D2B..1D2B; CYRILLIC
            0x1D2C,   // 1D2C..1D5C; LATIN
            0x1D5D,   // 1D5D..1D61; GREEK
            0x1D62,   // 1D62..1D65; LATIN
            0x1D66,   // 1D66..1D6A; GREEK
            0x1D6B,   // 1D6B..1D77; LATIN
            0x1D78,   // 1D78..1D78; CYRILLIC
            0x1D79,   // 1D79..1DBE; LATIN
            0x1DBF,   // 1DBF..1DBF; GREEK
            0x1DC0,   // 1DC0..1DFF; INHERITED
            0x1E00,   // 1E00..1EFF; LATIN
            0x1F00,   // 1F00..1FFF; GREEK
            0x2000,   // 2000..200B; COMMON
            0x200C,   // 200C..200D; INHERITED
            0x200E,   // 200E..2070; COMMON
            0x2071,   // 2071..2073; LATIN
            0x2074,   // 2074..207E; COMMON
            0x207F,   // 207F..207F; LATIN
            0x2080,   // 2080..208F; COMMON
            0x2090,   // 2090..209F; LATIN
            0x20A0,   // 20A0..20CF; COMMON
            0x20D0,   // 20D0..20FF; INHERITED
            0x2100,   // 2100..2125; COMMON
            0x2126,   // 2126..2126; GREEK
            0x2127,   // 2127..2129; COMMON
            0x212A,   // 212A..212B; LATIN
            0x212C,   // 212C..2131; COMMON
            0x2132,   // 2132..2132; LATIN
            0x2133,   // 2133..214D; COMMON
            0x214E,   // 214E..214E; LATIN
            0x214F,   // 214F..215F; COMMON
            0x2160,   // 2160..2188; LATIN
            0x2189,   // 2189..27FF; COMMON
            0x2800,   // 2800..28FF; BRAILLE
            0x2900,   // 2900..2BFF; COMMON
            0x2C00,   // 2C00..2C5F; GLAGOLITIC
            0x2C60,   // 2C60..2C7F; LATIN
            0x2C80,   // 2C80..2CFF; COPTIC
            0x2D00,   // 2D00..2D2F; GEORGIAN
            0x2D30,   // 2D30..2D7F; TIFINAGH
            0x2D80,   // 2D80..2DDF; ETHIOPIC
            0x2DE0,   // 2DE0..2DFF; CYRILLIC
            0x2E00,   // 2E00..2E7F; COMMON
            0x2E80,   // 2E80..2FEF; HAN
            0x2FF0,   // 2FF0..3004; COMMON
            0x3005,   // 3005..3005; HAN
            0x3006,   // 3006..3006; COMMON
            0x3007,   // 3007..3007; HAN
            0x3008,   // 3008..3020; COMMON
            0x3021,   // 3021..3029; HAN
            0x302A,   // 302A..302D; INHERITED
            0x302E,   // 302E..302F; HANGUL
            0x3030,   // 3030..3037; COMMON
            0x3038,   // 3038..303B; HAN
            0x303C,   // 303C..3040; COMMON
            0x3041,   // 3041..3098; HIRAGANA
            0x3099,   // 3099..309A; INHERITED
            0x309B,   // 309B..309C; COMMON
            0x309D,   // 309D..309F; HIRAGANA
            0x30A0,   // 30A0..30A0; COMMON
            0x30A1,   // 30A1..30FA; KATAKANA
            0x30FB,   // 30FB..30FC; COMMON
            0x30FD,   // 30FD..3104; KATAKANA
            0x3105,   // 3105..3130; BOPOMOFO
            0x3131,   // 3131..318F; HANGUL
            0x3190,   // 3190..319F; COMMON
            0x31A0,   // 31A0..31BF; BOPOMOFO
            0x31C0,   // 31C0..31EF; COMMON
            0x31F0,   // 31F0..31FF; KATAKANA
            0x3200,   // 3200..321F; HANGUL
            0x3220,   // 3220..325F; COMMON
            0x3260,   // 3260..327E; HANGUL
            0x327F,   // 327F..32CF; COMMON
            0x32D0,   // 32D0..3357; KATAKANA
            0x3358,   // 3358..33FF; COMMON
            0x3400,   // 3400..4DBF; HAN
            0x4DC0,   // 4DC0..4DFF; COMMON
            0x4E00,   // 4E00..9FFF; HAN
            0xA000,   // A000..A4CF; YI
            0xA4D0,   // A4D0..A4FF; LISU
            0xA500,   // A500..A63F; VAI
            0xA640,   // A640..A69F; CYRILLIC
            0xA6A0,   // A6A0..A6FF; BAMUM
            0xA700,   // A700..A721; COMMON
            0xA722,   // A722..A787; LATIN
            0xA788,   // A788..A78A; COMMON
            0xA78B,   // A78B..A7FF; LATIN
            0xA800,   // A800..A82F; SYLOTI_NAGRI
            0xA830,   // A830..A83F; COMMON
            0xA840,   // A840..A87F; PHAGS_PA
            0xA880,   // A880..A8DF; SAURASHTRA
            0xA8E0,   // A8E0..A8FF; DEVANAGARI
            0xA900,   // A900..A92F; KAYAH_LI
            0xA930,   // A930..A95F; REJANG
            0xA960,   // A960..A97F; HANGUL
            0xA980,   // A980..A9FF; JAVANESE
            0xAA00,   // AA00..AA5F; CHAM
            0xAA60,   // AA60..AA7F; MYANMAR
            0xAA80,   // AA80..AB00; TAI_VIET
            0xAB01,   // AB01..ABBF; ETHIOPIC
            0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
            0xAC00,   // AC00..D7FB; HANGUL
            0xD7FC,   // D7FC..F8FF; UNKNOWN
            0xF900,   // F900..FAFF; HAN
            0xFB00,   // FB00..FB12; LATIN
            0xFB13,   // FB13..FB1C; ARMENIAN
            0xFB1D,   // FB1D..FB4F; HEBREW
            0xFB50,   // FB50..FD3D; ARABIC
            0xFD3E,   // FD3E..FD4F; COMMON
            0xFD50,   // FD50..FDFC; ARABIC
            0xFDFD,   // FDFD..FDFF; COMMON
            0xFE00,   // FE00..FE0F; INHERITED
            0xFE10,   // FE10..FE1F; COMMON
            0xFE20,   // FE20..FE2F; INHERITED
            0xFE30,   // FE30..FE6F; COMMON
            0xFE70,   // FE70..FEFE; ARABIC
            0xFEFF,   // FEFF..FF20; COMMON
            0xFF21,   // FF21..FF3A; LATIN
            0xFF3B,   // FF3B..FF40; COMMON
            0xFF41,   // FF41..FF5A; LATIN
            0xFF5B,   // FF5B..FF65; COMMON
            0xFF66,   // FF66..FF6F; KATAKANA
            0xFF70,   // FF70..FF70; COMMON
            0xFF71,   // FF71..FF9D; KATAKANA
            0xFF9E,   // FF9E..FF9F; COMMON
            0xFFA0,   // FFA0..FFDF; HANGUL
            0xFFE0,   // FFE0..FFFF; COMMON
            0x10000,  // 10000..100FF; LINEAR_B
            0x10100,  // 10100..1013F; COMMON
            0x10140,  // 10140..1018F; GREEK
            0x10190,  // 10190..101FC; COMMON
            0x101FD,  // 101FD..1027F; INHERITED
            0x10280,  // 10280..1029F; LYCIAN
            0x102A0,  // 102A0..102FF; CARIAN
            0x10300,  // 10300..1032F; OLD_ITALIC
            0x10330,  // 10330..1037F; GOTHIC
            0x10380,  // 10380..1039F; UGARITIC
            0x103A0,  // 103A0..103FF; OLD_PERSIAN
            0x10400,  // 10400..1044F; DESERET
            0x10450,  // 10450..1047F; SHAVIAN
            0x10480,  // 10480..107FF; OSMANYA
            0x10800,  // 10800..1083F; CYPRIOT
            0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
            0x10900,  // 10900..1091F; PHOENICIAN
            0x10920,  // 10920..109FF; LYDIAN
            0x10A00,  // 10A00..10A5F; KHAROSHTHI
            0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
            0x10B00,  // 10B00..10B3F; AVESTAN
            0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
            0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
            0x10C00,  // 10C00..10E5F; OLD_TURKIC
            0x10E60,  // 10E60..10FFF; ARABIC
            0x11000,  // 11000..1107F; BRAHMI
            0x11080,  // 11080..11FFF; KAITHI
            0x12000,  // 12000..12FFF; CUNEIFORM
            0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
            0x16800,  // 16800..16A38; BAMUM
            0x1B000,  // 1B000..1B000; KATAKANA
            0x1B001,  // 1B001..1CFFF; HIRAGANA
            0x1D000,  // 1D000..1D166; COMMON
            0x1D167,  // 1D167..1D169; INHERITED
            0x1D16A,  // 1D16A..1D17A; COMMON
            0x1D17B,  // 1D17B..1D182; INHERITED
            0x1D183,  // 1D183..1D184; COMMON
            0x1D185,  // 1D185..1D18B; INHERITED
            0x1D18C,  // 1D18C..1D1A9; COMMON
            0x1D1AA,  // 1D1AA..1D1AD; INHERITED
            0x1D1AE,  // 1D1AE..1D1FF; COMMON
            0x1D200,  // 1D200..1D2FF; GREEK
            0x1D300,  // 1D300..1F1FF; COMMON
            0x1F200,  // 1F200..1F200; HIRAGANA
            0x1F201,  // 1F201..1FFFF; COMMON
            0x20000,  // 20000..E0000; HAN
            0xE0001,  // E0001..E00FF; COMMON
            0xE0100,  // E0100..E01EF; INHERITED
            0xE01F0   // E01F0..10FFFF; UNKNOWN

        };
3894
        /*
         * Script for each range listed in scriptStarts: scripts[i] is the
         * script of the range that begins at scriptStarts[i]. of(int) indexes
         * this array with a position found by searching scriptStarts, so the
         * two arrays must keep the same length and the same order.
         */
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            INHERITED,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
4203
4204        private static HashMap<String, Character.UnicodeScript> aliases;
4205        static {
4206            aliases = new HashMap<>(128);
4207            aliases.put("ARAB", ARABIC);
4208            aliases.put("ARMI", IMPERIAL_ARAMAIC);
4209            aliases.put("ARMN", ARMENIAN);
4210            aliases.put("AVST", AVESTAN);
4211            aliases.put("BALI", BALINESE);
4212            aliases.put("BAMU", BAMUM);
4213            aliases.put("BATK", BATAK);
4214            aliases.put("BENG", BENGALI);
4215            aliases.put("BOPO", BOPOMOFO);
4216            aliases.put("BRAI", BRAILLE);
4217            aliases.put("BRAH", BRAHMI);
4218            aliases.put("BUGI", BUGINESE);
4219            aliases.put("BUHD", BUHID);
4220            aliases.put("CANS", CANADIAN_ABORIGINAL);
4221            aliases.put("CARI", CARIAN);
4222            aliases.put("CHAM", CHAM);
4223            aliases.put("CHER", CHEROKEE);
4224            aliases.put("COPT", COPTIC);
4225            aliases.put("CPRT", CYPRIOT);
4226            aliases.put("CYRL", CYRILLIC);
4227            aliases.put("DEVA", DEVANAGARI);
4228            aliases.put("DSRT", DESERET);
4229            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4230            aliases.put("ETHI", ETHIOPIC);
4231            aliases.put("GEOR", GEORGIAN);
4232            aliases.put("GLAG", GLAGOLITIC);
4233            aliases.put("GOTH", GOTHIC);
4234            aliases.put("GREK", GREEK);
4235            aliases.put("GUJR", GUJARATI);
4236            aliases.put("GURU", GURMUKHI);
4237            aliases.put("HANG", HANGUL);
4238            aliases.put("HANI", HAN);
4239            aliases.put("HANO", HANUNOO);
4240            aliases.put("HEBR", HEBREW);
4241            aliases.put("HIRA", HIRAGANA);
4242            // it appears we don't have the KATAKANA_OR_HIRAGANA
4243            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4244            aliases.put("ITAL", OLD_ITALIC);
4245            aliases.put("JAVA", JAVANESE);
4246            aliases.put("KALI", KAYAH_LI);
4247            aliases.put("KANA", KATAKANA);
4248            aliases.put("KHAR", KHAROSHTHI);
4249            aliases.put("KHMR", KHMER);
4250            aliases.put("KNDA", KANNADA);
4251            aliases.put("KTHI", KAITHI);
4252            aliases.put("LANA", TAI_THAM);
4253            aliases.put("LAOO", LAO);
4254            aliases.put("LATN", LATIN);
4255            aliases.put("LEPC", LEPCHA);
4256            aliases.put("LIMB", LIMBU);
4257            aliases.put("LINB", LINEAR_B);
4258            aliases.put("LISU", LISU);
4259            aliases.put("LYCI", LYCIAN);
4260            aliases.put("LYDI", LYDIAN);
4261            aliases.put("MAND", MANDAIC);
4262            aliases.put("MLYM", MALAYALAM);
4263            aliases.put("MONG", MONGOLIAN);
4264            aliases.put("MTEI", MEETEI_MAYEK);
4265            aliases.put("MYMR", MYANMAR);
4266            aliases.put("NKOO", NKO);
4267            aliases.put("OGAM", OGHAM);
4268            aliases.put("OLCK", OL_CHIKI);
4269            aliases.put("ORKH", OLD_TURKIC);
4270            aliases.put("ORYA", ORIYA);
4271            aliases.put("OSMA", OSMANYA);
4272            aliases.put("PHAG", PHAGS_PA);
4273            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4274            aliases.put("PHNX", PHOENICIAN);
4275            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4276            aliases.put("RJNG", REJANG);
4277            aliases.put("RUNR", RUNIC);
4278            aliases.put("SAMR", SAMARITAN);
4279            aliases.put("SARB", OLD_SOUTH_ARABIAN);
4280            aliases.put("SAUR", SAURASHTRA);
4281            aliases.put("SHAW", SHAVIAN);
4282            aliases.put("SINH", SINHALA);
4283            aliases.put("SUND", SUNDANESE);
4284            aliases.put("SYLO", SYLOTI_NAGRI);
4285            aliases.put("SYRC", SYRIAC);
4286            aliases.put("TAGB", TAGBANWA);
4287            aliases.put("TALE", TAI_LE);
4288            aliases.put("TALU", NEW_TAI_LUE);
4289            aliases.put("TAML", TAMIL);
4290            aliases.put("TAVT", TAI_VIET);
4291            aliases.put("TELU", TELUGU);
4292            aliases.put("TFNG", TIFINAGH);
4293            aliases.put("TGLG", TAGALOG);
4294            aliases.put("THAA", THAANA);
4295            aliases.put("THAI", THAI);
4296            aliases.put("TIBT", TIBETAN);
4297            aliases.put("UGAR", UGARITIC);
4298            aliases.put("VAII", VAI);
4299            aliases.put("XPEO", OLD_PERSIAN);
4300            aliases.put("XSUX", CUNEIFORM);
4301            aliases.put("YIII", YI);
4302            aliases.put("ZINH", INHERITED);
4303            aliases.put("ZYYY", COMMON);
4304            aliases.put("ZZZZ", UNKNOWN);
4305        }
4306
4307        /**
         * Returns the enum constant representing the Unicode script to which
         * the given character (Unicode code point) is assigned.
         *
         * @param   codePoint the character (Unicode code point) in question.
         * @return  The {@code UnicodeScript} constant representing the
         *          Unicode script to which this character is assigned.
4314         *
4315         * @exception IllegalArgumentException if the specified
4316         * {@code codePoint} is an invalid Unicode code point.
4317         * @see Character#isValidCodePoint(int)
4318         *
4319         */
4320        public static UnicodeScript of(int codePoint) {
4321            if (!isValidCodePoint(codePoint))
4322                throw new IllegalArgumentException();
4323            int type = getType(codePoint);
4324            // leave SURROGATE and PRIVATE_USE for table lookup
4325            if (type == UNASSIGNED)
4326                return UNKNOWN;
4327            int index = Arrays.binarySearch(scriptStarts, codePoint);
4328            if (index < 0)
4329                index = -index - 2;
4330            return scripts[index];
4331        }
4332
4333        /**
4334         * Returns the UnicodeScript constant with the given Unicode script
4335         * name or the script name alias. Script names and their aliases are
4336         * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4337         * and PropertyValueAliases&lt;version&gt;.txt define script names
4338         * and the script name aliases for a particular version of the
4339         * standard. The {@link Character} class specifies the version of
4340         * the standard that it supports.
4341         * <p>
4342         * Character case is ignored for all of the valid script names.
4343         * The en_US locale's case mapping rules are used to provide
4344         * case-insensitive string comparisons for script name validation.
         *
4347         * @param scriptName A {@code UnicodeScript} name.
4348         * @return The {@code UnicodeScript} constant identified
4349         *         by {@code scriptName}
4350         * @throws IllegalArgumentException if {@code scriptName} is an
4351         *         invalid name
4352         * @throws NullPointerException if {@code scriptName} is null
4353         */
4354        public static final UnicodeScript forName(String scriptName) {
4355            scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4356                                 //.replace(' ', '_'));
4357            UnicodeScript sc = aliases.get(scriptName);
4358            if (sc != null)
4359                return sc;
4360            return valueOf(scriptName);
4361        }
4362    }
4363
    /**
     * The {@code char} value wrapped by this {@code Character}. The field is
     * {@code final}: it is assigned once by the constructor and never changes.
     *
     * @serial the wrapped {@code char} value
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = 3786198910865385080L;
4373
4374    /**
4375     * Constructs a newly allocated {@code Character} object that
4376     * represents the specified {@code char} value.
4377     *
4378     * @param  value   the value to be represented by the
4379     *                  {@code Character} object.
4380     */
    public Character(char value) {
        // Stores the wrapped char; the field is final, so the object is immutable.
        this.value = value;
    }
4384
4385    private static class CharacterCache {
4386        private CharacterCache(){}
4387
4388        static final Character cache[] = new Character[127 + 1];
4389
4390        static {
4391            for (int i = 0; i < cache.length; i++)
4392                cache[i] = new Character((char)i);
4393        }
4394    }
4395
4396    /**
4397     * Returns a <tt>Character</tt> instance representing the specified
4398     * <tt>char</tt> value.
4399     * If a new <tt>Character</tt> instance is not required, this method
4400     * should generally be used in preference to the constructor
4401     * {@link #Character(char)}, as this method is likely to yield
4402     * significantly better space and time performance by caching
4403     * frequently requested values.
4404     *
4405     * This method will always cache values in the range {@code
4406     * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4407     * cache other values outside of this range.
4408     *
4409     * @param  c a char value.
4410     * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4411     * @since  1.5
4412     */
4413    public static Character valueOf(char c) {
4414        if (c <= 127) { // must cache
4415            return CharacterCache.cache[(int)c];
4416        }
4417        return new Character(c);
4418    }
4419
4420    /**
4421     * Returns the value of this {@code Character} object.
4422     * @return  the primitive {@code char} value represented by
4423     *          this object.
4424     */
4425    public char charValue() {
4426        return value;
4427    }
4428
4429    /**
4430     * Returns a hash code for this {@code Character}; equal to the result
4431     * of invoking {@code charValue()}.
4432     *
4433     * @return a hash code value for this {@code Character}
4434     */
4435    public int hashCode() {
4436        return Character.hashCode(value);
4437    }
4438
4439    /**
4440     * Returns a hash code for a {@code char} value; compatible with
4441     * {@code Character.hashCode()}.
4442     *
4443     * @since 1.8
4444     *
4445     * @param value The {@code char} for which to return a hash code.
4446     * @return a hash code value for a {@code char} value.
4447     */
4448    public static int hashCode(char value) {
4449        return (int)value;
4450    }
4451
4452    /**
4453     * Compares this object against the specified object.
4454     * The result is {@code true} if and only if the argument is not
4455     * {@code null} and is a {@code Character} object that
4456     * represents the same {@code char} value as this object.
4457     *
4458     * @param   obj   the object to compare with.
4459     * @return  {@code true} if the objects are the same;
4460     *          {@code false} otherwise.
4461     */
4462    public boolean equals(Object obj) {
4463        if (obj instanceof Character) {
4464            return value == ((Character)obj).charValue();
4465        }
4466        return false;
4467    }
4468
4469    /**
4470     * Returns a {@code String} object representing this
4471     * {@code Character}'s value.  The result is a string of
4472     * length 1 whose sole component is the primitive
4473     * {@code char} value represented by this
4474     * {@code Character} object.
4475     *
4476     * @return  a string representation of this object.
4477     */
4478    public String toString() {
4479        char buf[] = {value};
4480        return String.valueOf(buf);
4481    }
4482
4483    /**
4484     * Returns a {@code String} object representing the
4485     * specified {@code char}.  The result is a string of length
4486     * 1 consisting solely of the specified {@code char}.
4487     *
4488     * @param c the {@code char} to be converted
4489     * @return the string representation of the specified {@code char}
4490     * @since 1.4
4491     */
4492    public static String toString(char c) {
4493        return String.valueOf(c);
4494    }
4495
4496    /**
4497     * Determines whether the specified code point is a valid
4498     * <a href="http://www.unicode.org/glossary/#code_point">
4499     * Unicode code point value</a>.
4500     *
4501     * @param  codePoint the Unicode code point to be tested
4502     * @return {@code true} if the specified code point value is between
4503     *         {@link #MIN_CODE_POINT} and
4504     *         {@link #MAX_CODE_POINT} inclusive;
4505     *         {@code false} otherwise.
4506     * @since  1.5
4507     */
4508    public static boolean isValidCodePoint(int codePoint) {
4509        // Optimized form of:
4510        //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4511        int plane = codePoint >>> 16;
4512        return plane < ((MAX_CODE_POINT + 1) >>> 16);
4513    }
4514
4515    /**
4516     * Determines whether the specified character (Unicode code point)
4517     * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4518     * Such code points can be represented using a single {@code char}.
4519     *
4520     * @param  codePoint the character (Unicode code point) to be tested
4521     * @return {@code true} if the specified code point is between
4522     *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4523     *         {@code false} otherwise.
4524     * @since  1.7
4525     */
4526    public static boolean isBmpCodePoint(int codePoint) {
4527        return codePoint >>> 16 == 0;
4528        // Optimized form of:
4529        //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4530        // We consistently use logical shift (>>>) to facilitate
4531        // additional runtime optimizations.
4532    }
4533
4534    /**
4535     * Determines whether the specified character (Unicode code point)
4536     * is in the <a href="#supplementary">supplementary character</a> range.
4537     *
4538     * @param  codePoint the character (Unicode code point) to be tested
4539     * @return {@code true} if the specified code point is between
4540     *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4541     *         {@link #MAX_CODE_POINT} inclusive;
4542     *         {@code false} otherwise.
4543     * @since  1.5
4544     */
4545    public static boolean isSupplementaryCodePoint(int codePoint) {
4546        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4547            && codePoint <  MAX_CODE_POINT + 1;
4548    }
4549
4550    /**
4551     * Determines if the given {@code char} value is a
4552     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4553     * Unicode high-surrogate code unit</a>
4554     * (also known as <i>leading-surrogate code unit</i>).
4555     *
4556     * <p>Such values do not represent characters by themselves,
4557     * but are used in the representation of
4558     * <a href="#supplementary">supplementary characters</a>
4559     * in the UTF-16 encoding.
4560     *
4561     * @param  ch the {@code char} value to be tested.
4562     * @return {@code true} if the {@code char} value is between
4563     *         {@link #MIN_HIGH_SURROGATE} and
4564     *         {@link #MAX_HIGH_SURROGATE} inclusive;
4565     *         {@code false} otherwise.
4566     * @see    Character#isLowSurrogate(char)
4567     * @see    Character.UnicodeBlock#of(int)
4568     * @since  1.5
4569     */
4570    public static boolean isHighSurrogate(char ch) {
4571        // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4572        return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4573    }
4574
4575    /**
4576     * Determines if the given {@code char} value is a
4577     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4578     * Unicode low-surrogate code unit</a>
4579     * (also known as <i>trailing-surrogate code unit</i>).
4580     *
4581     * <p>Such values do not represent characters by themselves,
4582     * but are used in the representation of
4583     * <a href="#supplementary">supplementary characters</a>
4584     * in the UTF-16 encoding.
4585     *
4586     * @param  ch the {@code char} value to be tested.
4587     * @return {@code true} if the {@code char} value is between
4588     *         {@link #MIN_LOW_SURROGATE} and
4589     *         {@link #MAX_LOW_SURROGATE} inclusive;
4590     *         {@code false} otherwise.
4591     * @see    Character#isHighSurrogate(char)
4592     * @since  1.5
4593     */
4594    public static boolean isLowSurrogate(char ch) {
4595        return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4596    }
4597
4598    /**
4599     * Determines if the given {@code char} value is a Unicode
4600     * <i>surrogate code unit</i>.
4601     *
4602     * <p>Such values do not represent characters by themselves,
4603     * but are used in the representation of
4604     * <a href="#supplementary">supplementary characters</a>
4605     * in the UTF-16 encoding.
4606     *
4607     * <p>A char value is a surrogate code unit if and only if it is either
4608     * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4609     * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4610     *
4611     * @param  ch the {@code char} value to be tested.
4612     * @return {@code true} if the {@code char} value is between
4613     *         {@link #MIN_SURROGATE} and
4614     *         {@link #MAX_SURROGATE} inclusive;
4615     *         {@code false} otherwise.
4616     * @since  1.7
4617     */
4618    public static boolean isSurrogate(char ch) {
4619        return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4620    }
4621
4622    /**
4623     * Determines whether the specified pair of {@code char}
4624     * values is a valid
4625     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
     * Unicode surrogate pair</a>.
     *
     * <p>This method is equivalent to the expression:
     * <blockquote><pre>{@code
     * isHighSurrogate(high) && isLowSurrogate(low)
     * }</pre></blockquote>
4632     *
4633     * @param  high the high-surrogate code value to be tested
4634     * @param  low the low-surrogate code value to be tested
4635     * @return {@code true} if the specified high and
4636     * low-surrogate code values represent a valid surrogate pair;
4637     * {@code false} otherwise.
4638     * @since  1.5
4639     */
4640    public static boolean isSurrogatePair(char high, char low) {
4641        return isHighSurrogate(high) && isLowSurrogate(low);
4642    }
4643
4644    /**
4645     * Determines the number of {@code char} values needed to
4646     * represent the specified character (Unicode code point). If the
4647     * specified character is equal to or greater than 0x10000, then
4648     * the method returns 2. Otherwise, the method returns 1.
4649     *
4650     * <p>This method doesn't validate the specified character to be a
4651     * valid Unicode code point. The caller must validate the
4652     * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4653     * if necessary.
4654     *
4655     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  2 if the character is equal to or greater than 0x10000
     *          ({@link #MIN_SUPPLEMENTARY_CODE_POINT}), even when it is not a
     *          valid code point; 1 otherwise.
4657     * @see     Character#isSupplementaryCodePoint(int)
4658     * @since   1.5
4659     */
4660    public static int charCount(int codePoint) {
4661        return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4662    }
4663
4664    /**
4665     * Converts the specified surrogate pair to its supplementary code
4666     * point value. This method does not validate the specified
4667     * surrogate pair. The caller must validate it using {@link
4668     * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4669     *
4670     * @param  high the high-surrogate code unit
4671     * @param  low the low-surrogate code unit
4672     * @return the supplementary code point composed from the
4673     *         specified surrogate pair.
4674     * @since  1.5
4675     */
4676    public static int toCodePoint(char high, char low) {
4677        // Optimized form of:
4678        // return ((high - MIN_HIGH_SURROGATE) << 10)
4679        //         + (low - MIN_LOW_SURROGATE)
4680        //         + MIN_SUPPLEMENTARY_CODE_POINT;
4681        return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4682                                       - (MIN_HIGH_SURROGATE << 10)
4683                                       - MIN_LOW_SURROGATE);
4684    }
4685
4686    /**
4687     * Returns the code point at the given index of the
4688     * {@code CharSequence}. If the {@code char} value at
4689     * the given index in the {@code CharSequence} is in the
4690     * high-surrogate range, the following index is less than the
4691     * length of the {@code CharSequence}, and the
4692     * {@code char} value at the following index is in the
4693     * low-surrogate range, then the supplementary code point
4694     * corresponding to this surrogate pair is returned. Otherwise,
4695     * the {@code char} value at the given index is returned.
4696     *
4697     * @param seq a sequence of {@code char} values (Unicode code
4698     * units)
4699     * @param index the index to the {@code char} values (Unicode
4700     * code units) in {@code seq} to be converted
4701     * @return the Unicode code point at the given index
4702     * @exception NullPointerException if {@code seq} is null.
4703     * @exception IndexOutOfBoundsException if the value
4704     * {@code index} is negative or not less than
4705     * {@link CharSequence#length() seq.length()}.
4706     * @since  1.5
4707     */
4708    public static int codePointAt(CharSequence seq, int index) {
4709        char c1 = seq.charAt(index++);
4710        if (isHighSurrogate(c1)) {
4711            if (index < seq.length()) {
4712                char c2 = seq.charAt(index);
4713                if (isLowSurrogate(c2)) {
4714                    return toCodePoint(c1, c2);
4715                }
4716            }
4717        }
4718        return c1;
4719    }
4720
4721    /**
4722     * Returns the code point at the given index of the
4723     * {@code char} array. If the {@code char} value at
4724     * the given index in the {@code char} array is in the
4725     * high-surrogate range, the following index is less than the
4726     * length of the {@code char} array, and the
4727     * {@code char} value at the following index is in the
4728     * low-surrogate range, then the supplementary code point
4729     * corresponding to this surrogate pair is returned. Otherwise,
4730     * the {@code char} value at the given index is returned.
4731     *
4732     * @param a the {@code char} array
4733     * @param index the index to the {@code char} values (Unicode
4734     * code units) in the {@code char} array to be converted
4735     * @return the Unicode code point at the given index
4736     * @exception NullPointerException if {@code a} is null.
4737     * @exception IndexOutOfBoundsException if the value
4738     * {@code index} is negative or not less than
4739     * the length of the {@code char} array.
4740     * @since  1.5
4741     */
4742    public static int codePointAt(char[] a, int index) {
4743        return codePointAtImpl(a, index, a.length);
4744    }
4745
4746    /**
4747     * Returns the code point at the given index of the
4748     * {@code char} array, where only array elements with
4749     * {@code index} less than {@code limit} can be used. If
4750     * the {@code char} value at the given index in the
4751     * {@code char} array is in the high-surrogate range, the
4752     * following index is less than the {@code limit}, and the
4753     * {@code char} value at the following index is in the
4754     * low-surrogate range, then the supplementary code point
4755     * corresponding to this surrogate pair is returned. Otherwise,
4756     * the {@code char} value at the given index is returned.
4757     *
4758     * @param a the {@code char} array
4759     * @param index the index to the {@code char} values (Unicode
4760     * code units) in the {@code char} array to be converted
4761     * @param limit the index after the last array element that
4762     * can be used in the {@code char} array
4763     * @return the Unicode code point at the given index
4764     * @exception NullPointerException if {@code a} is null.
4765     * @exception IndexOutOfBoundsException if the {@code index}
4766     * argument is negative or not less than the {@code limit}
4767     * argument, or if the {@code limit} argument is negative or
4768     * greater than the length of the {@code char} array.
4769     * @since  1.5
4770     */
4771    public static int codePointAt(char[] a, int index, int limit) {
4772        if (index >= limit || limit < 0 || limit > a.length) {
4773            throw new IndexOutOfBoundsException();
4774        }
4775        return codePointAtImpl(a, index, limit);
4776    }
4777
4778    // throws ArrayIndexOutofBoundsException if index out of bounds
4779    static int codePointAtImpl(char[] a, int index, int limit) {
4780        char c1 = a[index++];
4781        if (isHighSurrogate(c1)) {
4782            if (index < limit) {
4783                char c2 = a[index];
4784                if (isLowSurrogate(c2)) {
4785                    return toCodePoint(c1, c2);
4786                }
4787            }
4788        }
4789        return c1;
4790    }
4791
4792    /**
4793     * Returns the code point preceding the given index of the
4794     * {@code CharSequence}. If the {@code char} value at
4795     * {@code (index - 1)} in the {@code CharSequence} is in
4796     * the low-surrogate range, {@code (index - 2)} is not
4797     * negative, and the {@code char} value at {@code (index - 2)}
4798     * in the {@code CharSequence} is in the
4799     * high-surrogate range, then the supplementary code point
4800     * corresponding to this surrogate pair is returned. Otherwise,
4801     * the {@code char} value at {@code (index - 1)} is
4802     * returned.
4803     *
4804     * @param seq the {@code CharSequence} instance
4805     * @param index the index following the code point that should be returned
4806     * @return the Unicode code point value before the given index.
4807     * @exception NullPointerException if {@code seq} is null.
4808     * @exception IndexOutOfBoundsException if the {@code index}
4809     * argument is less than 1 or greater than {@link
4810     * CharSequence#length() seq.length()}.
4811     * @since  1.5
4812     */
4813    public static int codePointBefore(CharSequence seq, int index) {
4814        char c2 = seq.charAt(--index);
4815        if (isLowSurrogate(c2)) {
4816            if (index > 0) {
4817                char c1 = seq.charAt(--index);
4818                if (isHighSurrogate(c1)) {
4819                    return toCodePoint(c1, c2);
4820                }
4821            }
4822        }
4823        return c2;
4824    }
4825
4826    /**
4827     * Returns the code point preceding the given index of the
4828     * {@code char} array. If the {@code char} value at
4829     * {@code (index - 1)} in the {@code char} array is in
4830     * the low-surrogate range, {@code (index - 2)} is not
4831     * negative, and the {@code char} value at {@code (index - 2)}
4832     * in the {@code char} array is in the
4833     * high-surrogate range, then the supplementary code point
4834     * corresponding to this surrogate pair is returned. Otherwise,
4835     * the {@code char} value at {@code (index - 1)} is
4836     * returned.
4837     *
4838     * @param a the {@code char} array
4839     * @param index the index following the code point that should be returned
4840     * @return the Unicode code point value before the given index.
4841     * @exception NullPointerException if {@code a} is null.
4842     * @exception IndexOutOfBoundsException if the {@code index}
4843     * argument is less than 1 or greater than the length of the
4844     * {@code char} array
4845     * @since  1.5
4846     */
4847    public static int codePointBefore(char[] a, int index) {
4848        return codePointBeforeImpl(a, index, 0);
4849    }
4850
4851    /**
4852     * Returns the code point preceding the given index of the
4853     * {@code char} array, where only array elements with
4854     * {@code index} greater than or equal to {@code start}
4855     * can be used. If the {@code char} value at {@code (index - 1)}
4856     * in the {@code char} array is in the
4857     * low-surrogate range, {@code (index - 2)} is not less than
4858     * {@code start}, and the {@code char} value at
4859     * {@code (index - 2)} in the {@code char} array is in
4860     * the high-surrogate range, then the supplementary code point
4861     * corresponding to this surrogate pair is returned. Otherwise,
4862     * the {@code char} value at {@code (index - 1)} is
4863     * returned.
4864     *
4865     * @param a the {@code char} array
4866     * @param index the index following the code point that should be returned
4867     * @param start the index of the first array element in the
4868     * {@code char} array
4869     * @return the Unicode code point value before the given index.
4870     * @exception NullPointerException if {@code a} is null.
4871     * @exception IndexOutOfBoundsException if the {@code index}
4872     * argument is not greater than the {@code start} argument or
4873     * is greater than the length of the {@code char} array, or
4874     * if the {@code start} argument is negative or not less than
4875     * the length of the {@code char} array.
4876     * @since  1.5
4877     */
4878    public static int codePointBefore(char[] a, int index, int start) {
4879        if (index <= start || start < 0 || start >= a.length) {
4880            throw new IndexOutOfBoundsException();
4881        }
4882        return codePointBeforeImpl(a, index, start);
4883    }
4884
4885    // throws ArrayIndexOutofBoundsException if index-1 out of bounds
4886    static int codePointBeforeImpl(char[] a, int index, int start) {
4887        char c2 = a[--index];
4888        if (isLowSurrogate(c2)) {
4889            if (index > start) {
4890                char c1 = a[--index];
4891                if (isHighSurrogate(c1)) {
4892                    return toCodePoint(c1, c2);
4893                }
4894            }
4895        }
4896        return c2;
4897    }
4898
4899    /**
4900     * Returns the leading surrogate (a
4901     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4902     * high surrogate code unit</a>) of the
4903     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4904     * surrogate pair</a>
4905     * representing the specified supplementary character (Unicode
4906     * code point) in the UTF-16 encoding.  If the specified character
4907     * is not a
4908     * <a href="Character.html#supplementary">supplementary character</a>,
4909     * an unspecified {@code char} is returned.
4910     *
4911     * <p>If
4912     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4913     * is {@code true}, then
4914     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4915     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4916     * are also always {@code true}.
4917     *
4918     * @param   codePoint a supplementary character (Unicode code point)
4919     * @return  the leading surrogate code unit used to represent the
4920     *          character in the UTF-16 encoding
4921     * @since   1.7
4922     */
4923    public static char highSurrogate(int codePoint) {
4924        return (char) ((codePoint >>> 10)
4925            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4926    }
4927
4928    /**
4929     * Returns the trailing surrogate (a
4930     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4931     * low surrogate code unit</a>) of the
4932     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4933     * surrogate pair</a>
4934     * representing the specified supplementary character (Unicode
4935     * code point) in the UTF-16 encoding.  If the specified character
4936     * is not a
4937     * <a href="Character.html#supplementary">supplementary character</a>,
4938     * an unspecified {@code char} is returned.
4939     *
4940     * <p>If
4941     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4942     * is {@code true}, then
4943     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4944     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4945     * are also always {@code true}.
4946     *
4947     * @param   codePoint a supplementary character (Unicode code point)
4948     * @return  the trailing surrogate code unit used to represent the
4949     *          character in the UTF-16 encoding
4950     * @since   1.7
4951     */
4952    public static char lowSurrogate(int codePoint) {
4953        return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4954    }
4955
4956    /**
4957     * Converts the specified character (Unicode code point) to its
4958     * UTF-16 representation. If the specified code point is a BMP
4959     * (Basic Multilingual Plane or Plane 0) value, the same value is
4960     * stored in {@code dst[dstIndex]}, and 1 is returned. If the
4961     * specified code point is a supplementary character, its
4962     * surrogate values are stored in {@code dst[dstIndex]}
4963     * (high-surrogate) and {@code dst[dstIndex+1]}
4964     * (low-surrogate), and 2 is returned.
4965     *
4966     * @param  codePoint the character (Unicode code point) to be converted.
4967     * @param  dst an array of {@code char} in which the
4968     * {@code codePoint}'s UTF-16 value is stored.
4969     * @param dstIndex the start index into the {@code dst}
4970     * array where the converted value is stored.
4971     * @return 1 if the code point is a BMP code point, 2 if the
4972     * code point is a supplementary code point.
4973     * @exception IllegalArgumentException if the specified
4974     * {@code codePoint} is not a valid Unicode code point.
4975     * @exception NullPointerException if the specified {@code dst} is null.
4976     * @exception IndexOutOfBoundsException if {@code dstIndex}
4977     * is negative or not less than {@code dst.length}, or if
4978     * {@code dst} at {@code dstIndex} doesn't have enough
4979     * array element(s) to store the resulting {@code char}
4980     * value(s). (If {@code dstIndex} is equal to
4981     * {@code dst.length-1} and the specified
4982     * {@code codePoint} is a supplementary character, the
4983     * high-surrogate value is not stored in
4984     * {@code dst[dstIndex]}.)
4985     * @since  1.5
4986     */
4987    public static int toChars(int codePoint, char[] dst, int dstIndex) {
4988        if (isBmpCodePoint(codePoint)) {
4989            dst[dstIndex] = (char) codePoint;
4990            return 1;
4991        } else if (isValidCodePoint(codePoint)) {
4992            toSurrogates(codePoint, dst, dstIndex);
4993            return 2;
4994        } else {
4995            throw new IllegalArgumentException();
4996        }
4997    }
4998
4999    /**
5000     * Converts the specified character (Unicode code point) to its
5001     * UTF-16 representation stored in a {@code char} array. If
5002     * the specified code point is a BMP (Basic Multilingual Plane or
5003     * Plane 0) value, the resulting {@code char} array has
5004     * the same value as {@code codePoint}. If the specified code
5005     * point is a supplementary code point, the resulting
5006     * {@code char} array has the corresponding surrogate pair.
5007     *
5008     * @param  codePoint a Unicode code point
5009     * @return a {@code char} array having
5010     *         {@code codePoint}'s UTF-16 representation.
5011     * @exception IllegalArgumentException if the specified
5012     * {@code codePoint} is not a valid Unicode code point.
5013     * @since  1.5
5014     */
5015    public static char[] toChars(int codePoint) {
5016        if (isBmpCodePoint(codePoint)) {
5017            return new char[] { (char) codePoint };
5018        } else if (isValidCodePoint(codePoint)) {
5019            char[] result = new char[2];
5020            toSurrogates(codePoint, result, 0);
5021            return result;
5022        } else {
5023            throw new IllegalArgumentException();
5024        }
5025    }
5026
5027    static void toSurrogates(int codePoint, char[] dst, int index) {
5028        // We write elements "backwards" to guarantee all-or-nothing
5029        dst[index+1] = lowSurrogate(codePoint);
5030        dst[index] = highSurrogate(codePoint);
5031    }
5032
5033    /**
5034     * Returns the number of Unicode code points in the text range of
5035     * the specified char sequence. The text range begins at the
5036     * specified {@code beginIndex} and extends to the
5037     * {@code char} at index {@code endIndex - 1}. Thus the
5038     * length (in {@code char}s) of the text range is
5039     * {@code endIndex-beginIndex}. Unpaired surrogates within
5040     * the text range count as one code point each.
5041     *
5042     * @param seq the char sequence
5043     * @param beginIndex the index to the first {@code char} of
5044     * the text range.
5045     * @param endIndex the index after the last {@code char} of
5046     * the text range.
5047     * @return the number of Unicode code points in the specified text
5048     * range
5049     * @exception NullPointerException if {@code seq} is null.
5050     * @exception IndexOutOfBoundsException if the
5051     * {@code beginIndex} is negative, or {@code endIndex}
5052     * is larger than the length of the given sequence, or
5053     * {@code beginIndex} is larger than {@code endIndex}.
5054     * @since  1.5
5055     */
5056    public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5057        int length = seq.length();
5058        if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5059            throw new IndexOutOfBoundsException();
5060        }
5061        int n = endIndex - beginIndex;
5062        for (int i = beginIndex; i < endIndex; ) {
5063            if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5064                isLowSurrogate(seq.charAt(i))) {
5065                n--;
5066                i++;
5067            }
5068        }
5069        return n;
5070    }
5071
5072    /**
5073     * Returns the number of Unicode code points in a subarray of the
5074     * {@code char} array argument. The {@code offset}
5075     * argument is the index of the first {@code char} of the
5076     * subarray and the {@code count} argument specifies the
5077     * length of the subarray in {@code char}s. Unpaired
5078     * surrogates within the subarray count as one code point each.
5079     *
5080     * @param a the {@code char} array
5081     * @param offset the index of the first {@code char} in the
5082     * given {@code char} array
5083     * @param count the length of the subarray in {@code char}s
5084     * @return the number of Unicode code points in the specified subarray
5085     * @exception NullPointerException if {@code a} is null.
5086     * @exception IndexOutOfBoundsException if {@code offset} or
5087     * {@code count} is negative, or if {@code offset +
5088     * count} is larger than the length of the given array.
5089     * @since  1.5
5090     */
5091    public static int codePointCount(char[] a, int offset, int count) {
5092        if (count > a.length - offset || offset < 0 || count < 0) {
5093            throw new IndexOutOfBoundsException();
5094        }
5095        return codePointCountImpl(a, offset, count);
5096    }
5097
5098    static int codePointCountImpl(char[] a, int offset, int count) {
5099        int endIndex = offset + count;
5100        int n = count;
5101        for (int i = offset; i < endIndex; ) {
5102            if (isHighSurrogate(a[i++]) && i < endIndex &&
5103                isLowSurrogate(a[i])) {
5104                n--;
5105                i++;
5106            }
5107        }
5108        return n;
5109    }
5110
5111    /**
5112     * Returns the index within the given char sequence that is offset
5113     * from the given {@code index} by {@code codePointOffset}
5114     * code points. Unpaired surrogates within the text range given by
5115     * {@code index} and {@code codePointOffset} count as
5116     * one code point each.
5117     *
5118     * @param seq the char sequence
5119     * @param index the index to be offset
5120     * @param codePointOffset the offset in code points
5121     * @return the index within the char sequence
5122     * @exception NullPointerException if {@code seq} is null.
5123     * @exception IndexOutOfBoundsException if {@code index}
     *   is negative or larger than the length of the char sequence,
5125     *   or if {@code codePointOffset} is positive and the
5126     *   subsequence starting with {@code index} has fewer than
5127     *   {@code codePointOffset} code points, or if
5128     *   {@code codePointOffset} is negative and the subsequence
5129     *   before {@code index} has fewer than the absolute value
5130     *   of {@code codePointOffset} code points.
5131     * @since 1.5
5132     */
5133    public static int offsetByCodePoints(CharSequence seq, int index,
5134                                         int codePointOffset) {
5135        int length = seq.length();
5136        if (index < 0 || index > length) {
5137            throw new IndexOutOfBoundsException();
5138        }
5139
5140        int x = index;
5141        if (codePointOffset >= 0) {
5142            int i;
5143            for (i = 0; x < length && i < codePointOffset; i++) {
5144                if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5145                    isLowSurrogate(seq.charAt(x))) {
5146                    x++;
5147                }
5148            }
5149            if (i < codePointOffset) {
5150                throw new IndexOutOfBoundsException();
5151            }
5152        } else {
5153            int i;
5154            for (i = codePointOffset; x > 0 && i < 0; i++) {
5155                if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5156                    isHighSurrogate(seq.charAt(x-1))) {
5157                    x--;
5158                }
5159            }
5160            if (i < 0) {
5161                throw new IndexOutOfBoundsException();
5162            }
5163        }
5164        return x;
5165    }
5166
5167    /**
5168     * Returns the index within the given {@code char} subarray
5169     * that is offset from the given {@code index} by
5170     * {@code codePointOffset} code points. The
5171     * {@code start} and {@code count} arguments specify a
5172     * subarray of the {@code char} array. Unpaired surrogates
5173     * within the text range given by {@code index} and
5174     * {@code codePointOffset} count as one code point each.
5175     *
5176     * @param a the {@code char} array
5177     * @param start the index of the first {@code char} of the
5178     * subarray
5179     * @param count the length of the subarray in {@code char}s
5180     * @param index the index to be offset
5181     * @param codePointOffset the offset in code points
5182     * @return the index within the subarray
5183     * @exception NullPointerException if {@code a} is null.
5184     * @exception IndexOutOfBoundsException
5185     *   if {@code start} or {@code count} is negative,
5186     *   or if {@code start + count} is larger than the length of
5187     *   the given array,
5188     *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
5190     *   or if {@code codePointOffset} is positive and the text range
5191     *   starting with {@code index} and ending with {@code start + count - 1}
5192     *   has fewer than {@code codePointOffset} code
5193     *   points,
5194     *   or if {@code codePointOffset} is negative and the text range
5195     *   starting with {@code start} and ending with {@code index - 1}
5196     *   has fewer than the absolute value of
5197     *   {@code codePointOffset} code points.
5198     * @since 1.5
5199     */
5200    public static int offsetByCodePoints(char[] a, int start, int count,
5201                                         int index, int codePointOffset) {
5202        if (count > a.length-start || start < 0 || count < 0
5203            || index < start || index > start+count) {
5204            throw new IndexOutOfBoundsException();
5205        }
5206        return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5207    }
5208
5209    static int offsetByCodePointsImpl(char[]a, int start, int count,
5210                                      int index, int codePointOffset) {
5211        int x = index;
5212        if (codePointOffset >= 0) {
5213            int limit = start + count;
5214            int i;
5215            for (i = 0; x < limit && i < codePointOffset; i++) {
5216                if (isHighSurrogate(a[x++]) && x < limit &&
5217                    isLowSurrogate(a[x])) {
5218                    x++;
5219                }
5220            }
5221            if (i < codePointOffset) {
5222                throw new IndexOutOfBoundsException();
5223            }
5224        } else {
5225            int i;
5226            for (i = codePointOffset; x > start && i < 0; i++) {
5227                if (isLowSurrogate(a[--x]) && x > start &&
5228                    isHighSurrogate(a[x-1])) {
5229                    x--;
5230                }
5231            }
5232            if (i < 0) {
5233                throw new IndexOutOfBoundsException();
5234            }
5235        }
5236        return x;
5237    }
5238
5239    /**
5240     * Determines if the specified character is a lowercase character.
5241     * <p>
5242     * A character is lowercase if its general category type, provided
5243     * by {@code Character.getType(ch)}, is
5244     * {@code LOWERCASE_LETTER}, or it has contributory property
5245     * Other_Lowercase as defined by the Unicode Standard.
5246     * <p>
5247     * The following are examples of lowercase characters:
5248     * <p><blockquote><pre>
5249     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5250     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5251     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5252     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5253     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5254     * </pre></blockquote>
5255     * <p> Many other Unicode characters are lowercase too.
5256     *
5257     * <p><b>Note:</b> This method cannot handle <a
5258     * href="#supplementary"> supplementary characters</a>. To support
5259     * all Unicode characters, including supplementary characters, use
5260     * the {@link #isLowerCase(int)} method.
5261     *
5262     * @param   ch   the character to be tested.
5263     * @return  {@code true} if the character is lowercase;
5264     *          {@code false} otherwise.
     * @see     Character#isUpperCase(char)
5266     * @see     Character#isTitleCase(char)
5267     * @see     Character#toLowerCase(char)
5268     * @see     Character#getType(char)
5269     */
5270    public static boolean isLowerCase(char ch) {
5271        return isLowerCase((int)ch);
5272    }
5273
5274    /**
5275     * Determines if the specified character (Unicode code point) is a
5276     * lowercase character.
5277     * <p>
5278     * A character is lowercase if its general category type, provided
5279     * by {@link Character#getType getType(codePoint)}, is
5280     * {@code LOWERCASE_LETTER}, or it has contributory property
5281     * Other_Lowercase as defined by the Unicode Standard.
5282     * <p>
5283     * The following are examples of lowercase characters:
5284     * <p><blockquote><pre>
5285     * a b c d e f g h i j k l m n o p q r s t u v w x y z
5286     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5287     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5288     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5289     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5290     * </pre></blockquote>
5291     * <p> Many other Unicode characters are lowercase too.
5292     *
5293     * @param   codePoint the character (Unicode code point) to be tested.
5294     * @return  {@code true} if the character is lowercase;
5295     *          {@code false} otherwise.
     * @see     Character#isUpperCase(int)
5297     * @see     Character#isTitleCase(int)
5298     * @see     Character#toLowerCase(int)
5299     * @see     Character#getType(int)
5300     * @since   1.5
5301     */
5302    public static boolean isLowerCase(int codePoint) {
5303        return isLowerCaseImpl(codePoint);
5304    }
5305
5306    static native boolean isLowerCaseImpl(int codePoint);
5307
5308    /**
5309     * Determines if the specified character is an uppercase character.
5310     * <p>
5311     * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5313     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5314     * <p>
5315     * The following are examples of uppercase characters:
5316     * <p><blockquote><pre>
5317     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5318     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5319     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5320     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5321     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5322     * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
5324     *
5325     * <p><b>Note:</b> This method cannot handle <a
5326     * href="#supplementary"> supplementary characters</a>. To support
5327     * all Unicode characters, including supplementary characters, use
5328     * the {@link #isUpperCase(int)} method.
5329     *
5330     * @param   ch   the character to be tested.
5331     * @return  {@code true} if the character is uppercase;
5332     *          {@code false} otherwise.
5333     * @see     Character#isLowerCase(char)
5334     * @see     Character#isTitleCase(char)
5335     * @see     Character#toUpperCase(char)
5336     * @see     Character#getType(char)
5337     * @since   1.0
5338     */
5339    public static boolean isUpperCase(char ch) {
5340        return isUpperCase((int)ch);
5341    }
5342
5343    /**
5344     * Determines if the specified character (Unicode code point) is an uppercase character.
5345     * <p>
5346     * A character is uppercase if its general category type, provided by
5347     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5348     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5349     * <p>
5350     * The following are examples of uppercase characters:
5351     * <p><blockquote><pre>
5352     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5353     * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5354     * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5355     * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5356     * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5357     * </pre></blockquote>
     * <p> Many other Unicode characters are uppercase too.
5359     *
5360     * @param   codePoint the character (Unicode code point) to be tested.
5361     * @return  {@code true} if the character is uppercase;
5362     *          {@code false} otherwise.
5363     * @see     Character#isLowerCase(int)
5364     * @see     Character#isTitleCase(int)
5365     * @see     Character#toUpperCase(int)
5366     * @see     Character#getType(int)
5367     * @since   1.5
5368     */
5369    public static boolean isUpperCase(int codePoint) {
5370        return isUpperCaseImpl(codePoint);
5371    }
5372
5373    static native boolean isUpperCaseImpl(int codePoint);
5374
5375
5376    /**
5377     * Determines if the specified character is a titlecase character.
5378     * <p>
5379     * A character is a titlecase character if its general
5380     * category type, provided by {@code Character.getType(ch)},
5381     * is {@code TITLECASE_LETTER}.
5382     * <p>
5383     * Some characters look like pairs of Latin letters. For example, there
5384     * is an uppercase letter that looks like "LJ" and has a corresponding
5385     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5386     * is the appropriate form to use when rendering a word in lowercase
5387     * with initial capitals, as for a book title.
5388     * <p>
5389     * These are some of the Unicode characters for which this method returns
5390     * {@code true}:
5391     * <ul>
5392     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5393     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5394     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5395     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5396     * </ul>
     * <p> Many other Unicode characters are titlecase too.
5398     *
5399     * <p><b>Note:</b> This method cannot handle <a
5400     * href="#supplementary"> supplementary characters</a>. To support
5401     * all Unicode characters, including supplementary characters, use
5402     * the {@link #isTitleCase(int)} method.
5403     *
5404     * @param   ch   the character to be tested.
5405     * @return  {@code true} if the character is titlecase;
5406     *          {@code false} otherwise.
5407     * @see     Character#isLowerCase(char)
5408     * @see     Character#isUpperCase(char)
5409     * @see     Character#toTitleCase(char)
5410     * @see     Character#getType(char)
5411     * @since   1.0.2
5412     */
5413    public static boolean isTitleCase(char ch) {
5414        return isTitleCase((int)ch);
5415    }
5416
5417    /**
5418     * Determines if the specified character (Unicode code point) is a titlecase character.
5419     * <p>
5420     * A character is a titlecase character if its general
5421     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5422     * is {@code TITLECASE_LETTER}.
5423     * <p>
5424     * Some characters look like pairs of Latin letters. For example, there
5425     * is an uppercase letter that looks like "LJ" and has a corresponding
5426     * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5427     * is the appropriate form to use when rendering a word in lowercase
5428     * with initial capitals, as for a book title.
5429     * <p>
5430     * These are some of the Unicode characters for which this method returns
5431     * {@code true}:
5432     * <ul>
5433     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5434     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5435     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5436     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5437     * </ul>
     * <p> Many other Unicode characters are titlecase too.
5439     *
5440     * @param   codePoint the character (Unicode code point) to be tested.
5441     * @return  {@code true} if the character is titlecase;
5442     *          {@code false} otherwise.
5443     * @see     Character#isLowerCase(int)
5444     * @see     Character#isUpperCase(int)
5445     * @see     Character#toTitleCase(int)
5446     * @see     Character#getType(int)
5447     * @since   1.5
5448     */
5449    public static boolean isTitleCase(int codePoint) {
5450        return isTitleCaseImpl(codePoint);
5451    }
5452
5453    static native boolean isTitleCaseImpl(int codePoint);
5454
5455    /**
5456     * Determines if the specified character is a digit.
5457     * <p>
5458     * A character is a digit if its general category type, provided
5459     * by {@code Character.getType(ch)}, is
5460     * {@code DECIMAL_DIGIT_NUMBER}.
5461     * <p>
5462     * Some Unicode character ranges that contain digits:
5463     * <ul>
5464     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5465     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5466     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5467     *     Arabic-Indic digits
5468     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5469     *     Extended Arabic-Indic digits
5470     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5471     *     Devanagari digits
5472     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5473     *     Fullwidth digits
5474     * </ul>
5475     *
5476     * Many other character ranges contain digits as well.
5477     *
5478     * <p><b>Note:</b> This method cannot handle <a
5479     * href="#supplementary"> supplementary characters</a>. To support
5480     * all Unicode characters, including supplementary characters, use
5481     * the {@link #isDigit(int)} method.
5482     *
5483     * @param   ch   the character to be tested.
5484     * @return  {@code true} if the character is a digit;
5485     *          {@code false} otherwise.
5486     * @see     Character#digit(char, int)
5487     * @see     Character#forDigit(int, int)
5488     * @see     Character#getType(char)
5489     */
5490    public static boolean isDigit(char ch) {
5491        return isDigit((int)ch);
5492    }
5493
5494    /**
5495     * Determines if the specified character (Unicode code point) is a digit.
5496     * <p>
5497     * A character is a digit if its general category type, provided
5498     * by {@link Character#getType(int) getType(codePoint)}, is
5499     * {@code DECIMAL_DIGIT_NUMBER}.
5500     * <p>
5501     * Some Unicode character ranges that contain digits:
5502     * <ul>
5503     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5504     *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5505     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5506     *     Arabic-Indic digits
5507     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5508     *     Extended Arabic-Indic digits
5509     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5510     *     Devanagari digits
5511     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5512     *     Fullwidth digits
5513     * </ul>
5514     *
5515     * Many other character ranges contain digits as well.
5516     *
5517     * @param   codePoint the character (Unicode code point) to be tested.
5518     * @return  {@code true} if the character is a digit;
5519     *          {@code false} otherwise.
5520     * @see     Character#forDigit(int, int)
5521     * @see     Character#getType(int)
5522     * @since   1.5
5523     */
5524    public static boolean isDigit(int codePoint) {
5525        return isDigitImpl(codePoint);
5526    }
5527
5528    static native boolean isDigitImpl(int codePoint);
5529
5530    /**
5531     * Determines if a character is defined in Unicode.
5532     * <p>
5533     * A character is defined if at least one of the following is true:
5534     * <ul>
5535     * <li>It has an entry in the UnicodeData file.
5536     * <li>It has a value in a range defined by the UnicodeData file.
5537     * </ul>
5538     *
5539     * <p><b>Note:</b> This method cannot handle <a
5540     * href="#supplementary"> supplementary characters</a>. To support
5541     * all Unicode characters, including supplementary characters, use
5542     * the {@link #isDefined(int)} method.
5543     *
5544     * @param   ch   the character to be tested
5545     * @return  {@code true} if the character has a defined meaning
5546     *          in Unicode; {@code false} otherwise.
5547     * @see     Character#isDigit(char)
5548     * @see     Character#isLetter(char)
5549     * @see     Character#isLetterOrDigit(char)
5550     * @see     Character#isLowerCase(char)
5551     * @see     Character#isTitleCase(char)
5552     * @see     Character#isUpperCase(char)
5553     * @since   1.0.2
5554     */
5555    public static boolean isDefined(char ch) {
5556        return isDefined((int)ch);
5557    }
5558
5559    /**
5560     * Determines if a character (Unicode code point) is defined in Unicode.
5561     * <p>
5562     * A character is defined if at least one of the following is true:
5563     * <ul>
5564     * <li>It has an entry in the UnicodeData file.
5565     * <li>It has a value in a range defined by the UnicodeData file.
5566     * </ul>
5567     *
5568     * @param   codePoint the character (Unicode code point) to be tested.
5569     * @return  {@code true} if the character has a defined meaning
5570     *          in Unicode; {@code false} otherwise.
5571     * @see     Character#isDigit(int)
5572     * @see     Character#isLetter(int)
5573     * @see     Character#isLetterOrDigit(int)
5574     * @see     Character#isLowerCase(int)
5575     * @see     Character#isTitleCase(int)
5576     * @see     Character#isUpperCase(int)
5577     * @since   1.5
5578     */
5579    public static boolean isDefined(int codePoint) {
5580        return isDefinedImpl(codePoint);
5581    }
5582
5583    static native boolean isDefinedImpl(int codePoint);
5584
5585    /**
5586     * Determines if the specified character is a letter.
5587     * <p>
5588     * A character is considered to be a letter if its general
5589     * category type, provided by {@code Character.getType(ch)},
5590     * is any of the following:
5591     * <ul>
5592     * <li> {@code UPPERCASE_LETTER}
5593     * <li> {@code LOWERCASE_LETTER}
5594     * <li> {@code TITLECASE_LETTER}
5595     * <li> {@code MODIFIER_LETTER}
5596     * <li> {@code OTHER_LETTER}
5597     * </ul>
5598     *
5599     * Not all letters have case. Many characters are
5600     * letters but are neither uppercase nor lowercase nor titlecase.
5601     *
5602     * <p><b>Note:</b> This method cannot handle <a
5603     * href="#supplementary"> supplementary characters</a>. To support
5604     * all Unicode characters, including supplementary characters, use
5605     * the {@link #isLetter(int)} method.
5606     *
5607     * @param   ch   the character to be tested.
5608     * @return  {@code true} if the character is a letter;
5609     *          {@code false} otherwise.
5610     * @see     Character#isDigit(char)
5611     * @see     Character#isJavaIdentifierStart(char)
5612     * @see     Character#isJavaLetter(char)
5613     * @see     Character#isJavaLetterOrDigit(char)
5614     * @see     Character#isLetterOrDigit(char)
5615     * @see     Character#isLowerCase(char)
5616     * @see     Character#isTitleCase(char)
5617     * @see     Character#isUnicodeIdentifierStart(char)
5618     * @see     Character#isUpperCase(char)
5619     */
5620    public static boolean isLetter(char ch) {
5621        return isLetter((int)ch);
5622    }
5623
5624    /**
5625     * Determines if the specified character (Unicode code point) is a letter.
5626     * <p>
5627     * A character is considered to be a letter if its general
5628     * category type, provided by {@link Character#getType(int) getType(codePoint)},
5629     * is any of the following:
5630     * <ul>
5631     * <li> {@code UPPERCASE_LETTER}
5632     * <li> {@code LOWERCASE_LETTER}
5633     * <li> {@code TITLECASE_LETTER}
5634     * <li> {@code MODIFIER_LETTER}
5635     * <li> {@code OTHER_LETTER}
5636     * </ul>
5637     *
5638     * Not all letters have case. Many characters are
5639     * letters but are neither uppercase nor lowercase nor titlecase.
5640     *
5641     * @param   codePoint the character (Unicode code point) to be tested.
5642     * @return  {@code true} if the character is a letter;
5643     *          {@code false} otherwise.
5644     * @see     Character#isDigit(int)
5645     * @see     Character#isJavaIdentifierStart(int)
5646     * @see     Character#isLetterOrDigit(int)
5647     * @see     Character#isLowerCase(int)
5648     * @see     Character#isTitleCase(int)
5649     * @see     Character#isUnicodeIdentifierStart(int)
5650     * @see     Character#isUpperCase(int)
5651     * @since   1.5
5652     */
5653    public static boolean isLetter(int codePoint) {
5654        return isLetterImpl(codePoint);
5655    }
5656
5657    static native boolean isLetterImpl(int codePoint);
5658
5659    /**
5660     * Determines if the specified character is a letter or digit.
5661     * <p>
5662     * A character is considered to be a letter or digit if either
5663     * {@code Character.isLetter(char ch)} or
5664     * {@code Character.isDigit(char ch)} returns
5665     * {@code true} for the character.
5666     *
5667     * <p><b>Note:</b> This method cannot handle <a
5668     * href="#supplementary"> supplementary characters</a>. To support
5669     * all Unicode characters, including supplementary characters, use
5670     * the {@link #isLetterOrDigit(int)} method.
5671     *
5672     * @param   ch   the character to be tested.
5673     * @return  {@code true} if the character is a letter or digit;
5674     *          {@code false} otherwise.
5675     * @see     Character#isDigit(char)
5676     * @see     Character#isJavaIdentifierPart(char)
5677     * @see     Character#isJavaLetter(char)
5678     * @see     Character#isJavaLetterOrDigit(char)
5679     * @see     Character#isLetter(char)
5680     * @see     Character#isUnicodeIdentifierPart(char)
5681     * @since   1.0.2
5682     */
5683    public static boolean isLetterOrDigit(char ch) {
5684        return isLetterOrDigit((int)ch);
5685    }
5686
5687    /**
5688     * Determines if the specified character (Unicode code point) is a letter or digit.
5689     * <p>
5690     * A character is considered to be a letter or digit if either
5691     * {@link #isLetter(int) isLetter(codePoint)} or
5692     * {@link #isDigit(int) isDigit(codePoint)} returns
5693     * {@code true} for the character.
5694     *
5695     * @param   codePoint the character (Unicode code point) to be tested.
5696     * @return  {@code true} if the character is a letter or digit;
5697     *          {@code false} otherwise.
5698     * @see     Character#isDigit(int)
5699     * @see     Character#isJavaIdentifierPart(int)
5700     * @see     Character#isLetter(int)
5701     * @see     Character#isUnicodeIdentifierPart(int)
5702     * @since   1.5
5703     */
5704    public static boolean isLetterOrDigit(int codePoint) {
5705        return isLetterOrDigitImpl(codePoint);
5706    }
5707
5708    static native boolean isLetterOrDigitImpl(int codePoint);
5709
5710    /**
5711     * Determines if the specified character is permissible as the first
5712     * character in a Java identifier.
5713     * <p>
5714     * A character may start a Java identifier if and only if
5715     * one of the following is true:
5716     * <ul>
5717     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5718     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5719     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5720     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5721     * </ul>
5722     *
5723     * @param   ch the character to be tested.
5724     * @return  {@code true} if the character may start a Java
5725     *          identifier; {@code false} otherwise.
5726     * @see     Character#isJavaLetterOrDigit(char)
5727     * @see     Character#isJavaIdentifierStart(char)
5728     * @see     Character#isJavaIdentifierPart(char)
5729     * @see     Character#isLetter(char)
5730     * @see     Character#isLetterOrDigit(char)
5731     * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5733     * @deprecated Replaced by isJavaIdentifierStart(char).
5734     */
5735    @Deprecated
5736    public static boolean isJavaLetter(char ch) {
5737        return isJavaIdentifierStart(ch);
5738    }
5739
5740    /**
5741     * Determines if the specified character may be part of a Java
5742     * identifier as other than the first character.
5743     * <p>
5744     * A character may be part of a Java identifier if and only if any
5745     * of the following are true:
5746     * <ul>
5747     * <li>  it is a letter
5748     * <li>  it is a currency symbol (such as {@code '$'})
5749     * <li>  it is a connecting punctuation character (such as {@code '_'})
5750     * <li>  it is a digit
5751     * <li>  it is a numeric letter (such as a Roman numeral character)
5752     * <li>  it is a combining mark
5753     * <li>  it is a non-spacing mark
5754     * <li> {@code isIdentifierIgnorable} returns
5755     * {@code true} for the character.
5756     * </ul>
5757     *
5758     * @param   ch the character to be tested.
5759     * @return  {@code true} if the character may be part of a
5760     *          Java identifier; {@code false} otherwise.
5761     * @see     Character#isJavaLetter(char)
5762     * @see     Character#isJavaIdentifierStart(char)
5763     * @see     Character#isJavaIdentifierPart(char)
5764     * @see     Character#isLetter(char)
5765     * @see     Character#isLetterOrDigit(char)
5766     * @see     Character#isUnicodeIdentifierPart(char)
5767     * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5769     * @deprecated Replaced by isJavaIdentifierPart(char).
5770     */
5771    @Deprecated
5772    public static boolean isJavaLetterOrDigit(char ch) {
5773        return isJavaIdentifierPart(ch);
5774    }
5775
5776    /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5778     * <p>
5779     * A character is considered to be alphabetic if its general category type,
5780     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5781     * the following:
5782     * <ul>
5783     * <li> <code>UPPERCASE_LETTER</code>
5784     * <li> <code>LOWERCASE_LETTER</code>
5785     * <li> <code>TITLECASE_LETTER</code>
5786     * <li> <code>MODIFIER_LETTER</code>
5787     * <li> <code>OTHER_LETTER</code>
5788     * <li> <code>LETTER_NUMBER</code>
5789     * </ul>
5790     * or it has contributory property Other_Alphabetic as defined by the
5791     * Unicode Standard.
5792     *
5793     * @param   codePoint the character (Unicode code point) to be tested.
5794     * @return  <code>true</code> if the character is a Unicode alphabet
5795     *          character, <code>false</code> otherwise.
5796     * @since   1.7
5797     */
5798    public static boolean isAlphabetic(int codePoint) {
5799        return isAlphabeticImpl(codePoint);
5800    }
5801
    // Native method that isAlphabetic(int) forwards to; its body is not in this file.
    static native boolean isAlphabeticImpl(int codePoint);
5803
5804
5805    /**
5806     * Determines if the specified character (Unicode code point) is a CJKV
5807     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5808     * the Unicode Standard.
5809     *
5810     * @param   codePoint the character (Unicode code point) to be tested.
5811     * @return  <code>true</code> if the character is a Unicode ideograph
5812     *          character, <code>false</code> otherwise.
5813     * @since   1.7
5814     */
5815    public static boolean isIdeographic(int codePoint) {
5816        return isIdeographicImpl(codePoint);
5817    }
5818
    // Native method that isIdeographic(int) forwards to; its body is not in this file.
    static native boolean isIdeographicImpl(int codePoint);
5820
5821    /**
5822     * Determines if the specified character is
5823     * permissible as the first character in a Java identifier.
5824     * <p>
5825     * A character may start a Java identifier if and only if
5826     * one of the following conditions is true:
5827     * <ul>
5828     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5829     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5830     * <li> {@code ch} is a currency symbol (such as {@code '$'})
5831     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5832     * </ul>
5833     *
5834     * <p><b>Note:</b> This method cannot handle <a
5835     * href="#supplementary"> supplementary characters</a>. To support
5836     * all Unicode characters, including supplementary characters, use
5837     * the {@link #isJavaIdentifierStart(int)} method.
5838     *
5839     * @param   ch the character to be tested.
5840     * @return  {@code true} if the character may start a Java identifier;
5841     *          {@code false} otherwise.
5842     * @see     Character#isJavaIdentifierPart(char)
5843     * @see     Character#isLetter(char)
5844     * @see     Character#isUnicodeIdentifierStart(char)
5845     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5846     * @since   1.1
5847     */
5848    public static boolean isJavaIdentifierStart(char ch) {
5849        return isJavaIdentifierStart((int)ch);
5850    }
5851
5852    /**
5853     * Determines if the character (Unicode code point) is
5854     * permissible as the first character in a Java identifier.
5855     * <p>
5856     * A character may start a Java identifier if and only if
5857     * one of the following conditions is true:
5858     * <ul>
5859     * <li> {@link #isLetter(int) isLetter(codePoint)}
5860     *      returns {@code true}
5861     * <li> {@link #getType(int) getType(codePoint)}
5862     *      returns {@code LETTER_NUMBER}
5863     * <li> the referenced character is a currency symbol (such as {@code '$'})
5864     * <li> the referenced character is a connecting punctuation character
5865     *      (such as {@code '_'}).
5866     * </ul>
5867     *
5868     * @param   codePoint the character (Unicode code point) to be tested.
5869     * @return  {@code true} if the character may start a Java identifier;
5870     *          {@code false} otherwise.
5871     * @see     Character#isJavaIdentifierPart(int)
5872     * @see     Character#isLetter(int)
5873     * @see     Character#isUnicodeIdentifierStart(int)
5874     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5875     * @since   1.5
5876     */
5877    public static boolean isJavaIdentifierStart(int codePoint) {
5878        // Use precomputed bitmasks to optimize the ASCII range.
5879        if (codePoint < 64) {
5880            return (codePoint == '$'); // There's only one character in this range.
5881        } else if (codePoint < 128) {
5882            return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
5883        }
5884        return ((1 << getType(codePoint))
5885                & ((1 << UPPERCASE_LETTER)
5886                   | (1 << LOWERCASE_LETTER)
5887                   | (1  << TITLECASE_LETTER)
5888                   | (1  << MODIFIER_LETTER)
5889                   | (1  << OTHER_LETTER)
5890                   | (1  << CURRENCY_SYMBOL)
5891                   | (1  << CONNECTOR_PUNCTUATION)
5892                   | (1  << LETTER_NUMBER))) != 0;
5893    }
5894
5895    /**
5896     * Determines if the specified character may be part of a Java
5897     * identifier as other than the first character.
5898     * <p>
5899     * A character may be part of a Java identifier if any of the following
5900     * are true:
5901     * <ul>
5902     * <li>  it is a letter
5903     * <li>  it is a currency symbol (such as {@code '$'})
5904     * <li>  it is a connecting punctuation character (such as {@code '_'})
5905     * <li>  it is a digit
5906     * <li>  it is a numeric letter (such as a Roman numeral character)
5907     * <li>  it is a combining mark
5908     * <li>  it is a non-spacing mark
5909     * <li> {@code isIdentifierIgnorable} returns
5910     * {@code true} for the character
5911     * </ul>
5912     *
5913     * <p><b>Note:</b> This method cannot handle <a
5914     * href="#supplementary"> supplementary characters</a>. To support
5915     * all Unicode characters, including supplementary characters, use
5916     * the {@link #isJavaIdentifierPart(int)} method.
5917     *
5918     * @param   ch      the character to be tested.
5919     * @return {@code true} if the character may be part of a
5920     *          Java identifier; {@code false} otherwise.
5921     * @see     Character#isIdentifierIgnorable(char)
5922     * @see     Character#isJavaIdentifierStart(char)
5923     * @see     Character#isLetterOrDigit(char)
5924     * @see     Character#isUnicodeIdentifierPart(char)
5925     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5926     * @since   1.1
5927     */
5928    public static boolean isJavaIdentifierPart(char ch) {
5929        return isJavaIdentifierPart((int)ch);
5930    }
5931
5932    /**
5933     * Determines if the character (Unicode code point) may be part of a Java
5934     * identifier as other than the first character.
5935     * <p>
5936     * A character may be part of a Java identifier if any of the following
5937     * are true:
5938     * <ul>
5939     * <li>  it is a letter
5940     * <li>  it is a currency symbol (such as {@code '$'})
5941     * <li>  it is a connecting punctuation character (such as {@code '_'})
5942     * <li>  it is a digit
5943     * <li>  it is a numeric letter (such as a Roman numeral character)
5944     * <li>  it is a combining mark
5945     * <li>  it is a non-spacing mark
5946     * <li> {@link #isIdentifierIgnorable(int)
5947     * isIdentifierIgnorable(codePoint)} returns {@code true} for
5948     * the character
5949     * </ul>
5950     *
5951     * @param   codePoint the character (Unicode code point) to be tested.
5952     * @return {@code true} if the character may be part of a
5953     *          Java identifier; {@code false} otherwise.
5954     * @see     Character#isIdentifierIgnorable(int)
5955     * @see     Character#isJavaIdentifierStart(int)
5956     * @see     Character#isLetterOrDigit(int)
5957     * @see     Character#isUnicodeIdentifierPart(int)
5958     * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5959     * @since   1.5
5960     */
5961    public static boolean isJavaIdentifierPart(int codePoint) {
5962        // Use precomputed bitmasks to optimize the ASCII range.
5963        if (codePoint < 64) {
5964            return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
5965        } else if (codePoint < 128) {
5966            return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
5967        }
5968        return ((1 << getType(codePoint))
5969                & ((1 << UPPERCASE_LETTER)
5970                   | (1 << LOWERCASE_LETTER)
5971                   | (1 << TITLECASE_LETTER)
5972                   | (1 << MODIFIER_LETTER)
5973                   | (1 << OTHER_LETTER)
5974                   | (1 << CURRENCY_SYMBOL)
5975                   | (1 << CONNECTOR_PUNCTUATION)
5976                   | (1 << DECIMAL_DIGIT_NUMBER)
5977                   | (1 << LETTER_NUMBER)
5978                   | (1 << FORMAT)
5979                   | (1 << COMBINING_SPACING_MARK)
5980                   | (1 << NON_SPACING_MARK))) != 0
5981                || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
5982                || (codePoint >= 0x7f && codePoint <= 0x9f);
5983    }
5984
5985    /**
5986     * Determines if the specified character is permissible as the
5987     * first character in a Unicode identifier.
5988     * <p>
5989     * A character may start a Unicode identifier if and only if
5990     * one of the following conditions is true:
5991     * <ul>
5992     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5993     * <li> {@link #getType(char) getType(ch)} returns
5994     *      {@code LETTER_NUMBER}.
5995     * </ul>
5996     *
5997     * <p><b>Note:</b> This method cannot handle <a
5998     * href="#supplementary"> supplementary characters</a>. To support
5999     * all Unicode characters, including supplementary characters, use
6000     * the {@link #isUnicodeIdentifierStart(int)} method.
6001     *
6002     * @param   ch      the character to be tested.
6003     * @return  {@code true} if the character may start a Unicode
6004     *          identifier; {@code false} otherwise.
6005     * @see     Character#isJavaIdentifierStart(char)
6006     * @see     Character#isLetter(char)
6007     * @see     Character#isUnicodeIdentifierPart(char)
6008     * @since   1.1
6009     */
6010    public static boolean isUnicodeIdentifierStart(char ch) {
6011        return isUnicodeIdentifierStart((int)ch);
6012    }
6013
6014    /**
6015     * Determines if the specified character (Unicode code point) is permissible as the
6016     * first character in a Unicode identifier.
6017     * <p>
6018     * A character may start a Unicode identifier if and only if
6019     * one of the following conditions is true:
6020     * <ul>
6021     * <li> {@link #isLetter(int) isLetter(codePoint)}
6022     *      returns {@code true}
6023     * <li> {@link #getType(int) getType(codePoint)}
6024     *      returns {@code LETTER_NUMBER}.
6025     * </ul>
6026     * @param   codePoint the character (Unicode code point) to be tested.
6027     * @return  {@code true} if the character may start a Unicode
6028     *          identifier; {@code false} otherwise.
6029     * @see     Character#isJavaIdentifierStart(int)
6030     * @see     Character#isLetter(int)
6031     * @see     Character#isUnicodeIdentifierPart(int)
6032     * @since   1.5
6033     */
6034    public static boolean isUnicodeIdentifierStart(int codePoint) {
6035        return isUnicodeIdentifierStartImpl(codePoint);
6036    }
6037
    // Native method that isUnicodeIdentifierStart(int) forwards to; its body is not in this file.
    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6039
6040    /**
6041     * Determines if the specified character may be part of a Unicode
6042     * identifier as other than the first character.
6043     * <p>
6044     * A character may be part of a Unicode identifier if and only if
6045     * one of the following statements is true:
6046     * <ul>
6047     * <li>  it is a letter
6048     * <li>  it is a connecting punctuation character (such as {@code '_'})
6049     * <li>  it is a digit
6050     * <li>  it is a numeric letter (such as a Roman numeral character)
6051     * <li>  it is a combining mark
6052     * <li>  it is a non-spacing mark
6053     * <li> {@code isIdentifierIgnorable} returns
6054     * {@code true} for this character.
6055     * </ul>
6056     *
6057     * <p><b>Note:</b> This method cannot handle <a
6058     * href="#supplementary"> supplementary characters</a>. To support
6059     * all Unicode characters, including supplementary characters, use
6060     * the {@link #isUnicodeIdentifierPart(int)} method.
6061     *
6062     * @param   ch      the character to be tested.
6063     * @return  {@code true} if the character may be part of a
6064     *          Unicode identifier; {@code false} otherwise.
6065     * @see     Character#isIdentifierIgnorable(char)
6066     * @see     Character#isJavaIdentifierPart(char)
6067     * @see     Character#isLetterOrDigit(char)
6068     * @see     Character#isUnicodeIdentifierStart(char)
6069     * @since   1.1
6070     */
6071    public static boolean isUnicodeIdentifierPart(char ch) {
6072        return isUnicodeIdentifierPart((int)ch);
6073    }
6074
6075    /**
6076     * Determines if the specified character (Unicode code point) may be part of a Unicode
6077     * identifier as other than the first character.
6078     * <p>
6079     * A character may be part of a Unicode identifier if and only if
6080     * one of the following statements is true:
6081     * <ul>
6082     * <li>  it is a letter
6083     * <li>  it is a connecting punctuation character (such as {@code '_'})
6084     * <li>  it is a digit
6085     * <li>  it is a numeric letter (such as a Roman numeral character)
6086     * <li>  it is a combining mark
6087     * <li>  it is a non-spacing mark
6088     * <li> {@code isIdentifierIgnorable} returns
6089     * {@code true} for this character.
6090     * </ul>
6091     * @param   codePoint the character (Unicode code point) to be tested.
6092     * @return  {@code true} if the character may be part of a
6093     *          Unicode identifier; {@code false} otherwise.
6094     * @see     Character#isIdentifierIgnorable(int)
6095     * @see     Character#isJavaIdentifierPart(int)
6096     * @see     Character#isLetterOrDigit(int)
6097     * @see     Character#isUnicodeIdentifierStart(int)
6098     * @since   1.5
6099     */
6100    public static boolean isUnicodeIdentifierPart(int codePoint) {
6101        return isUnicodeIdentifierPartImpl(codePoint);
6102    }
6103
    // Native method that isUnicodeIdentifierPart(int) forwards to; its body is not in this file.
    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6105
6106    /**
6107     * Determines if the specified character should be regarded as
6108     * an ignorable character in a Java identifier or a Unicode identifier.
6109     * <p>
6110     * The following Unicode characters are ignorable in a Java identifier
6111     * or a Unicode identifier:
6112     * <ul>
6113     * <li>ISO control characters that are not whitespace
6114     * <ul>
6115     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6116     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6117     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6118     * </ul>
6119     *
6120     * <li>all characters that have the {@code FORMAT} general
6121     * category value
6122     * </ul>
6123     *
6124     * <p><b>Note:</b> This method cannot handle <a
6125     * href="#supplementary"> supplementary characters</a>. To support
6126     * all Unicode characters, including supplementary characters, use
6127     * the {@link #isIdentifierIgnorable(int)} method.
6128     *
6129     * @param   ch      the character to be tested.
6130     * @return  {@code true} if the character is an ignorable control
6131     *          character that may be part of a Java or Unicode identifier;
6132     *           {@code false} otherwise.
6133     * @see     Character#isJavaIdentifierPart(char)
6134     * @see     Character#isUnicodeIdentifierPart(char)
6135     * @since   1.1
6136     */
6137    public static boolean isIdentifierIgnorable(char ch) {
6138        return isIdentifierIgnorable((int)ch);
6139    }
6140
6141    /**
6142     * Determines if the specified character (Unicode code point) should be regarded as
6143     * an ignorable character in a Java identifier or a Unicode identifier.
6144     * <p>
6145     * The following Unicode characters are ignorable in a Java identifier
6146     * or a Unicode identifier:
6147     * <ul>
6148     * <li>ISO control characters that are not whitespace
6149     * <ul>
6150     * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6151     * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6152     * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6153     * </ul>
6154     *
6155     * <li>all characters that have the {@code FORMAT} general
6156     * category value
6157     * </ul>
6158     *
6159     * @param   codePoint the character (Unicode code point) to be tested.
6160     * @return  {@code true} if the character is an ignorable control
6161     *          character that may be part of a Java or Unicode identifier;
6162     *          {@code false} otherwise.
6163     * @see     Character#isJavaIdentifierPart(int)
6164     * @see     Character#isUnicodeIdentifierPart(int)
6165     * @since   1.5
6166     */
6167    public static boolean isIdentifierIgnorable(int codePoint) {
6168        return isIdentifierIgnorableImpl(codePoint);
6169    }
6170
    // Native method that isIdentifierIgnorable(int) forwards to; its body is not in this file.
    static native boolean isIdentifierIgnorableImpl(int codePoint);
6172
6173    /**
6174     * Converts the character argument to lowercase using case
6175     * mapping information from the UnicodeData file.
6176     * <p>
6177     * Note that
6178     * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6179     * does not always return {@code true} for some ranges of
6180     * characters, particularly those that are symbols or ideographs.
6181     *
6182     * <p>In general, {@link String#toLowerCase()} should be used to map
6183     * characters to lowercase. {@code String} case mapping methods
6184     * have several benefits over {@code Character} case mapping methods.
6185     * {@code String} case mapping methods can perform locale-sensitive
6186     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6187     * the {@code Character} case mapping methods cannot.
6188     *
6189     * <p><b>Note:</b> This method cannot handle <a
6190     * href="#supplementary"> supplementary characters</a>. To support
6191     * all Unicode characters, including supplementary characters, use
6192     * the {@link #toLowerCase(int)} method.
6193     *
6194     * @param   ch   the character to be converted.
6195     * @return  the lowercase equivalent of the character, if any;
6196     *          otherwise, the character itself.
6197     * @see     Character#isLowerCase(char)
6198     * @see     String#toLowerCase()
6199     */
6200    public static char toLowerCase(char ch) {
6201        return (char)toLowerCase((int)ch);
6202    }
6203
6204    /**
6205     * Converts the character (Unicode code point) argument to
6206     * lowercase using case mapping information from the UnicodeData
6207     * file.
6208     *
6209     * <p> Note that
6210     * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6211     * does not always return {@code true} for some ranges of
6212     * characters, particularly those that are symbols or ideographs.
6213     *
6214     * <p>In general, {@link String#toLowerCase()} should be used to map
6215     * characters to lowercase. {@code String} case mapping methods
6216     * have several benefits over {@code Character} case mapping methods.
6217     * {@code String} case mapping methods can perform locale-sensitive
6218     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6219     * the {@code Character} case mapping methods cannot.
6220     *
6221     * @param   codePoint   the character (Unicode code point) to be converted.
6222     * @return  the lowercase equivalent of the character (Unicode code
6223     *          point), if any; otherwise, the character itself.
6224     * @see     Character#isLowerCase(int)
6225     * @see     String#toLowerCase()
6226     *
6227     * @since   1.5
6228     */
6229    public static int toLowerCase(int codePoint) {
6230        return toLowerCaseImpl(codePoint);
6231    }
6232
    // Native method that toLowerCase(int) forwards to; its body is not in this file.
    static native int toLowerCaseImpl(int codePoint);
6234
6235    /**
6236     * Converts the character argument to uppercase using case mapping
6237     * information from the UnicodeData file.
6238     * <p>
6239     * Note that
6240     * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6241     * does not always return {@code true} for some ranges of
6242     * characters, particularly those that are symbols or ideographs.
6243     *
6244     * <p>In general, {@link String#toUpperCase()} should be used to map
6245     * characters to uppercase. {@code String} case mapping methods
6246     * have several benefits over {@code Character} case mapping methods.
6247     * {@code String} case mapping methods can perform locale-sensitive
6248     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6249     * the {@code Character} case mapping methods cannot.
6250     *
6251     * <p><b>Note:</b> This method cannot handle <a
6252     * href="#supplementary"> supplementary characters</a>. To support
6253     * all Unicode characters, including supplementary characters, use
6254     * the {@link #toUpperCase(int)} method.
6255     *
6256     * @param   ch   the character to be converted.
6257     * @return  the uppercase equivalent of the character, if any;
6258     *          otherwise, the character itself.
6259     * @see     Character#isUpperCase(char)
6260     * @see     String#toUpperCase()
6261     */
6262    public static char toUpperCase(char ch) {
6263        return (char)toUpperCase((int)ch);
6264    }
6265
6266    /**
6267     * Converts the character (Unicode code point) argument to
6268     * uppercase using case mapping information from the UnicodeData
6269     * file.
6270     *
6271     * <p>Note that
6272     * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6273     * does not always return {@code true} for some ranges of
6274     * characters, particularly those that are symbols or ideographs.
6275     *
6276     * <p>In general, {@link String#toUpperCase()} should be used to map
6277     * characters to uppercase. {@code String} case mapping methods
6278     * have several benefits over {@code Character} case mapping methods.
6279     * {@code String} case mapping methods can perform locale-sensitive
6280     * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6281     * the {@code Character} case mapping methods cannot.
6282     *
6283     * @param   codePoint   the character (Unicode code point) to be converted.
6284     * @return  the uppercase equivalent of the character, if any;
6285     *          otherwise, the character itself.
6286     * @see     Character#isUpperCase(int)
6287     * @see     String#toUpperCase()
6288     *
6289     * @since   1.5
6290     */
6291    public static int toUpperCase(int codePoint) {
6292        return toUpperCaseImpl(codePoint);
6293    }
6294
    // Native method that toUpperCase(int) forwards to; its body is not in this file.
    static native int toUpperCaseImpl(int codePoint);
6296
6297    /**
6298     * Converts the character argument to titlecase using case mapping
6299     * information from the UnicodeData file. If a character has no
6300     * explicit titlecase mapping and is not itself a titlecase char
6301     * according to UnicodeData, then the uppercase mapping is
6302     * returned as an equivalent titlecase mapping. If the
6303     * {@code char} argument is already a titlecase
6304     * {@code char}, the same {@code char} value will be
6305     * returned.
6306     * <p>
6307     * Note that
6308     * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6309     * does not always return {@code true} for some ranges of
6310     * characters.
6311     *
6312     * <p><b>Note:</b> This method cannot handle <a
6313     * href="#supplementary"> supplementary characters</a>. To support
6314     * all Unicode characters, including supplementary characters, use
6315     * the {@link #toTitleCase(int)} method.
6316     *
6317     * @param   ch   the character to be converted.
6318     * @return  the titlecase equivalent of the character, if any;
6319     *          otherwise, the character itself.
6320     * @see     Character#isTitleCase(char)
6321     * @see     Character#toLowerCase(char)
6322     * @see     Character#toUpperCase(char)
6323     * @since   1.0.2
6324     */
6325    public static char toTitleCase(char ch) {
6326        return (char)toTitleCase((int)ch);
6327    }
6328
6329    /**
6330     * Converts the character (Unicode code point) argument to titlecase using case mapping
6331     * information from the UnicodeData file. If a character has no
6332     * explicit titlecase mapping and is not itself a titlecase char
6333     * according to UnicodeData, then the uppercase mapping is
6334     * returned as an equivalent titlecase mapping. If the
6335     * character argument is already a titlecase
6336     * character, the same character value will be
6337     * returned.
6338     *
6339     * <p>Note that
6340     * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6341     * does not always return {@code true} for some ranges of
6342     * characters.
6343     *
6344     * @param   codePoint   the character (Unicode code point) to be converted.
6345     * @return  the titlecase equivalent of the character, if any;
6346     *          otherwise, the character itself.
6347     * @see     Character#isTitleCase(int)
6348     * @see     Character#toLowerCase(int)
6349     * @see     Character#toUpperCase(int)
6350     * @since   1.5
6351     */
6352    public static int toTitleCase(int codePoint) {
6353        return toTitleCaseImpl(codePoint);
6354    }
6355
    // Native method that toTitleCase(int) forwards to; its body is not in this file.
    static native int toTitleCaseImpl(int codePoint);
6357
6358    /**
6359     * Returns the numeric value of the character {@code ch} in the
6360     * specified radix.
6361     * <p>
6362     * If the radix is not in the range {@code MIN_RADIX} &le;
6363     * {@code radix} &le; {@code MAX_RADIX} or if the
6364     * value of {@code ch} is not a valid digit in the specified
6365     * radix, {@code -1} is returned. A character is a valid digit
6366     * if at least one of the following is true:
6367     * <ul>
6368     * <li>The method {@code isDigit} is {@code true} of the character
6369     *     and the Unicode decimal digit value of the character (or its
6370     *     single-character decomposition) is less than the specified radix.
6371     *     In this case the decimal digit value is returned.
6372     * <li>The character is one of the uppercase Latin letters
6373     *     {@code 'A'} through {@code 'Z'} and its code is less than
6374     *     {@code radix + 'A' - 10}.
6375     *     In this case, {@code ch - 'A' + 10}
6376     *     is returned.
6377     * <li>The character is one of the lowercase Latin letters
6378     *     {@code 'a'} through {@code 'z'} and its code is less than
6379     *     {@code radix + 'a' - 10}.
6380     *     In this case, {@code ch - 'a' + 10}
6381     *     is returned.
6382     * <li>The character is one of the fullwidth uppercase Latin letters A
6383     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6384     *     and its code is less than
6385     *     {@code radix + '\u005CuFF21' - 10}.
6386     *     In this case, {@code ch - '\u005CuFF21' + 10}
6387     *     is returned.
6388     * <li>The character is one of the fullwidth lowercase Latin letters a
6389     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6390     *     and its code is less than
6391     *     {@code radix + '\u005CuFF41' - 10}.
6392     *     In this case, {@code ch - '\u005CuFF41' + 10}
6393     *     is returned.
6394     * </ul>
6395     *
6396     * <p><b>Note:</b> This method cannot handle <a
6397     * href="#supplementary"> supplementary characters</a>. To support
6398     * all Unicode characters, including supplementary characters, use
6399     * the {@link #digit(int, int)} method.
6400     *
6401     * @param   ch      the character to be converted.
6402     * @param   radix   the radix.
6403     * @return  the numeric value represented by the character in the
6404     *          specified radix.
6405     * @see     Character#forDigit(int, int)
6406     * @see     Character#isDigit(char)
6407     */
6408    public static int digit(char ch, int radix) {
6409        return digit((int)ch, radix);
6410    }
6411
6412    /**
6413     * Returns the numeric value of the specified character (Unicode
6414     * code point) in the specified radix.
6415     *
6416     * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6417     * {@code radix} &le; {@code MAX_RADIX} or if the
6418     * character is not a valid digit in the specified
6419     * radix, {@code -1} is returned. A character is a valid digit
6420     * if at least one of the following is true:
6421     * <ul>
6422     * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6423     *     and the Unicode decimal digit value of the character (or its
6424     *     single-character decomposition) is less than the specified radix.
6425     *     In this case the decimal digit value is returned.
6426     * <li>The character is one of the uppercase Latin letters
6427     *     {@code 'A'} through {@code 'Z'} and its code is less than
6428     *     {@code radix + 'A' - 10}.
6429     *     In this case, {@code codePoint - 'A' + 10}
6430     *     is returned.
6431     * <li>The character is one of the lowercase Latin letters
6432     *     {@code 'a'} through {@code 'z'} and its code is less than
6433     *     {@code radix + 'a' - 10}.
6434     *     In this case, {@code codePoint - 'a' + 10}
6435     *     is returned.
6436     * <li>The character is one of the fullwidth uppercase Latin letters A
6437     *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6438     *     and its code is less than
6439     *     {@code radix + '\u005CuFF21' - 10}.
6440     *     In this case,
6441     *     {@code codePoint - '\u005CuFF21' + 10}
6442     *     is returned.
6443     * <li>The character is one of the fullwidth lowercase Latin letters a
6444     *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6445     *     and its code is less than
     *     {@code radix + '\u005CuFF41' - 10}.
6447     *     In this case,
6448     *     {@code codePoint - '\u005CuFF41' + 10}
6449     *     is returned.
6450     * </ul>
6451     *
6452     * @param   codePoint the character (Unicode code point) to be converted.
6453     * @param   radix   the radix.
6454     * @return  the numeric value represented by the character in the
6455     *          specified radix.
6456     * @see     Character#forDigit(int, int)
6457     * @see     Character#isDigit(int)
6458     * @since   1.5
6459     */
6460    public static int digit(int codePoint, int radix) {
6461        if (radix < MIN_RADIX || radix > MAX_RADIX) {
6462            return -1;
6463        }
6464        if (codePoint < 128) {
6465            // Optimized for ASCII
6466            int result = -1;
6467            if ('0' <= codePoint && codePoint <= '9') {
6468                result = codePoint - '0';
6469            } else if ('a' <= codePoint && codePoint <= 'z') {
6470                result = 10 + (codePoint - 'a');
6471            } else if ('A' <= codePoint && codePoint <= 'Z') {
6472                result = 10 + (codePoint - 'A');
6473            }
6474            return result < radix ? result : -1;
6475        }
6476        return digitImpl(codePoint, radix);
6477    }
6478
6479    native static int digitImpl(int codePoint, int radix);
6480
6481    /**
6482     * Returns the {@code int} value that the specified Unicode
6483     * character represents. For example, the character
     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
     * an {@code int} with a value of 50.
6486     * <p>
6487     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6488     * {@code '\u005Cu005A'}), lowercase
6489     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6490     * full width variant ({@code '\u005CuFF21'} through
6491     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6492     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6493     * through 35. This is independent of the Unicode specification,
6494     * which does not assign numeric values to these {@code char}
6495     * values.
6496     * <p>
6497     * If the character does not have a numeric value, then -1 is returned.
6498     * If the character has a numeric value that cannot be represented as a
6499     * nonnegative integer (for example, a fractional value), then -2
6500     * is returned.
6501     *
6502     * <p><b>Note:</b> This method cannot handle <a
6503     * href="#supplementary"> supplementary characters</a>. To support
6504     * all Unicode characters, including supplementary characters, use
6505     * the {@link #getNumericValue(int)} method.
6506     *
6507     * @param   ch      the character to be converted.
6508     * @return  the numeric value of the character, as a nonnegative {@code int}
6509     *           value; -2 if the character has a numeric value that is not a
6510     *          nonnegative integer; -1 if the character has no numeric value.
6511     * @see     Character#forDigit(int, int)
6512     * @see     Character#isDigit(char)
6513     * @since   1.1
6514     */
6515    public static int getNumericValue(char ch) {
6516        return getNumericValue((int)ch);
6517    }
6518
6519    /**
6520     * Returns the {@code int} value that the specified
6521     * character (Unicode code point) represents. For example, the character
6522     * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6523     * an {@code int} with a value of 50.
6524     * <p>
6525     * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6526     * {@code '\u005Cu005A'}), lowercase
6527     * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6528     * full width variant ({@code '\u005CuFF21'} through
6529     * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6530     * {@code '\u005CuFF5A'}) forms have numeric values from 10
6531     * through 35. This is independent of the Unicode specification,
6532     * which does not assign numeric values to these {@code char}
6533     * values.
6534     * <p>
6535     * If the character does not have a numeric value, then -1 is returned.
6536     * If the character has a numeric value that cannot be represented as a
6537     * nonnegative integer (for example, a fractional value), then -2
6538     * is returned.
6539     *
6540     * @param   codePoint the character (Unicode code point) to be converted.
6541     * @return  the numeric value of the character, as a nonnegative {@code int}
6542     *          value; -2 if the character has a numeric value that is not a
6543     *          nonnegative integer; -1 if the character has no numeric value.
6544     * @see     Character#forDigit(int, int)
6545     * @see     Character#isDigit(int)
6546     * @since   1.5
6547     */
6548    public static int getNumericValue(int codePoint) {
6549        // This is both an optimization and papers over differences between Java and ICU.
6550        if (codePoint < 128) {
6551            if (codePoint >= '0' && codePoint <= '9') {
6552                return codePoint - '0';
6553            }
6554            if (codePoint >= 'a' && codePoint <= 'z') {
6555                return codePoint - ('a' - 10);
6556            }
6557            if (codePoint >= 'A' && codePoint <= 'Z') {
6558                return codePoint - ('A' - 10);
6559            }
6560            return -1;
6561        }
6562        // Full-width uppercase A-Z.
6563        if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6564            return codePoint - 0xff17;
6565        }
6566        // Full-width lowercase a-z.
6567        if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6568            return codePoint - 0xff37;
6569        }
6570        return getNumericValueImpl(codePoint);
6571    }
6572
6573    native static int getNumericValueImpl(int codePoint);
6574
6575    /**
6576     * Determines if the specified character is ISO-LATIN-1 white space.
6577     * This method returns {@code true} for the following five
6578     * characters only:
6579     * <table>
6580     * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6581     *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6582     * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6583     *     <td>{@code NEW LINE}</td></tr>
6584     * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6585     *     <td>{@code FORM FEED}</td></tr>
6586     * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6587     *     <td>{@code CARRIAGE RETURN}</td></tr>
     * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6589     *     <td>{@code SPACE}</td></tr>
6590     * </table>
6591     *
6592     * @param      ch   the character to be tested.
6593     * @return     {@code true} if the character is ISO-LATIN-1 white
6594     *             space; {@code false} otherwise.
6595     * @see        Character#isSpaceChar(char)
6596     * @see        Character#isWhitespace(char)
6597     * @deprecated Replaced by isWhitespace(char).
6598     */
6599    @Deprecated
6600    public static boolean isSpace(char ch) {
6601        return (ch <= 0x0020) &&
6602            (((((1L << 0x0009) |
6603            (1L << 0x000A) |
6604            (1L << 0x000C) |
6605            (1L << 0x000D) |
6606            (1L << 0x0020)) >> ch) & 1L) != 0);
6607    }
6608
6609
6610    /**
6611     * Determines if the specified character is a Unicode space character.
6612     * A character is considered to be a space character if and only if
6613     * it is specified to be a space character by the Unicode Standard. This
6614     * method returns true if the character's general category type is any of
6615     * the following:
6616     * <ul>
6617     * <li> {@code SPACE_SEPARATOR}
6618     * <li> {@code LINE_SEPARATOR}
6619     * <li> {@code PARAGRAPH_SEPARATOR}
6620     * </ul>
6621     *
6622     * <p><b>Note:</b> This method cannot handle <a
6623     * href="#supplementary"> supplementary characters</a>. To support
6624     * all Unicode characters, including supplementary characters, use
6625     * the {@link #isSpaceChar(int)} method.
6626     *
6627     * @param   ch      the character to be tested.
6628     * @return  {@code true} if the character is a space character;
6629     *          {@code false} otherwise.
6630     * @see     Character#isWhitespace(char)
6631     * @since   1.1
6632     */
6633    public static boolean isSpaceChar(char ch) {
6634        return isSpaceChar((int)ch);
6635    }
6636
6637    /**
6638     * Determines if the specified character (Unicode code point) is a
6639     * Unicode space character.  A character is considered to be a
6640     * space character if and only if it is specified to be a space
6641     * character by the Unicode Standard. This method returns true if
6642     * the character's general category type is any of the following:
6643     *
6644     * <ul>
6645     * <li> {@link #SPACE_SEPARATOR}
6646     * <li> {@link #LINE_SEPARATOR}
6647     * <li> {@link #PARAGRAPH_SEPARATOR}
6648     * </ul>
6649     *
6650     * @param   codePoint the character (Unicode code point) to be tested.
6651     * @return  {@code true} if the character is a space character;
6652     *          {@code false} otherwise.
6653     * @see     Character#isWhitespace(int)
6654     * @since   1.5
6655     */
6656    public static boolean isSpaceChar(int codePoint) {
6657        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6658        // SPACE or NO-BREAK SPACE?
6659        if (codePoint == 0x20 || codePoint == 0xa0) {
6660            return true;
6661        }
6662        if (codePoint < 0x1000) {
6663            return false;
6664        }
6665        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6666        if (codePoint == 0x1680 || codePoint == 0x180e) {
6667            return true;
6668        }
6669        if (codePoint < 0x2000) {
6670            return false;
6671        }
6672        if (codePoint <= 0xffff) {
6673            // Other whitespace from General Punctuation...
6674            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
6675                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6676        }
6677        // Let icu4c worry about non-BMP code points.
6678        return isSpaceCharImpl(codePoint);
6679    }
6680
6681    static native boolean isSpaceCharImpl(int codePoint);
6682
6683    /**
6684     * Determines if the specified character is white space according to Java.
6685     * A character is a Java whitespace character if and only if it satisfies
6686     * one of the following criteria:
6687     * <ul>
6688     * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6689     *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6690     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6691     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6692     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6693     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6694     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6695     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6696     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6697     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6698     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6699     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6700     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6701     * </ul>
6702     *
6703     * <p><b>Note:</b> This method cannot handle <a
6704     * href="#supplementary"> supplementary characters</a>. To support
6705     * all Unicode characters, including supplementary characters, use
6706     * the {@link #isWhitespace(int)} method.
6707     *
6708     * @param   ch the character to be tested.
6709     * @return  {@code true} if the character is a Java whitespace
6710     *          character; {@code false} otherwise.
6711     * @see     Character#isSpaceChar(char)
6712     * @since   1.1
6713     */
6714    public static boolean isWhitespace(char ch) {
6715        return isWhitespace((int)ch);
6716    }
6717
6718    /**
6719     * Determines if the specified character (Unicode code point) is
6720     * white space according to Java.  A character is a Java
6721     * whitespace character if and only if it satisfies one of the
6722     * following criteria:
6723     * <ul>
6724     * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6725     *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6726     *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6727     *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6728     * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6729     * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6730     * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6731     * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6732     * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6733     * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6734     * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6735     * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6736     * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6737     * </ul>
     *
6740     * @param   codePoint the character (Unicode code point) to be tested.
6741     * @return  {@code true} if the character is a Java whitespace
6742     *          character; {@code false} otherwise.
6743     * @see     Character#isSpaceChar(int)
6744     * @since   1.5
6745     */
6746    public static boolean isWhitespace(int codePoint) {
6747        // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6748        // Any ASCII whitespace character?
6749        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
6750            return true;
6751        }
6752        if (codePoint < 0x1000) {
6753            return false;
6754        }
6755        // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6756        if (codePoint == 0x1680 || codePoint == 0x180e) {
6757            return true;
6758        }
6759        if (codePoint < 0x2000) {
6760            return false;
6761        }
6762        // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
6763        if (codePoint == 0x2007 || codePoint == 0x202f) {
6764            return false;
6765        }
6766        if (codePoint <= 0xffff) {
6767            // Other whitespace from General Punctuation...
6768            return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
6769                codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6770        }
6771        // Let icu4c worry about non-BMP code points.
6772        return isWhitespaceImpl(codePoint);
6773    }
6774
6775    native static boolean isWhitespaceImpl(int codePoint);
6776
6777    /**
6778     * Determines if the specified character is an ISO control
6779     * character.  A character is considered to be an ISO control
6780     * character if its code is in the range {@code '\u005Cu0000'}
6781     * through {@code '\u005Cu001F'} or in the range
6782     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6783     *
6784     * <p><b>Note:</b> This method cannot handle <a
6785     * href="#supplementary"> supplementary characters</a>. To support
6786     * all Unicode characters, including supplementary characters, use
6787     * the {@link #isISOControl(int)} method.
6788     *
6789     * @param   ch      the character to be tested.
6790     * @return  {@code true} if the character is an ISO control character;
6791     *          {@code false} otherwise.
6792     *
6793     * @see     Character#isSpaceChar(char)
6794     * @see     Character#isWhitespace(char)
6795     * @since   1.1
6796     */
6797    public static boolean isISOControl(char ch) {
6798        return isISOControl((int)ch);
6799    }
6800
6801    /**
6802     * Determines if the referenced character (Unicode code point) is an ISO control
6803     * character.  A character is considered to be an ISO control
6804     * character if its code is in the range {@code '\u005Cu0000'}
6805     * through {@code '\u005Cu001F'} or in the range
6806     * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6807     *
6808     * @param   codePoint the character (Unicode code point) to be tested.
6809     * @return  {@code true} if the character is an ISO control character;
6810     *          {@code false} otherwise.
6811     * @see     Character#isSpaceChar(int)
6812     * @see     Character#isWhitespace(int)
6813     * @since   1.5
6814     */
6815    public static boolean isISOControl(int codePoint) {
6816        // Optimized form of:
6817        //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6818        //     (codePoint >= 0x7F && codePoint <= 0x9F);
6819        return codePoint <= 0x9F &&
6820            (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6821    }
6822
6823    /**
6824     * Returns a value indicating a character's general category.
6825     *
6826     * <p><b>Note:</b> This method cannot handle <a
6827     * href="#supplementary"> supplementary characters</a>. To support
6828     * all Unicode characters, including supplementary characters, use
6829     * the {@link #getType(int)} method.
6830     *
6831     * @param   ch      the character to be tested.
6832     * @return  a value of type {@code int} representing the
6833     *          character's general category.
6834     * @see     Character#COMBINING_SPACING_MARK
6835     * @see     Character#CONNECTOR_PUNCTUATION
6836     * @see     Character#CONTROL
6837     * @see     Character#CURRENCY_SYMBOL
6838     * @see     Character#DASH_PUNCTUATION
6839     * @see     Character#DECIMAL_DIGIT_NUMBER
6840     * @see     Character#ENCLOSING_MARK
6841     * @see     Character#END_PUNCTUATION
6842     * @see     Character#FINAL_QUOTE_PUNCTUATION
6843     * @see     Character#FORMAT
6844     * @see     Character#INITIAL_QUOTE_PUNCTUATION
6845     * @see     Character#LETTER_NUMBER
6846     * @see     Character#LINE_SEPARATOR
6847     * @see     Character#LOWERCASE_LETTER
6848     * @see     Character#MATH_SYMBOL
6849     * @see     Character#MODIFIER_LETTER
6850     * @see     Character#MODIFIER_SYMBOL
6851     * @see     Character#NON_SPACING_MARK
6852     * @see     Character#OTHER_LETTER
6853     * @see     Character#OTHER_NUMBER
6854     * @see     Character#OTHER_PUNCTUATION
6855     * @see     Character#OTHER_SYMBOL
6856     * @see     Character#PARAGRAPH_SEPARATOR
6857     * @see     Character#PRIVATE_USE
6858     * @see     Character#SPACE_SEPARATOR
6859     * @see     Character#START_PUNCTUATION
6860     * @see     Character#SURROGATE
6861     * @see     Character#TITLECASE_LETTER
6862     * @see     Character#UNASSIGNED
6863     * @see     Character#UPPERCASE_LETTER
6864     * @since   1.1
6865     */
6866    public static int getType(char ch) {
6867        return getType((int)ch);
6868    }
6869
6870    /**
6871     * Returns a value indicating a character's general category.
6872     *
6873     * @param   codePoint the character (Unicode code point) to be tested.
6874     * @return  a value of type {@code int} representing the
6875     *          character's general category.
6876     * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6877     * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6878     * @see     Character#CONTROL CONTROL
6879     * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6880     * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6881     * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6882     * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6883     * @see     Character#END_PUNCTUATION END_PUNCTUATION
6884     * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6885     * @see     Character#FORMAT FORMAT
6886     * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6887     * @see     Character#LETTER_NUMBER LETTER_NUMBER
6888     * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6889     * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6890     * @see     Character#MATH_SYMBOL MATH_SYMBOL
6891     * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6892     * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6893     * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6894     * @see     Character#OTHER_LETTER OTHER_LETTER
6895     * @see     Character#OTHER_NUMBER OTHER_NUMBER
6896     * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6897     * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6898     * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6899     * @see     Character#PRIVATE_USE PRIVATE_USE
6900     * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6901     * @see     Character#START_PUNCTUATION START_PUNCTUATION
6902     * @see     Character#SURROGATE SURROGATE
6903     * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6904     * @see     Character#UNASSIGNED UNASSIGNED
6905     * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6906     * @since   1.5
6907     */
6908    public static int getType(int codePoint) {
6909        int type = getTypeImpl(codePoint);
6910        // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
6911        if (type <= Character.FORMAT) {
6912            return type;
6913        }
6914        return (type + 1);
6915    }
6916
6917    static native int getTypeImpl(int codePoint);
6918
6919    /**
6920     * Determines the character representation for a specific digit in
6921     * the specified radix. If the value of {@code radix} is not a
6922     * valid radix, or the value of {@code digit} is not a valid
6923     * digit in the specified radix, the null character
6924     * ({@code '\u005Cu0000'}) is returned.
6925     * <p>
6926     * The {@code radix} argument is valid if it is greater than or
6927     * equal to {@code MIN_RADIX} and less than or equal to
6928     * {@code MAX_RADIX}. The {@code digit} argument is valid if
6929     * {@code 0 <= digit < radix}.
6930     * <p>
6931     * If the digit is less than 10, then
6932     * {@code '0' + digit} is returned. Otherwise, the value
6933     * {@code 'a' + digit - 10} is returned.
6934     *
6935     * @param   digit   the number to convert to a character.
6936     * @param   radix   the radix.
6937     * @return  the {@code char} representation of the specified digit
6938     *          in the specified radix.
6939     * @see     Character#MIN_RADIX
6940     * @see     Character#MAX_RADIX
6941     * @see     Character#digit(char, int)
6942     */
6943    public static char forDigit(int digit, int radix) {
6944        if ((digit >= radix) || (digit < 0)) {
6945            return '\0';
6946        }
6947        if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6948            return '\0';
6949        }
6950        if (digit < 10) {
6951            return (char)('0' + digit);
6952        }
6953        return (char)('a' - 10 + digit);
6954    }
6955
6956    /**
6957     * Returns the Unicode directionality property for the given
6958     * character.  Character directionality is used to calculate the
6959     * visual ordering of text. The directionality value of undefined
6960     * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6961     *
6962     * <p><b>Note:</b> This method cannot handle <a
6963     * href="#supplementary"> supplementary characters</a>. To support
6964     * all Unicode characters, including supplementary characters, use
6965     * the {@link #getDirectionality(int)} method.
6966     *
6967     * @param  ch {@code char} for which the directionality property
6968     *            is requested.
6969     * @return the directionality property of the {@code char} value.
6970     *
6971     * @see Character#DIRECTIONALITY_UNDEFINED
6972     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6973     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6974     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6975     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6976     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6977     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6978     * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6979     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6980     * @see Character#DIRECTIONALITY_NONSPACING_MARK
6981     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6982     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6983     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6984     * @see Character#DIRECTIONALITY_WHITESPACE
6985     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6986     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6987     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6988     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6989     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6990     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6991     * @since 1.4
6992     */
6993    public static byte getDirectionality(char ch) {
6994        return getDirectionality((int)ch);
6995    }
6996
6997    /**
6998     * Returns the Unicode directionality property for the given
6999     * character (Unicode code point).  Character directionality is
7000     * used to calculate the visual ordering of text. The
7001     * directionality value of undefined character is {@link
7002     * #DIRECTIONALITY_UNDEFINED}.
7003     *
7004     * @param   codePoint the character (Unicode code point) for which
7005     *          the directionality property is requested.
7006     * @return the directionality property of the character.
7007     *
7008     * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7009     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7010     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7011     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7012     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7013     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7014     * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7015     * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7016     * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7017     * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7018     * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7019     * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7020     * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7021     * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7022     * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7023     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7024     * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7025     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7026     * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7027     * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7028     * @since    1.5
7029     */
7030    public static byte getDirectionality(int codePoint) {
7031        if (getType(codePoint) == Character.UNASSIGNED) {
7032            return Character.DIRECTIONALITY_UNDEFINED;
7033        }
7034
7035        byte directionality = getDirectionalityImpl(codePoint);
7036        if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7037            return DIRECTIONALITY[directionality];
7038        }
7039        return Character.DIRECTIONALITY_UNDEFINED;
7040    }
7041
7042    native static byte getDirectionalityImpl(int codePoint);
7043    /**
7044     * Determines whether the character is mirrored according to the
7045     * Unicode specification.  Mirrored characters should have their
7046     * glyphs horizontally mirrored when displayed in text that is
7047     * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7048     * PARENTHESIS is semantically defined to be an <i>opening
7049     * parenthesis</i>.  This will appear as a "(" in text that is
7050     * left-to-right but as a ")" in text that is right-to-left.
7051     *
7052     * <p><b>Note:</b> This method cannot handle <a
7053     * href="#supplementary"> supplementary characters</a>. To support
7054     * all Unicode characters, including supplementary characters, use
7055     * the {@link #isMirrored(int)} method.
7056     *
7057     * @param  ch {@code char} for which the mirrored property is requested
7058     * @return {@code true} if the char is mirrored, {@code false}
7059     *         if the {@code char} is not mirrored or is not defined.
7060     * @since 1.4
7061     */
7062    public static boolean isMirrored(char ch) {
7063        return isMirrored((int)ch);
7064    }
7065
7066    /**
7067     * Determines whether the specified character (Unicode code point)
7068     * is mirrored according to the Unicode specification.  Mirrored
7069     * characters should have their glyphs horizontally mirrored when
7070     * displayed in text that is right-to-left.  For example,
7071     * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7072     * defined to be an <i>opening parenthesis</i>.  This will appear
7073     * as a "(" in text that is left-to-right but as a ")" in text
7074     * that is right-to-left.
7075     *
7076     * @param   codePoint the character (Unicode code point) to be tested.
7077     * @return  {@code true} if the character is mirrored, {@code false}
7078     *          if the character is not mirrored or is not defined.
7079     * @since   1.5
7080     */
7081    public static boolean isMirrored(int codePoint) {
7082        return isMirroredImpl(codePoint);
7083    }
7084
    /**
     * Native test of the mirrored property for {@code codePoint}.  The
     * implementation lives outside this file; {@link #isMirrored(int)}
     * returns this method's result unchanged.
     */
    native static boolean isMirroredImpl(int codePoint);
7086    /**
7087     * Compares two {@code Character} objects numerically.
7088     *
7089     * @param   anotherCharacter   the {@code Character} to be compared.
7090
7091     * @return  the value {@code 0} if the argument {@code Character}
7092     *          is equal to this {@code Character}; a value less than
7093     *          {@code 0} if this {@code Character} is numerically less
7094     *          than the {@code Character} argument; and a value greater than
7095     *          {@code 0} if this {@code Character} is numerically greater
7096     *          than the {@code Character} argument (unsigned comparison).
7097     *          Note that this is strictly a numerical comparison; it is not
7098     *          locale-dependent.
7099     * @since   1.2
7100     */
7101    public int compareTo(Character anotherCharacter) {
7102        return compare(this.value, anotherCharacter.value);
7103    }
7104
7105    /**
7106     * Compares two {@code char} values numerically.
7107     * The value returned is identical to what would be returned by:
7108     * <pre>
7109     *    Character.valueOf(x).compareTo(Character.valueOf(y))
7110     * </pre>
7111     *
7112     * @param  x the first {@code char} to compare
7113     * @param  y the second {@code char} to compare
7114     * @return the value {@code 0} if {@code x == y};
7115     *         a value less than {@code 0} if {@code x < y}; and
7116     *         a value greater than {@code 0} if {@code x > y}
7117     * @since 1.7
7118     */
7119    public static int compare(char x, char y) {
7120        return x - y;
7121    }
7122
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;
7130
    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form.  Derived from {@link #SIZE} as
     * {@code SIZE / Byte.SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7138
7139    /**
7140     * Returns the value obtained by reversing the order of the bytes in the
7141     * specified <tt>char</tt> value.
7142     *
7143     * @return the value obtained by reversing (or, equivalently, swapping)
7144     *     the bytes in the specified <tt>char</tt> value.
7145     * @since 1.5
7146     */
7147    public static char reverseBytes(char ch) {
7148        return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7149    }
7150
7151    /**
7152     * Returns the Unicode name of the specified character
7153     * {@code codePoint}, or null if the code point is
7154     * {@link #UNASSIGNED unassigned}.
7155     * <p>
7156     * Note: if the specified character is not assigned a name by
7157     * the <i>UnicodeData</i> file (part of the Unicode Character
7158     * Database maintained by the Unicode Consortium), the returned
7159     * name is the same as the result of expression.
7160     *
7161     * <blockquote>{@code
7162     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7163     *     + " "
7164     *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7165     *
7166     * }</blockquote>
7167     *
7168     * @param  codePoint the character (Unicode code point)
7169     *
7170     * @return the Unicode name of the specified character, or null if
7171     *         the code point is unassigned.
7172     *
7173     * @exception IllegalArgumentException if the specified
7174     *            {@code codePoint} is not a valid Unicode
7175     *            code point.
7176     *
7177     * @since 1.7
7178     */
7179    public static String getName(int codePoint) {
7180        if (!isValidCodePoint(codePoint)) {
7181            throw new IllegalArgumentException();
7182        }
7183        String name = getNameImpl(codePoint);
7184        if (name != null)
7185            return name;
7186        if (getType(codePoint) == UNASSIGNED)
7187            return null;
7188        UnicodeBlock block = UnicodeBlock.of(codePoint);
7189        if (block != null)
7190            return block.toString().replace('_', ' ') + " "
7191                   + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7192        // should never come here
7193        return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7194    }
7195
    /**
     * Native lookup of the Unicode name for {@code codePoint}.  The
     * implementation lives outside this file.  {@link #getName(int)}
     * calls it only after validating the code point, and treats a
     * {@code null} result as "no name available", in which case it
     * falls back to a block-based name.
     */
    private static native String getNameImpl(int codePoint);
7197}
7198