Character.java revision 48819fe0b9130618a430ec52b3f8526c4c0a5f8a
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21import java.util.Arrays; 22 23/** 24 * The wrapper for the primitive type {@code char}. This class also provides a 25 * number of utility methods for working with characters. 26 * 27 * <p>Character data is kept up to date as Unicode evolves. 28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 29 * the {@code Locale} documentation for details of the Unicode versions implemented by current 30 * and historical Android releases. 31 * 32 * <p>The Unicode specification, character tables, and other information are available at 33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 34 * 35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 39 * encoding and {@code char} pairs are used to represent code points in the 40 * supplementary range. 
A pair of {@code char} values that represent a 41 * supplementary character are made up of a <i>high surrogate</i> with a value 42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 43 * 0xDC00 to 0xDFFF. 44 * <p> 45 * On the Java platform a {@code char} value represents either a single BMP code 46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 47 * is used to represent all Unicode code points. 48 * 49 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 51 * grouped semantically to provide a convenient overview. This table is also useful in 52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 53 * <span class="datatable"> 54 * <style type="text/css"> 55 * .datatable td { padding-right: 20px; } 56 * </style> 57 * <p><table> 58 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 59 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 60 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 62 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 63 * <tr> <td><br></td> </tr> 64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 67 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 68 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 69 * <tr> <td><br></td> </tr> 70 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 71 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 73 * <tr> <td><br></td> </tr> 74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 75 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 76 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 77 * <tr> <td><br></td> </tr> 78 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 79 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 84 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 85 * <tr> <td><br></td> </tr> 86 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 89 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 90 * <tr> <td><br></td> </tr> 91 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 94 * </table> 95 * </span> 96 * 97 * @since 1.0 98 */ 99@FindBugsSuppressWarnings("DM_NUMBER_CTOR") 100public final class Character 
implements Serializable, Comparable<Character> { 101 private static final long serialVersionUID = 3786198910865385080L; 102 103 private final char value; 104 105 /** 106 * The minimum {@code Character} value, U+0000. 107 */ 108 public static final char MIN_VALUE = '\u0000'; 109 110 /** 111 * The maximum {@code Character} value, U+FFFF. 112 */ 113 public static final char MAX_VALUE = '\uffff'; 114 115 /** 116 * The minimum radix (2) used for conversions between characters and integers. 117 */ 118 public static final int MIN_RADIX = 2; 119 120 /** 121 * The maximum radix (36) used for conversions between characters and integers. 122 */ 123 public static final int MAX_RADIX = 36; 124 125 /** 126 * The {@link Class} object that represents the primitive type {@code char}. 127 */ 128 @SuppressWarnings("unchecked") 129 public static final Class<Character> TYPE 130 = (Class<Character>) char[].class.getComponentType(); 131 // Note: Character.TYPE can't be set to "char.class", since *that* is 132 // defined to be "java.lang.Character.TYPE"; 133 134 /** 135 * Unicode category constant Cn (Unassigned). 136 */ 137 public static final byte UNASSIGNED = 0; 138 139 /** 140 * Unicode category constant Lu (Uppercase letter). 141 */ 142 public static final byte UPPERCASE_LETTER = 1; 143 144 /** 145 * Unicode category constant Ll (Lowercase letter). 146 */ 147 public static final byte LOWERCASE_LETTER = 2; 148 149 /** 150 * Unicode category constant Lt (Titlecase letter). 151 */ 152 public static final byte TITLECASE_LETTER = 3; 153 154 /** 155 * Unicode category constant Lm (Modifier letter). 156 */ 157 public static final byte MODIFIER_LETTER = 4; 158 159 /** 160 * Unicode category constant Lo (Other letter). 161 */ 162 public static final byte OTHER_LETTER = 5; 163 164 /** 165 * Unicode category constant Mn (Non-spacing mark). 166 */ 167 public static final byte NON_SPACING_MARK = 6; 168 169 /** 170 * Unicode category constant Me (Enclosing mark). 171 */ 172 public static final byte ENCLOSING_MARK = 7; 173 174 /** 175 * Unicode category constant Mc (Combining spacing mark). 
176 */ 177 public static final byte COMBINING_SPACING_MARK = 8; 178 179 /** 180 * Unicode category constant Nd (Decimal digit number). 181 */ 182 public static final byte DECIMAL_DIGIT_NUMBER = 9; 183 184 /** 185 * Unicode category constant Nl (Letter number). 186 */ 187 public static final byte LETTER_NUMBER = 10; 188 189 /** 190 * Unicode category constant No (Other number). 191 */ 192 public static final byte OTHER_NUMBER = 11; 193 194 /** 195 * Unicode category constant Zs (Space separator). 196 */ 197 public static final byte SPACE_SEPARATOR = 12; 198 199 /** 200 * Unicode category constant Zl (Line separator). 201 */ 202 public static final byte LINE_SEPARATOR = 13; 203 204 /** 205 * Unicode category constant Zp (Paragraph separator). 206 */ 207 public static final byte PARAGRAPH_SEPARATOR = 14; 208 209 /** 210 * Unicode category constant Cc (Control). 211 */ 212 public static final byte CONTROL = 15; 213 214 /** 215 * Unicode category constant Cf (Format). 216 */ 217 public static final byte FORMAT = 16; 218 219 /** 220 * Unicode category constant Co (Private use). Note that the value 17 is skipped: the preceding constant {@link #FORMAT} is 16 and this one is 18. 221 */ 222 public static final byte PRIVATE_USE = 18; 223 224 /** 225 * Unicode category constant Cs (Surrogate). 226 */ 227 public static final byte SURROGATE = 19; 228 229 /** 230 * Unicode category constant Pd (Dash punctuation). 231 */ 232 public static final byte DASH_PUNCTUATION = 20; 233 234 /** 235 * Unicode category constant Ps (Start punctuation). 236 */ 237 public static final byte START_PUNCTUATION = 21; 238 239 /** 240 * Unicode category constant Pe (End punctuation). 241 */ 242 public static final byte END_PUNCTUATION = 22; 243 244 /** 245 * Unicode category constant Pc (Connector punctuation). 246 */ 247 public static final byte CONNECTOR_PUNCTUATION = 23; 248 249 /** 250 * Unicode category constant Po (Other punctuation). 251 */ 252 public static final byte OTHER_PUNCTUATION = 24; 253 254 /** 255 * Unicode category constant Sm (Math symbol). 256 */ 257 public static final byte MATH_SYMBOL = 25; 258 259 /** 260 * Unicode category constant Sc (Currency symbol). 261 */ 262 public static final byte CURRENCY_SYMBOL = 26; 263 264 /** 265 * Unicode category constant Sk (Modifier symbol). 
266 */ 267 public static final byte MODIFIER_SYMBOL = 27; 268 269 /** 270 * Unicode category constant So (Other symbol). 271 */ 272 public static final byte OTHER_SYMBOL = 28; 273 274 /** 275 * Unicode category constant Pi (Initial quote punctuation). 276 * 277 * @since 1.4 278 */ 279 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 280 281 /** 282 * Unicode category constant Pf (Final quote punctuation). 283 * 284 * @since 1.4 285 */ 286 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 287 288 /** 289 * Unicode bidirectional constant for an undefined directionality; the only negative directionality value in this section. 290 * 291 * @since 1.4 292 */ 293 public static final byte DIRECTIONALITY_UNDEFINED = -1; 294 295 /** 296 * Unicode bidirectional constant L (left-to-right). 297 * 298 * @since 1.4 299 */ 300 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 301 302 /** 303 * Unicode bidirectional constant R (right-to-left). 304 * 305 * @since 1.4 306 */ 307 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 308 309 /** 310 * Unicode bidirectional constant AL (right-to-left Arabic). 311 * 312 * @since 1.4 313 */ 314 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 315 316 /** 317 * Unicode bidirectional constant EN (European number). 318 * 319 * @since 1.4 320 */ 321 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 322 323 /** 324 * Unicode bidirectional constant ES (European number separator). 325 * 326 * @since 1.4 327 */ 328 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 329 330 /** 331 * Unicode bidirectional constant ET (European number terminator). 332 * 333 * @since 1.4 334 */ 335 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 336 337 /** 338 * Unicode bidirectional constant AN (Arabic number). 339 * 340 * @since 1.4 341 */ 342 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 343 344 /** 345 * Unicode bidirectional constant CS (common number separator). 346 * 347 * @since 1.4 348 */ 349 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 350 351 /** 352 * Unicode bidirectional constant NSM (non-spacing mark). 353 * 354 * @since 1.4 355 */ 356 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 357 358 /** 359 * Unicode bidirectional constant BN (boundary neutral). 
360 * 361 * @since 1.4 362 */ 363 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 364 365 /** 366 * Unicode bidirectional constant B (paragraph separator). 367 * 368 * @since 1.4 369 */ 370 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 371 372 /** 373 * Unicode bidirectional constant S (segment separator). 374 * 375 * @since 1.4 376 */ 377 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 378 379 /** 380 * Unicode bidirectional constant WS (whitespace). 381 * 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_WHITESPACE = 12; 385 386 /** 387 * Unicode bidirectional constant ON (other neutrals). 388 * 389 * @since 1.4 390 */ 391 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 392 393 /** 394 * Unicode bidirectional constant LRE (left-to-right embedding). 395 * 396 * @since 1.4 397 */ 398 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 399 400 /** 401 * Unicode bidirectional constant LRO (left-to-right override). 402 * 403 * @since 1.4 404 */ 405 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 406 407 /** 408 * Unicode bidirectional constant RLE (right-to-left embedding). 409 * 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 413 414 /** 415 * Unicode bidirectional constant RLO (right-to-left override). 416 * 417 * @since 1.4 418 */ 419 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 420 421 /** 422 * Unicode bidirectional constant PDF (pop directional format). 423 * 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 427 428 /** 429 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 430 * encoding, {@code '\uD800'}. 431 * 432 * @since 1.5 433 */ 434 public static final char MIN_HIGH_SURROGATE = '\uD800'; 435 436 /** 437 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 438 * encoding, {@code '\uDBFF'}. 
439 * 440 * @since 1.5 441 */ 442 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 443 444 /** 445 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 446 * encoding, {@code '\uDC00'}. 447 * 448 * @since 1.5 449 */ 450 public static final char MIN_LOW_SURROGATE = '\uDC00'; 451 452 /** 453 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 454 * encoding, {@code '\uDFFF'}. 455 * 456 * @since 1.5 457 */ 458 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 459 460 /** 461 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}; the same value as {@link #MIN_HIGH_SURROGATE}. 462 * 463 * @since 1.5 464 */ 465 public static final char MIN_SURROGATE = '\uD800'; 466 467 /** 468 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}; the same value as {@link #MAX_LOW_SURROGATE}. 469 * 470 * @since 1.5 471 */ 472 public static final char MAX_SURROGATE = '\uDFFF'; 473 474 /** 475 * The minimum value of a supplementary code point, {@code U+010000}. 476 * 477 * @since 1.5 478 */ 479 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 480 481 /** 482 * The minimum code point value, {@code U+0000}. 483 * 484 * @since 1.5 485 */ 486 public static final int MIN_CODE_POINT = 0x000000; 487 488 /** 489 * The maximum code point value, {@code U+10FFFF}. 490 * 491 * @since 1.5 492 */ 493 public static final int MAX_CODE_POINT = 0x10FFFF; 494 495 /** 496 * The number of bits required to represent a {@code Character} value in 497 * unsigned form. 
498 * 499 * @since 1.5 500 */ 501 public static final int SIZE = 16; 502 /* Lookup table whose entries are DIRECTIONALITY_* constants. The entries are not in ascending constant order (for example DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, whose value is 2, sits at index 13), so the index is presumably a directionality code from a different numbering. TODO confirm against the code that reads this table, which is outside this view. */ 503 private static final byte[] DIRECTIONALITY = new byte[] { 504 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 505 DIRECTIONALITY_EUROPEAN_NUMBER, 506 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 507 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 508 DIRECTIONALITY_ARABIC_NUMBER, 509 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 510 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 511 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 512 DIRECTIONALITY_OTHER_NEUTRALS, 513 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 514 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 515 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 516 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 517 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 518 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 519 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 520 521 /** 522 * Represents a subset of the Unicode character set. A subset carries only a name; {@code equals} and {@code hashCode} are final and identity-based, so two subsets are equal only if they are the same instance. 523 */ 524 public static class Subset { 525 private final String name; 526 527 /** 528 * Constructs a new {@code Subset}. * @param name the name of this subset, later returned by {@link #toString()}; must not be {@code null}. * @throws NullPointerException if {@code name} is {@code null}. 529 */ 530 protected Subset(String name) { 531 if (name == null) { 532 throw new NullPointerException("name == null"); 533 } 534 this.name = name; 535 } 536 537 /** 538 * Compares this character subset for identity with the specified object. * @param object the object to compare with. * @return {@code true} only if {@code object} is this very instance. 539 */ 540 @Override public final boolean equals(Object object) { 541 return object == this; 542 } 543 544 /** 545 * Returns this subset's hash code, which is the hash code computed by 546 * {@link java.lang.Object#hashCode()}. 547 */ 548 @Override public final int hashCode() { 549 return super.hashCode(); 550 } 551 552 /** 553 * Returns this subset's name, i.e. the string passed to the constructor. 554 */ 555 @Override public final String toString() { 556 return name; 557 } 558 } 559 560 /** 561 * Represents a block of Unicode characters. This class provides constants for various 562 * well-known blocks (but not all blocks) and methods for looking up a block 563 * by name {@link #forName} or by code point {@link #of}. 
564 * 565 * @since 1.2 566 */ 567 public static final class UnicodeBlock extends Subset { 568 /** 569 * The Surrogates Area Unicode block. 570 * 571 * @deprecated As of Java 5, this block has been replaced by 572 * {@link #HIGH_SURROGATES}, 573 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 574 * {@link #LOW_SURROGATES}. 575 */ 576 @Deprecated 577 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"); 578 579 /** The Basic Latin Unicode block. */ 580 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"); 581 582 /** The Latin-1 Supplement Unicode block. */ 583 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"); 584 585 /** The Latin Extended-A Unicode block. */ 586 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"); 587 588 /** The Latin Extended-B Unicode block. */ 589 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"); 590 591 /** The IPA Extensions Unicode block. */ 592 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"); 593 594 /** The Spacing Modifier Letters Unicode block. */ 595 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"); 596 597 /** The Combining Diacritical Marks Unicode block. */ 598 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"); 599 600 /** 601 * The Greek and Coptic Unicode block. Previously referred to as Greek. 602 */ 603 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK"); 604 605 /** The Cyrillic Unicode block. */ 606 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC"); 607 608 /** 609 * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary. 
610 */ 611 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY"); 612 613 /** The Armenian Unicode block. */ 614 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN"); 615 616 /** The Hebrew Unicode block. */ 617 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW"); 618 619 /** The Arabic Unicode block. */ 620 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC"); 621 622 /** The Syriac Unicode block. */ 623 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"); 624 625 /** The Thaana Unicode block. */ 626 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA"); 627 628 /** The Devanagari Unicode block. */ 629 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI"); 630 631 /** The Bengali Unicode block. */ 632 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI"); 633 634 /** The Gurmukhi Unicode block. */ 635 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI"); 636 637 /** The Gujarati Unicode block. */ 638 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI"); 639 640 /** The Oriya Unicode block. */ 641 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA"); 642 643 /** The Tamil Unicode block. */ 644 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL"); 645 646 /** The Telugu Unicode block. */ 647 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU"); 648 649 /** The Kannada Unicode block. */ 650 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA"); 651 652 /** The Malayalam Unicode block. */ 653 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM"); 654 655 /** The Sinhala Unicode block. */ 656 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA"); 657 658 /** The Thai Unicode block. 
*/ 659 public static final UnicodeBlock THAI = new UnicodeBlock("THAI"); 660 661 /** The Lao Unicode block. */ 662 public static final UnicodeBlock LAO = new UnicodeBlock("LAO"); 663 664 /** The Tibetan Unicode block. */ 665 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN"); 666 667 /** The Myanmar Unicode block. */ 668 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR"); 669 670 /** The Georgian Unicode block. */ 671 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN"); 672 673 /** The Hangul Jamo Unicode block. */ 674 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"); 675 676 /** The Ethiopic Unicode block. */ 677 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC"); 678 679 /** The Cherokee Unicode block. */ 680 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE"); 681 682 /** The Unified Canadian Aboriginal Syllabics Unicode block. */ 683 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 684 685 /** The Ogham Unicode block. */ 686 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM"); 687 688 /** The Runic Unicode block. */ 689 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC"); 690 691 /** The Tagalog Unicode block. */ 692 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG"); 693 694 /** The Hanunoo Unicode block. */ 695 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO"); 696 697 /** The Buhid Unicode block. */ 698 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID"); 699 700 /** The Tagbanwa Unicode block. */ 701 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA"); 702 703 /** The Khmer Unicode block. */ 704 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER"); 705 706 /** The Mongolian Unicode block. 
*/ 707 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN"); 708 709 /** The Limbu Unicode block. */ 710 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU"); 711 712 /** The Tai Le Unicode block. */ 713 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE"); 714 715 /** The Khmer Symbols Unicode block. */ 716 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS"); 717 718 /** The Phonetic Extensions Unicode block. */ 719 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS"); 720 721 /** The Latin Extended Additional Unicode block. */ 722 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"); 723 724 /** The Greek Extended Unicode block. */ 725 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"); 726 727 /** The General Punctuation Unicode block. */ 728 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"); 729 730 /** The Superscripts and Subscripts Unicode block. */ 731 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"); 732 733 /** The Currency Symbols Unicode block. */ 734 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"); 735 736 /** 737 * The Combining Diacritical Marks for Symbols Unicode 738 * Block. Previously referred to as Combining Marks for 739 * Symbols. 740 */ 741 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"); 742 743 /** The Letterlike Symbols Unicode block. */ 744 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"); 745 746 /** The Number Forms Unicode block. */ 747 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"); 748 749 /** The Arrows Unicode block. 
*/ 750 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS"); 751 752 /** The Mathematical Operators Unicode block. */ 753 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"); 754 755 /** The Miscellaneous Technical Unicode block. */ 756 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"); 757 758 /** The Control Pictures Unicode block. */ 759 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"); 760 761 /** The Optical Character Recognition Unicode block. */ 762 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"); 763 764 /** The Enclosed Alphanumerics Unicode block. */ 765 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"); 766 767 /** The Box Drawing Unicode block. */ 768 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"); 769 770 /** The Block Elements Unicode block. */ 771 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"); 772 773 /** The Geometric Shapes Unicode block. */ 774 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"); 775 776 /** The Miscellaneous Symbols Unicode block. */ 777 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"); 778 779 /** The Dingbats Unicode block. */ 780 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS"); 781 782 /** The Miscellaneous Mathematical Symbols-A Unicode block. */ 783 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A"); 784 785 /** The Supplemental Arrows-A Unicode block. 
*/ 786 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A"); 787 788 /** The Braille Patterns Unicode block. */ 789 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"); 790 791 /** The Supplemental Arrows-B Unicode block. */ 792 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B"); 793 794 /** The Miscellaneous Mathematical Symbols-B Unicode block. */ 795 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B"); 796 797 /** The Supplemental Mathematical Operators Unicode block. */ 798 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS"); 799 800 /** The Miscellaneous Symbols and Arrows Unicode block. */ 801 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS"); 802 803 /** The CJK Radicals Supplement Unicode block. */ 804 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"); 805 806 /** The Kangxi Radicals Unicode block. */ 807 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"); 808 809 /** The Ideographic Description Characters Unicode block. */ 810 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 811 812 /** The CJK Symbols and Punctuation Unicode block. */ 813 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"); 814 815 /** The Hiragana Unicode block. */ 816 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA"); 817 818 /** The Katakana Unicode block. */ 819 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA"); 820 821 /** The Bopomofo Unicode block. 
*/ 822 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO"); 823 824 /** The Hangul Compatibility Jamo Unicode block. */ 825 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"); 826 827 /** The Kanbun Unicode block. */ 828 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN"); 829 830 /** The Bopomofo Extended Unicode block. */ 831 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"); 832 833 /** The Katakana Phonetic Extensions Unicode block. */ 834 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS"); 835 836 /** The Enclosed CJK Letters and Months Unicode block. */ 837 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"); 838 839 /** The CJK Compatibility Unicode block. */ 840 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"); 841 842 /** The CJK Unified Ideographs Extension A Unicode block. */ 843 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 844 845 /** The Yijing Hexagram Symbols Unicode block. */ 846 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS"); 847 848 /** The CJK Unified Ideographs Unicode block. */ 849 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"); 850 851 /** The Yi Syllables Unicode block. */ 852 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"); 853 854 /** The Yi Radicals Unicode block. */ 855 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS"); 856 857 /** The Hangul Syllables Unicode block. 
*/ 858 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"); 859 860 /** 861 * The High Surrogates Unicode block. This block represents 862 * code point values in the high surrogate range 0xD800 to 0xDB7F 863 */ 864 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES"); 865 866 /** 867 * The High Private Use Surrogates Unicode block. This block 868 * represents code point values in the high surrogate range 0xDB80 to 869 * 0xDBFF 870 */ 871 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES"); 872 873 /** 874 * The Low Surrogates Unicode block. This block represents 875 * code point values in the low surrogate range 0xDC00 to 0xDFFF 876 */ 877 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES"); 878 879 /** The Private Use Area Unicode block. */ 880 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"); 881 882 /** The CJK Compatibility Ideographs Unicode block. */ 883 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"); 884 885 /** The Alphabetic Presentation Forms Unicode block. */ 886 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"); 887 888 /** The Arabic Presentation Forms-A Unicode block. */ 889 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"); 890 891 /** The Variation Selectors Unicode block. */ 892 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS"); 893 894 /** The Combining Half Marks Unicode block. */ 895 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"); 896 897 /** The CJK Compatibility Forms Unicode block. 
*/ 898 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"); 899 900 /** The Small Form Variants Unicode block. */ 901 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS"); 902 903 /** The Arabic Presentation Forms-B Unicode block. */ 904 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"); 905 906 /** The Halfwidth and Fullwidth Forms Unicode block. */ 907 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"); 908 909 /** The Specials Unicode block. */ 910 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS"); 911 912 /** The Linear B Syllabary Unicode block. */ 913 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY"); 914 915 /** The Linear B Ideograms Unicode block. */ 916 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS"); 917 918 /** The Aegean Numbers Unicode block. */ 919 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS"); 920 921 /** The Old Italic Unicode block. */ 922 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC"); 923 924 /** The Gothic Unicode block. */ 925 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC"); 926 927 /** The Ugaritic Unicode block. */ 928 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC"); 929 930 /** The Deseret Unicode block. */ 931 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET"); 932 933 /** The Shavian Unicode block. */ 934 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN"); 935 936 /** The Osmanya Unicode block. */ 937 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA"); 938 939 /** The Cypriot Syllabary Unicode block. 
*/ 940 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY"); 941 942 /** The Byzantine Musical Symbols Unicode block. */ 943 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS"); 944 945 /** The Musical Symbols Unicode block. */ 946 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS"); 947 948 /** The Tai Xuan Jing Symbols Unicode block. */ 949 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS"); 950 951 /** The Mathematical Alphanumeric Symbols Unicode block. */ 952 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS"); 953 954 /** The CJK Unified Ideographs Extension B Unicode block. */ 955 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"); 956 957 /** The CJK Compatibility Ideographs Supplement Unicode block. */ 958 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT"); 959 960 /** The Tags Unicode block. */ 961 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS"); 962 963 /** The Variation Selectors Supplement Unicode block. */ 964 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT"); 965 966 /** The Supplementary Private Use Area-A Unicode block. */ 967 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A"); 968 969 /** The Supplementary Private Use Area-B Unicode block. */ 970 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B"); 971 972 // Unicode 4.1. 973 974 /** The Ancient Greek Musical Notation Unicode 4.1 block. 
*/ 975 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION"); 976 977 /** The Ancient Greek Numbers Unicode 4.1 block. */ 978 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS"); 979 980 /** The Arabic Supplement Unicode 4.1 block. */ 981 public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT"); 982 983 /** The Buginese Unicode 4.1 block. */ 984 public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE"); 985 986 /** The CJK Strokes Unicode 4.1 block. */ 987 public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES"); 988 989 /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */ 990 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT"); 991 992 /** The Coptic Unicode 4.1 block. */ 993 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC"); 994 995 /** The Ethiopic Extended Unicode 4.1 block. */ 996 public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED"); 997 998 /** The Ethiopic Supplement Unicode 4.1 block. */ 999 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT"); 1000 1001 /** The Georgian Supplement Unicode 4.1 block. */ 1002 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT"); 1003 1004 /** The Glagolitic Unicode 4.1 block. */ 1005 public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC"); 1006 1007 /** The Kharoshthi Unicode 4.1 block. */ 1008 public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI"); 1009 1010 /** The Modifier Tone Letters Unicode 4.1 block. */ 1011 public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS"); 1012 1013 /** The New Tai Lue Unicode 4.1 block. 
*/ 1014 public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE"); 1015 1016 /** The Old Persian Unicode 4.1 block. */ 1017 public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN"); 1018 1019 /** The Phonetic Extensions Supplement Unicode 4.1 block. */ 1020 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT"); 1021 1022 /** The Supplemental Punctuation Unicode 4.1 block. */ 1023 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION"); 1024 1025 /** The Syloti Nagri Unicode 4.1 block. */ 1026 public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI"); 1027 1028 /** The Tifinagh Unicode 4.1 block. */ 1029 public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH"); 1030 1031 /** The Vertical Forms Unicode 4.1 block. */ 1032 public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS"); 1033 1034 // Unicode 5.0. 1035 1036 /** The NKo Unicode 5.0 block. */ 1037 public static final UnicodeBlock NKO = new UnicodeBlock("NKO"); 1038 1039 /** The Balinese Unicode 5.0 block. */ 1040 public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE"); 1041 1042 /** The Latin Extended C Unicode 5.0 block. */ 1043 public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C"); 1044 1045 /** The Latin Extended D Unicode 5.0 block. */ 1046 public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D"); 1047 1048 /** The Phags-pa Unicode 5.0 block. */ 1049 public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA"); 1050 1051 /** The Phoenician Unicode 5.0 block. */ 1052 public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN"); 1053 1054 /** The Cuneiform Unicode 5.0 block. 
*/ 1055 public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM"); 1056 1057 /** The Cuneiform Numbers And Punctuation Unicode 5.0 block. */ 1058 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION"); 1059 1060 /** The Counting Rod Numerals Unicode 5.0 block. */ 1061 public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS"); 1062 1063 // Unicode 5.1. 1064 1065 /** The Sundanese Unicode 5.1 block. */ 1066 public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE"); 1067 1068 /** The Lepcha Unicode 5.1 block. */ 1069 public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA"); 1070 1071 /** The Ol Chiki Unicode 5.1 block. */ 1072 public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI"); 1073 1074 /** The Cyrillic Extended-A Unicode 5.1 block. */ 1075 public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A"); 1076 1077 /** The Vai Unicode 5.1 block. */ 1078 public static final UnicodeBlock VAI = new UnicodeBlock("VAI"); 1079 1080 /** The Cyrillic Extended-B Unicode 5.1 block. */ 1081 public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B"); 1082 1083 /** The Saurashtra Unicode 5.1 block. */ 1084 public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA"); 1085 1086 /** The Kayah Li Unicode 5.1 block. */ 1087 public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI"); 1088 1089 /** The Rejang Unicode 5.1 block. */ 1090 public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG"); 1091 1092 /** The Cham Unicode 5.1 block. */ 1093 public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM"); 1094 1095 /** The Ancient Symbols Unicode 5.1 block. */ 1096 public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS"); 1097 1098 /** The Phaistos Disc Unicode 5.1 block. 
*/ 1099 public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC"); 1100 1101 /** The Lycian Unicode 5.1 block. */ 1102 public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN"); 1103 1104 /** The Carian Unicode 5.1 block. */ 1105 public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN"); 1106 1107 /** The Lydian Unicode 5.1 block. */ 1108 public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN"); 1109 1110 /** The Mahjong Tiles Unicode 5.1 block. */ 1111 public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES"); 1112 1113 /** The Domino Tiles Unicode 5.1 block. */ 1114 public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES"); 1115 1116 // Unicode 5.2. 1117 1118 /** The Samaritan Unicode 5.2 block. */ 1119 public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN"); 1120 1121 /** The Unified Canadian Aboriginal Syllabics Extended Unicode 5.2 block. */ 1122 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED"); 1123 1124 /** The Tai Tham Unicode 5.2 block. */ 1125 public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM"); 1126 1127 /** The Vedic Extensions Unicode 5.2 block. */ 1128 public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS"); 1129 1130 /** The Lisu Unicode 5.2 block. */ 1131 public static final UnicodeBlock LISU = new UnicodeBlock("LISU"); 1132 1133 /** The Bamum Unicode 5.2 block. */ 1134 public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM"); 1135 1136 /** The Common Indic Number Forms Unicode 5.2 block. */ 1137 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS"); 1138 1139 /** The Devanagari Extended Unicode 5.2 block. 
*/ 1140 public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED"); 1141 1142 /** The Hangul Jamo Extended-A Unicode 5.2 block. */ 1143 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A"); 1144 1145 /** The Javanese Unicode 5.2 block. */ 1146 public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE"); 1147 1148 /** The Myanmar Extended-A Unicode 5.2 block. */ 1149 public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A"); 1150 1151 /** The Tai Viet Unicode 5.2 block. */ 1152 public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET"); 1153 1154 /** The Meetei Mayek Unicode 5.2 block. */ 1155 public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK"); 1156 1157 /** The Hangul Jamo Extended-B Unicode 5.2 block. */ 1158 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B"); 1159 1160 /** The Imperial Aramaic Unicode 5.2 block. */ 1161 public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC"); 1162 1163 /** The Old South Arabian Unicode 5.2 block. */ 1164 public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN"); 1165 1166 /** The Avestan Unicode 5.2 block. */ 1167 public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN"); 1168 1169 /** The Inscriptional Parthian Unicode 5.2 block. */ 1170 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN"); 1171 1172 /** The Inscriptional Pahlavi Unicode 5.2 block. */ 1173 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI"); 1174 1175 /** The Old Turkic Unicode 5.2 block. */ 1176 public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC"); 1177 1178 /** The Rumi Numeral Symbols Unicode 5.2 block. 
*/ 1179 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS"); 1180 1181 /** The Kaithi Unicode 5.2 block. */ 1182 public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI"); 1183 1184 /** The Egyptian Hieroglyphs Unicode 5.2 block. */ 1185 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS"); 1186 1187 /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */ 1188 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT"); 1189 1190 /** The Enclosed Ideographic Supplement Unicode 5.2 block. */ 1191 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT"); 1192 1193 /** The CJK Unified Ideographs Extension C Unicode 5.2 block. */ 1194 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C"); 1195 1196 // Unicode 6.0. 1197 1198 /** The Mandaic Unicode 6.0 block. */ 1199 public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC"); 1200 1201 /** The Batak Unicode 6.0 block. */ 1202 public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK"); 1203 1204 /** The Ethiopic Extended-A Unicode 6.0 block. */ 1205 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A"); 1206 1207 /** The Brahmi Unicode 6.0 block. */ 1208 public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI"); 1209 1210 /** The Bamum Supplement Unicode 6.0 block. */ 1211 public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT"); 1212 1213 /** The Kana Supplement Unicode 6.0 block. */ 1214 public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT"); 1215 1216 /** The Playing Cards Unicode 6.0 block. 
*/ 1217 public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS"); 1218 1219 /** The Miscellaneous Symbols And Pictographs Supplement Unicode 6.0 block. */ 1220 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS"); 1221 1222 /** The Emoticons Unicode 6.0 block. */ 1223 public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS"); 1224 1225 /** The Transport And Map Symbols Unicode 6.0 block. */ 1226 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS"); 1227 1228 /** The Alchemical Symbols Unicode 6.0 block. */ 1229 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS"); 1230 1231 /** The CJK Unified Ideographs Extension-D Unicode 6.0 block. */ 1232 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D"); 1233 1234 /* 1235 * All of the UnicodeBlocks above, in the icu4c UBlock enum order. 1236 */ 1237 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1238 null, // icu4c numbers blocks starting at 1, so index 0 should be null. 
1239 1240 UnicodeBlock.BASIC_LATIN, 1241 UnicodeBlock.LATIN_1_SUPPLEMENT, 1242 UnicodeBlock.LATIN_EXTENDED_A, 1243 UnicodeBlock.LATIN_EXTENDED_B, 1244 UnicodeBlock.IPA_EXTENSIONS, 1245 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1246 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1247 UnicodeBlock.GREEK, 1248 UnicodeBlock.CYRILLIC, 1249 UnicodeBlock.ARMENIAN, 1250 UnicodeBlock.HEBREW, 1251 UnicodeBlock.ARABIC, 1252 UnicodeBlock.SYRIAC, 1253 UnicodeBlock.THAANA, 1254 UnicodeBlock.DEVANAGARI, 1255 UnicodeBlock.BENGALI, 1256 UnicodeBlock.GURMUKHI, 1257 UnicodeBlock.GUJARATI, 1258 UnicodeBlock.ORIYA, 1259 UnicodeBlock.TAMIL, 1260 UnicodeBlock.TELUGU, 1261 UnicodeBlock.KANNADA, 1262 UnicodeBlock.MALAYALAM, 1263 UnicodeBlock.SINHALA, 1264 UnicodeBlock.THAI, 1265 UnicodeBlock.LAO, 1266 UnicodeBlock.TIBETAN, 1267 UnicodeBlock.MYANMAR, 1268 UnicodeBlock.GEORGIAN, 1269 UnicodeBlock.HANGUL_JAMO, 1270 UnicodeBlock.ETHIOPIC, 1271 UnicodeBlock.CHEROKEE, 1272 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1273 UnicodeBlock.OGHAM, 1274 UnicodeBlock.RUNIC, 1275 UnicodeBlock.KHMER, 1276 UnicodeBlock.MONGOLIAN, 1277 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1278 UnicodeBlock.GREEK_EXTENDED, 1279 UnicodeBlock.GENERAL_PUNCTUATION, 1280 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1281 UnicodeBlock.CURRENCY_SYMBOLS, 1282 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1283 UnicodeBlock.LETTERLIKE_SYMBOLS, 1284 UnicodeBlock.NUMBER_FORMS, 1285 UnicodeBlock.ARROWS, 1286 UnicodeBlock.MATHEMATICAL_OPERATORS, 1287 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1288 UnicodeBlock.CONTROL_PICTURES, 1289 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1290 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1291 UnicodeBlock.BOX_DRAWING, 1292 UnicodeBlock.BLOCK_ELEMENTS, 1293 UnicodeBlock.GEOMETRIC_SHAPES, 1294 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1295 UnicodeBlock.DINGBATS, 1296 UnicodeBlock.BRAILLE_PATTERNS, 1297 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1298 UnicodeBlock.KANGXI_RADICALS, 1299 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 
1300 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1301 UnicodeBlock.HIRAGANA, 1302 UnicodeBlock.KATAKANA, 1303 UnicodeBlock.BOPOMOFO, 1304 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1305 UnicodeBlock.KANBUN, 1306 UnicodeBlock.BOPOMOFO_EXTENDED, 1307 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1308 UnicodeBlock.CJK_COMPATIBILITY, 1309 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1310 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1311 UnicodeBlock.YI_SYLLABLES, 1312 UnicodeBlock.YI_RADICALS, 1313 UnicodeBlock.HANGUL_SYLLABLES, 1314 UnicodeBlock.HIGH_SURROGATES, 1315 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1316 UnicodeBlock.LOW_SURROGATES, 1317 UnicodeBlock.PRIVATE_USE_AREA, 1318 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1319 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1320 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1321 UnicodeBlock.COMBINING_HALF_MARKS, 1322 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1323 UnicodeBlock.SMALL_FORM_VARIANTS, 1324 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1325 UnicodeBlock.SPECIALS, 1326 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1327 1328 // Unicode 3.1. 1329 UnicodeBlock.OLD_ITALIC, 1330 UnicodeBlock.GOTHIC, 1331 UnicodeBlock.DESERET, 1332 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1333 UnicodeBlock.MUSICAL_SYMBOLS, 1334 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1335 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1336 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1337 UnicodeBlock.TAGS, 1338 1339 // Unicode 3.2. 
1340 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1341 UnicodeBlock.TAGALOG, 1342 UnicodeBlock.HANUNOO, 1343 UnicodeBlock.BUHID, 1344 UnicodeBlock.TAGBANWA, 1345 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1346 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1347 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1348 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1349 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1350 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1351 UnicodeBlock.VARIATION_SELECTORS, 1352 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1353 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1354 1355 // Unicode 4.0. 1356 UnicodeBlock.LIMBU, 1357 UnicodeBlock.TAI_LE, 1358 UnicodeBlock.KHMER_SYMBOLS, 1359 UnicodeBlock.PHONETIC_EXTENSIONS, 1360 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1361 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1362 UnicodeBlock.LINEAR_B_SYLLABARY, 1363 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1364 UnicodeBlock.AEGEAN_NUMBERS, 1365 UnicodeBlock.UGARITIC, 1366 UnicodeBlock.SHAVIAN, 1367 UnicodeBlock.OSMANYA, 1368 UnicodeBlock.CYPRIOT_SYLLABARY, 1369 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1370 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT, 1371 1372 // Unicode 4.1. 1373 UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION, 1374 UnicodeBlock.ANCIENT_GREEK_NUMBERS, 1375 UnicodeBlock.ARABIC_SUPPLEMENT, 1376 UnicodeBlock.BUGINESE, 1377 UnicodeBlock.CJK_STROKES, 1378 UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 1379 UnicodeBlock.COPTIC, 1380 UnicodeBlock.ETHIOPIC_EXTENDED, 1381 UnicodeBlock.ETHIOPIC_SUPPLEMENT, 1382 UnicodeBlock.GEORGIAN_SUPPLEMENT, 1383 UnicodeBlock.GLAGOLITIC, 1384 UnicodeBlock.KHAROSHTHI, 1385 UnicodeBlock.MODIFIER_TONE_LETTERS, 1386 UnicodeBlock.NEW_TAI_LUE, 1387 UnicodeBlock.OLD_PERSIAN, 1388 UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT, 1389 UnicodeBlock.SUPPLEMENTAL_PUNCTUATION, 1390 UnicodeBlock.SYLOTI_NAGRI, 1391 UnicodeBlock.TIFINAGH, 1392 UnicodeBlock.VERTICAL_FORMS, 1393 1394 // Unicode 5.0. 
1395 UnicodeBlock.NKO, 1396 UnicodeBlock.BALINESE, 1397 UnicodeBlock.LATIN_EXTENDED_C, 1398 UnicodeBlock.LATIN_EXTENDED_D, 1399 UnicodeBlock.PHAGS_PA, 1400 UnicodeBlock.PHOENICIAN, 1401 UnicodeBlock.CUNEIFORM, 1402 UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION, 1403 UnicodeBlock.COUNTING_ROD_NUMERALS, 1404 1405 // Unicode 5.1. 1406 UnicodeBlock.SUNDANESE, 1407 UnicodeBlock.LEPCHA, 1408 UnicodeBlock.OL_CHIKI, 1409 UnicodeBlock.CYRILLIC_EXTENDED_A, 1410 UnicodeBlock.VAI, 1411 UnicodeBlock.CYRILLIC_EXTENDED_B, 1412 UnicodeBlock.SAURASHTRA, 1413 UnicodeBlock.KAYAH_LI, 1414 UnicodeBlock.REJANG, 1415 UnicodeBlock.CHAM, 1416 UnicodeBlock.ANCIENT_SYMBOLS, 1417 UnicodeBlock.PHAISTOS_DISC, 1418 UnicodeBlock.LYCIAN, 1419 UnicodeBlock.CARIAN, 1420 UnicodeBlock.LYDIAN, 1421 UnicodeBlock.MAHJONG_TILES, 1422 UnicodeBlock.DOMINO_TILES, 1423 1424 // Unicode 5.2. 1425 UnicodeBlock.SAMARITAN, 1426 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 1427 UnicodeBlock.TAI_THAM, 1428 UnicodeBlock.VEDIC_EXTENSIONS, 1429 UnicodeBlock.LISU, 1430 UnicodeBlock.BAMUM, 1431 UnicodeBlock.COMMON_INDIC_NUMBER_FORMS, 1432 UnicodeBlock.DEVANAGARI_EXTENDED, 1433 UnicodeBlock.HANGUL_JAMO_EXTENDED_A, 1434 UnicodeBlock.JAVANESE, 1435 UnicodeBlock.MYANMAR_EXTENDED_A, 1436 UnicodeBlock.TAI_VIET, 1437 UnicodeBlock.MEETEI_MAYEK, 1438 UnicodeBlock.HANGUL_JAMO_EXTENDED_B, 1439 UnicodeBlock.IMPERIAL_ARAMAIC, 1440 UnicodeBlock.OLD_SOUTH_ARABIAN, 1441 UnicodeBlock.AVESTAN, 1442 UnicodeBlock.INSCRIPTIONAL_PARTHIAN, 1443 UnicodeBlock.INSCRIPTIONAL_PAHLAVI, 1444 UnicodeBlock.OLD_TURKIC, 1445 UnicodeBlock.RUMI_NUMERAL_SYMBOLS, 1446 UnicodeBlock.KAITHI, 1447 UnicodeBlock.EGYPTIAN_HIEROGLYPHS, 1448 UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 1449 UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 1450 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 1451 1452 // Unicode 6.0. 
1453 UnicodeBlock.MANDAIC, 1454 UnicodeBlock.BATAK, 1455 UnicodeBlock.ETHIOPIC_EXTENDED_A, 1456 UnicodeBlock.BRAHMI, 1457 UnicodeBlock.BAMUM_SUPPLEMENT, 1458 UnicodeBlock.KANA_SUPPLEMENT, 1459 UnicodeBlock.PLAYING_CARDS, 1460 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 1461 UnicodeBlock.EMOTICONS, 1462 UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS, 1463 UnicodeBlock.ALCHEMICAL_SYMBOLS, 1464 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 1465 }; 1466 1467 /** 1468 * Returns the Unicode block for the given block name, or null if there is no 1469 * such block. 1470 * 1471 * <p>Block names may be one of the following: 1472 * <ul> 1473 * <li>Canonical block name, as defined by the Unicode specification; 1474 * case-insensitive.</li> 1475 * <li>Canonical block name without any spaces, as defined by the 1476 * Unicode specification; case-insensitive.</li> 1477 * <li>A {@code UnicodeBlock} constant identifier. This is determined by 1478 * converting the canonical name to uppercase and replacing all spaces and hyphens 1479 * with underscores.</li> 1480 * </ul> 1481 * 1482 * @throws NullPointerException 1483 * if {@code blockName == null}. 1484 * @throws IllegalArgumentException 1485 * if {@code blockName} is not the name of any known block. 1486 * @since 1.5 1487 */ 1488 public static UnicodeBlock forName(String blockName) { 1489 if (blockName == null) { 1490 throw new NullPointerException("blockName == null"); 1491 } 1492 int block = unicodeBlockForName(blockName); 1493 if (block == -1) { 1494 throw new IllegalArgumentException("Unknown block: " + blockName); 1495 } 1496 return BLOCKS[block]; 1497 } 1498 1499 /** 1500 * Returns the Unicode block containing the given code point, or null if the 1501 * code point does not belong to any known block. 
1502 */ 1503 public static UnicodeBlock of(char c) { 1504 return of((int) c); 1505 } 1506 1507 /** 1508 * Returns the Unicode block containing the given code point, or null if the 1509 * code point does not belong to any known block. 1510 */ 1511 public static UnicodeBlock of(int codePoint) { 1512 checkValidCodePoint(codePoint); 1513 int block = unicodeBlockForCodePoint(codePoint); 1514 if (block == -1 || block >= BLOCKS.length) { 1515 return null; 1516 } 1517 return BLOCKS[block]; 1518 } 1519 1520 private UnicodeBlock(String blockName) { 1521 super(blockName); 1522 } 1523 } 1524 1525 private static native int unicodeBlockForName(String blockName); 1526 1527 private static native int unicodeBlockForCodePoint(int codePoint); 1528 1529 /** 1530 * Represents a <a href="http://www.unicode.org/reports/tr24/">Unicode script</a>. 1531 * Every Unicode code point is contained by a single {@code UnicodeScript}. Code points 1532 * shared between scripts will be in {@code COMMON}. Code points for combining 1533 * characters that can be applied to multiple scripts will be in {@code INHERITED} 1534 * because they inherit the script of their base character. Code points whose scripts 1535 * don't have a corresponding {@code UnicodeScript} will be in {@code UNKNOWN}. 
1536 * 1537 * @since 1.7 1538 * @hide 1539 */ 1540 public static enum UnicodeScript { 1541 /** ISO 15924 English name "Arabic" */ 1542 ARABIC, 1543 /** ISO 15924 English name "Armenian" */ 1544 ARMENIAN, 1545 /** ISO 15924 English name "Avestan" */ 1546 AVESTAN, 1547 /** ISO 15924 English name "Balinese" */ 1548 BALINESE, 1549 /** ISO 15924 English name "Bamum" */ 1550 BAMUM, 1551 /** ISO 15924 English name "Batak" */ 1552 BATAK, 1553 /** ISO 15924 English name "Bengali" */ 1554 BENGALI, 1555 /** ISO 15924 English name "Bopomofo" */ 1556 BOPOMOFO, 1557 /** ISO 15924 English name "Brahmi" */ 1558 BRAHMI, 1559 /** ISO 15924 English name "Braille" */ 1560 BRAILLE, 1561 /** ISO 15924 English name "Buginese" */ 1562 BUGINESE, 1563 /** ISO 15924 English name "Buhid" */ 1564 BUHID, 1565 /** ISO 15924 English name "Unified Canadian Aboriginal Syllabics" */ 1566 CANADIAN_ABORIGINAL, 1567 /** ISO 15924 English name "Carian" */ 1568 CARIAN, 1569 /** ISO 15924 English name "Cham" */ 1570 CHAM, 1571 /** ISO 15924 English name "Cherokee" */ 1572 CHEROKEE, 1573 /** ISO 15924 English name "Common" */ 1574 COMMON, 1575 /** ISO 15924 English name "Coptic" */ 1576 COPTIC, 1577 /** ISO 15924 English name "Cuneiform" */ 1578 CUNEIFORM, 1579 /** ISO 15924 English name "Cypriot" */ 1580 CYPRIOT, 1581 /** ISO 15924 English name "Cyrillic" */ 1582 CYRILLIC, 1583 /** ISO 15924 English name "Deseret" */ 1584 DESERET, 1585 /** ISO 15924 English name "Devanagari" */ 1586 DEVANAGARI, 1587 /** ISO 15924 English name "Egyptian hieroglyphs" */ 1588 EGYPTIAN_HIEROGLYPHS, 1589 /** ISO 15924 English name "Ethiopic" */ 1590 ETHIOPIC, 1591 /** ISO 15924 English name "Georgian" */ 1592 GEORGIAN, 1593 /** ISO 15924 English name "Glagolitic" */ 1594 GLAGOLITIC, 1595 /** ISO 15924 English name "Gothic" */ 1596 GOTHIC, 1597 /** ISO 15924 English name "Greek" */ 1598 GREEK, 1599 /** ISO 15924 English name "Gujarati" */ 1600 GUJARATI, 1601 /** ISO 15924 English name "Gurmukhi" */ 1602 GURMUKHI, 1603 /** ISO 
15924 English name "Han" */ 1604 HAN, 1605 /** ISO 15924 English name "Hangul" */ 1606 HANGUL, 1607 /** ISO 15924 English name "Hanunoo" */ 1608 HANUNOO, 1609 /** ISO 15924 English name "Hebrew" */ 1610 HEBREW, 1611 /** ISO 15924 English name "Hiragana" */ 1612 HIRAGANA, 1613 /** ISO 15924 English name "Imperial aramaic" */ 1614 IMPERIAL_ARAMAIC, 1615 /** ISO 15924 English name "Inherited" */ 1616 INHERITED, 1617 /** ISO 15924 English name "Inscriptional pahlavi" */ 1618 INSCRIPTIONAL_PAHLAVI, 1619 /** ISO 15924 English name "Inscriptional parthian" */ 1620 INSCRIPTIONAL_PARTHIAN, 1621 /** ISO 15924 English name "Javanese" */ 1622 JAVANESE, 1623 /** ISO 15924 English name "Kaithi" */ 1624 KAITHI, 1625 /** ISO 15924 English name "Kannada" */ 1626 KANNADA, 1627 /** ISO 15924 English name "Katakana" */ 1628 KATAKANA, 1629 /** ISO 15924 English name "Kayah li" */ 1630 KAYAH_LI, 1631 /** ISO 15924 English name "Kharoshthi" */ 1632 KHAROSHTHI, 1633 /** ISO 15924 English name "Khmer" */ 1634 KHMER, 1635 /** ISO 15924 English name "Lao" */ 1636 LAO, 1637 /** ISO 15924 English name "Latin" */ 1638 LATIN, 1639 /** ISO 15924 English name "Lepcha" */ 1640 LEPCHA, 1641 /** ISO 15924 English name "Limbu" */ 1642 LIMBU, 1643 /** ISO 15924 English name "Linear B" */ 1644 LINEAR_B, 1645 /** ISO 15924 English name "Lisu" */ 1646 LISU, 1647 /** ISO 15924 English name "Lycian" */ 1648 LYCIAN, 1649 /** ISO 15924 English name "Lydian" */ 1650 LYDIAN, 1651 /** ISO 15924 English name "Malayalam" */ 1652 MALAYALAM, 1653 /** ISO 15924 English name "Mandaic" */ 1654 MANDAIC, 1655 /** ISO 15924 English name "Meetei Mayek (Meithei, Meetei)" */ 1656 MEETEI_MAYEK, 1657 /** ISO 15924 English name "Mongolian" */ 1658 MONGOLIAN, 1659 /** ISO 15924 English name "Myanmar" */ 1660 MYANMAR, 1661 /** ISO 15924 English name "New Tai Lue" */ 1662 NEW_TAI_LUE, 1663 /** ISO 15924 English name "Nko" */ 1664 NKO, 1665 /** ISO 15924 English name "Ogham" */ 1666 OGHAM, 1667 /** ISO 15924 English name "Ol Chiki" 
*/ 1668 OL_CHIKI, 1669 /** ISO 15924 English name "Old Italic" */ 1670 OLD_ITALIC, 1671 /** ISO 15924 English name "Old Persian" */ 1672 OLD_PERSIAN, 1673 /** ISO 15924 English name "Old South Arabian" */ 1674 OLD_SOUTH_ARABIAN, 1675 /** ISO 15924 English name "Old Turkic, Orkhon Runic" */ 1676 OLD_TURKIC, 1677 /** ISO 15924 English name "Oriya" */ 1678 ORIYA, 1679 /** ISO 15924 English name "Osmanya" */ 1680 OSMANYA, 1681 /** ISO 15924 English name "Phags-pa" */ 1682 PHAGS_PA, 1683 /** ISO 15924 English name "Phoenician" */ 1684 PHOENICIAN, 1685 /** ISO 15924 English name "Rejang" */ 1686 REJANG, 1687 /** ISO 15924 English name "Runic" */ 1688 RUNIC, 1689 /** ISO 15924 English name "Samaritan" */ 1690 SAMARITAN, 1691 /** ISO 15924 English name "Saurashtra" */ 1692 SAURASHTRA, 1693 /** ISO 15924 English name "Shavian" */ 1694 SHAVIAN, 1695 /** ISO 15924 English name "Sinhala" */ 1696 SINHALA, 1697 /** ISO 15924 English name "Sundanese" */ 1698 SUNDANESE, 1699 /** ISO 15924 English name "Syloti Nagri" */ 1700 SYLOTI_NAGRI, 1701 /** ISO 15924 English name "Syriac" */ 1702 SYRIAC, 1703 /** ISO 15924 English name "Tagalog" */ 1704 TAGALOG, 1705 /** ISO 15924 English name "Tagbanwa" */ 1706 TAGBANWA, 1707 /** ISO 15924 English name "Tai Le" */ 1708 TAI_LE, 1709 /** ISO 15924 English name "Tai Tham (Lanna)" */ 1710 TAI_THAM, 1711 /** ISO 15924 English name "Tai Viet" */ 1712 TAI_VIET, 1713 /** ISO 15924 English name "Tamil" */ 1714 TAMIL, 1715 /** ISO 15924 English name "Telugu" */ 1716 TELUGU, 1717 /** ISO 15924 English name "Thaana" */ 1718 THAANA, 1719 /** ISO 15924 English name "Thai" */ 1720 THAI, 1721 /** ISO 15924 English name "Tibetan" */ 1722 TIBETAN, 1723 /** ISO 15924 English name "Tifinagh" */ 1724 TIFINAGH, 1725 /** ISO 15924 English name "Ugaritic" */ 1726 UGARITIC, 1727 /** ISO 15924 English name "Unknown" */ 1728 UNKNOWN, 1729 /** ISO 15924 English name "Vai" */ 1730 VAI, 1731 /** ISO 15924 English name "Yi" */ 1732 YI; 1733 1734 private static final 
UnicodeScript[] SCRIPTS = { 1735 COMMON, 1736 INHERITED, 1737 ARABIC, 1738 ARMENIAN, 1739 BENGALI, 1740 BOPOMOFO, 1741 CHEROKEE, 1742 COPTIC, 1743 CYRILLIC, 1744 DESERET, 1745 DEVANAGARI, 1746 ETHIOPIC, 1747 GEORGIAN, 1748 GOTHIC, 1749 GREEK, 1750 GUJARATI, 1751 GURMUKHI, 1752 HAN, 1753 HANGUL, 1754 HEBREW, 1755 HIRAGANA, 1756 KANNADA, 1757 KATAKANA, 1758 KHMER, 1759 LAO, 1760 LATIN, 1761 MALAYALAM, 1762 MONGOLIAN, 1763 MYANMAR, 1764 OGHAM, 1765 OLD_ITALIC, 1766 ORIYA, 1767 RUNIC, 1768 SINHALA, 1769 SYRIAC, 1770 TAMIL, 1771 TELUGU, 1772 THAANA, 1773 THAI, 1774 TIBETAN, 1775 CANADIAN_ABORIGINAL, 1776 YI, 1777 TAGALOG, 1778 HANUNOO, 1779 BUHID, 1780 TAGBANWA, 1781 BRAILLE, 1782 CYPRIOT, 1783 LIMBU, 1784 LINEAR_B, 1785 OSMANYA, 1786 SHAVIAN, 1787 TAI_LE, 1788 UGARITIC, 1789 null, // USCRIPT_KATAKANA_OR_HIRAGANA 1790 BUGINESE, 1791 GLAGOLITIC, 1792 KHAROSHTHI, 1793 SYLOTI_NAGRI, 1794 NEW_TAI_LUE, 1795 TIFINAGH, 1796 OLD_PERSIAN, 1797 BALINESE, 1798 BATAK, 1799 null, // USCRIPT_BLISSYMBOLS, 1800 BRAHMI, 1801 CHAM, 1802 null, // USCRIPT_CIRTH, 1803 null, // USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC, 1804 null, // USCRIPT_DEMOTIC_EGYPTIAN, 1805 null, // USCRIPT_HIERATIC_EGYPTIAN, 1806 EGYPTIAN_HIEROGLYPHS, 1807 null, // USCRIPT_USCRIPT_KHUTSURI, 1808 null, // USCRIPT_SIMPLIFIED_HAN, 1809 null, // USCRIPT_TRADITIONAL_HAN, 1810 null, // USCRIPT_PAHAWH_HMONG, 1811 null, // USCRIPT_OLD_HUNGARIAN, 1812 null, // USCRIPT_HARAPPAN_INDUS, 1813 JAVANESE, 1814 KAYAH_LI, 1815 null, // USCRIPT_LATIN_FRAKTUR, 1816 null, // USCRIPT_LATIN_GAELIC, 1817 LEPCHA, 1818 null, // USCRIPT_LINEAR_A, 1819 MANDAIC, // == MANDAEAN 1820 null, // USCRIPT_MAYAN_HIEROGLYPHS, 1821 null, // USCRIPT_MEROITIC_HIEROGLYPHS == USCRIPT_MEROITIC 1822 null, // USCRIPT_NKO, 1823 OLD_TURKIC, // USCRIPT_ORKHON == OLD_TURKIC, 1824 null, // USCRIPT_OLD_PERMIC, 1825 PHAGS_PA, 1826 PHOENICIAN, 1827 null, // USCRIPT_PHONETIC_POLLARD === MIAO, 1828 null, // USCRIPT_RONGORONGO, 1829 null, // USCRIPT_SARATI, 1830 null, // 
USCRIPT_ESTRANGELO_SYRIAC, 1831 null, // USCRIPT_WESTERN_SYRIAC, 1832 null, // USCRIPT_EASTERN_SYRIAC, 1833 null, // USCRIPT_TENGWAR, 1834 VAI, 1835 null, // USCRIPT_VISIBLE_SPEECH, 1836 CUNEIFORM, 1837 null, // USCRIPT_UNWRITTEN_LANGUAGES, 1838 UNKNOWN, 1839 CARIAN, 1840 null, // USCRIPT_JAPANESE, 1841 TAI_THAM, // USCRIPT_LANNA (aka TAI_THAM), 1842 LYCIAN, 1843 LYDIAN, 1844 OL_CHIKI, 1845 REJANG, 1846 SAURASHTRA, 1847 null, // USCRIPT_SIGN_WRITING, 1848 SUNDANESE, 1849 null, // USCRIPT_MOON, 1850 MEETEI_MAYEK, // USCRIPT_MEITEI_MAYEK (aka MEETEI, MEITHEI), 1851 IMPERIAL_ARAMAIC, 1852 AVESTAN, 1853 null, // USCRIPT_CHAKMA, 1854 null, // USCRIPT_KOREAN, 1855 KAITHI, 1856 null, // USCRIPT_MANICHAEAN, 1857 INSCRIPTIONAL_PAHLAVI, 1858 null, // USCRIPT_PSALTER_PAHLAVI, 1859 null, // USCRIPT_BOOK_PAHLAVI, 1860 INSCRIPTIONAL_PARTHIAN, 1861 SAMARITAN, 1862 TAI_VIET, 1863 null, // USCRIPT_MATHEMATICAL_NOTATION, 1864 null, // USCRIPT_SYMBOLS, 1865 BAMUM, 1866 LISU, 1867 null, // USCRIPT_NAKHI_GEBA, 1868 OLD_SOUTH_ARABIAN, 1869 null, // USCRIPT_BASSA_VAH, 1870 null, // USCRIPT_DUPLOYAN_SHORTAND, 1871 null, // USCRIPT_ELBASAN, 1872 null, // USCRIPT_GRANTHA, 1873 null, // USCRIPT_KPELLE, 1874 null, // USCRIPT_LOMA, 1875 null, // USCRIPT_MENDE, 1876 null, // USCRIPT_MEROITIC_CURSIVE, 1877 null, // USCRIPT_OLD_NORTH_ARABIAN, 1878 null, // USCRIPT_NABATAEAN, 1879 null, // USCRIPT_PALMYRENE, 1880 null, // USCRIPT_SINDHI, 1881 null, // USCRIPT_WARANG_CITI, 1882 null, // USCRIPT_AFAKA, 1883 null, // USCRIPT_JURCHEN, 1884 null, // USCRIPT_MRO, 1885 null, // USCRIPT_NUSHU, 1886 null, // USCRIPT_SHARADA, 1887 null, // USCRIPT_SORA_SOMPENG, 1888 null, // USCRIPT_TAKRI, 1889 null, // USCRIPT_TANGUT, 1890 null, // USCRIPT_WOLEAI, 1891 null, // USCRIPT_ANATOLIAN_HIEROGLYPHS, 1892 null, // USCRIPT_KHOJKI, 1893 null, // USCRIPT_TIRHUTA, 1894 }; 1895 1896 /** 1897 * Returns the {@link UnicodeScript} value identified by {@code scriptName}. 
1898 * {@code scriptName} can be a ISO-15924 English script name 1899 * or an alias (ISO-15924 script code) for that name. 1900 * {@see http://www.unicode.org/iso15924/iso15924-codes.html} 1901 * Lookups are case insensitive. 1902 * 1903 * @throws NullPointerException if {@code scriptName} is null. 1904 * @throws IllegalAccessException if {@code scriptName} in invalid. 1905 * 1906 * @since 1.7 1907 */ 1908 public static UnicodeScript forName(String scriptName) { 1909 if (scriptName == null) { 1910 throw new NullPointerException("scriptName == null"); 1911 } 1912 1913 final int script = unicodeScriptForName(scriptName); 1914 if (script == -1 || script >= SCRIPTS.length || 1915 SCRIPTS[script] == null) { 1916 throw new IllegalArgumentException("Unknown script: " + scriptName); 1917 } 1918 1919 return SCRIPTS[script]; 1920 } 1921 1922 /** 1923 * Returns the {@link UnicodeScript} value that the given Unicode code 1924 * point is assigned to. 1925 * 1926 * @throws IllegalArgumentException if {@codePoint} is not a valid Unicode code point. 1927 */ 1928 public static UnicodeScript of(int codePoint) { 1929 checkValidCodePoint(codePoint); 1930 int script = unicodeScriptForCodePoint(codePoint); 1931 if (script == -1 || script >= SCRIPTS.length) { 1932 // This signifies an ICU error. Complain loudly instead of swallowing 1933 // the error up. 1934 throw new IllegalArgumentException("Invalid codePoint: " + codePoint); 1935 } 1936 1937 // This happens when ICU maps the code point to a script known to ICU but 1938 // not the Java API. 1939 if (SCRIPTS[script] == null) { 1940 return UNKNOWN; 1941 } 1942 1943 return SCRIPTS[script]; 1944 } 1945 } 1946 1947 private static native int unicodeScriptForName(String blockName); 1948 1949 private static native int unicodeScriptForCodePoint(int codePoint); 1950 1951 1952 /** 1953 * Constructs a new {@code Character} with the specified primitive char 1954 * value. 
1955 * 1956 * @param value 1957 * the primitive char value to store in the new instance. 1958 */ 1959 public Character(char value) { 1960 this.value = value; 1961 } 1962 1963 /** 1964 * Gets the primitive value of this character. 1965 * 1966 * @return this object's primitive value. 1967 */ 1968 public char charValue() { 1969 return value; 1970 } 1971 1972 private static void checkValidCodePoint(int codePoint) { 1973 if (!isValidCodePoint(codePoint)) { 1974 throw new IllegalArgumentException("Invalid code point: " + codePoint); 1975 } 1976 } 1977 1978 /** 1979 * Compares this object to the specified character object to determine their 1980 * relative order. 1981 * 1982 * @param c 1983 * the character object to compare this object to. 1984 * @return {@code 0} if the value of this character and the value of 1985 * {@code c} are equal; a positive value if the value of this 1986 * character is greater than the value of {@code c}; a negative 1987 * value if the value of this character is less than the value of 1988 * {@code c}. 1989 * @see java.lang.Comparable 1990 * @since 1.2 1991 */ 1992 public int compareTo(Character c) { 1993 return compare(value, c.value); 1994 } 1995 1996 /** 1997 * Compares two {@code char} values. 1998 * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. 1999 * @since 1.7 2000 */ 2001 public static int compare(char lhs, char rhs) { 2002 return lhs - rhs; 2003 } 2004 2005 /** 2006 * Returns a {@code Character} instance for the {@code char} value passed. 2007 * <p> 2008 * If it is not necessary to get a new {@code Character} instance, it is 2009 * recommended to use this method instead of the constructor, since it 2010 * maintains a cache of instances which may result in better performance. 2011 * 2012 * @param c 2013 * the char value for which to get a {@code Character} instance. 2014 * @return the {@code Character} instance for {@code c}. 
/**
 * Returns a {@code Character} instance for the {@code char} value passed.
 * <p>
 * Prefer this method over the constructor when a fresh instance is not
 * required: values below 128 are served from a cache of instances.
 *
 * @param c
 *            the char value for which to get a {@code Character} instance.
 * @return the {@code Character} instance for {@code c}.
 * @since 1.5
 */
public static Character valueOf(char c) {
    if (c >= 128) {
        return new Character(c);
    }
    return SMALL_VALUES[c];
}

/**
 * A cache of instances used by {@link #valueOf(char)} and auto-boxing
 */
private static final Character[] SMALL_VALUES = new Character[128];

static {
    // Slot i holds the instance for the char whose value is i.
    int slot = 0;
    while (slot < 128) {
        SMALL_VALUES[slot] = new Character((char) slot);
        slot++;
    }
}

/**
 * Indicates whether {@code codePoint} is a valid Unicode code point.
 *
 * @param codePoint
 *            the code point to test.
 * @return {@code true} if {@code codePoint} lies between
 *         {@code MIN_CODE_POINT} and {@code MAX_CODE_POINT} inclusive;
 *         {@code false} otherwise.
 * @since 1.5
 */
public static boolean isValidCodePoint(int codePoint) {
    return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
}

/**
 * Indicates whether {@code codePoint} is within the supplementary code
 * point range.
 *
 * @param codePoint
 *            the code point to test.
 * @return {@code true} if {@code codePoint} lies between
 *         {@code MIN_SUPPLEMENTARY_CODE_POINT} and {@code MAX_CODE_POINT}
 *         inclusive; {@code false} otherwise.
 * @since 1.5
 */
public static boolean isSupplementaryCodePoint(int codePoint) {
    return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT && codePoint <= MAX_CODE_POINT;
}
2067 * @see #isLowSurrogate(char) 2068 * @since 1.5 2069 */ 2070 public static boolean isHighSurrogate(char ch) { 2071 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 2072 } 2073 2074 /** 2075 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 2076 * that is used for representing supplementary characters in UTF-16 2077 * encoding. 2078 * 2079 * @param ch 2080 * the character to test. 2081 * @return {@code true} if {@code ch} is a low-surrogate code unit; 2082 * {@code false} otherwise. 2083 * @see #isHighSurrogate(char) 2084 * @since 1.5 2085 */ 2086 public static boolean isLowSurrogate(char ch) { 2087 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 2088 } 2089 2090 /** 2091 * Returns true if the given character is a high or low surrogate. 2092 * @since 1.7 2093 */ 2094 public static boolean isSurrogate(char ch) { 2095 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 2096 } 2097 2098 /** 2099 * Indicates whether the specified character pair is a valid surrogate pair. 2100 * 2101 * @param high 2102 * the high surrogate unit to test. 2103 * @param low 2104 * the low surrogate unit to test. 2105 * @return {@code true} if {@code high} is a high-surrogate code unit and 2106 * {@code low} is a low-surrogate code unit; {@code false} 2107 * otherwise. 2108 * @see #isHighSurrogate(char) 2109 * @see #isLowSurrogate(char) 2110 * @since 1.5 2111 */ 2112 public static boolean isSurrogatePair(char high, char low) { 2113 return (isHighSurrogate(high) && isLowSurrogate(low)); 2114 } 2115 2116 /** 2117 * Calculates the number of {@code char} values required to represent the 2118 * specified Unicode code point. This method checks if the {@code codePoint} 2119 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 2120 * returned, otherwise {@code 1}. To test if the code point is valid, use 2121 * the {@link #isValidCodePoint(int)} method. 
2122 * 2123 * @param codePoint 2124 * the code point for which to calculate the number of required 2125 * chars. 2126 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 2127 * @see #isValidCodePoint(int) 2128 * @see #isSupplementaryCodePoint(int) 2129 * @since 1.5 2130 */ 2131 public static int charCount(int codePoint) { 2132 return (codePoint >= 0x10000 ? 2 : 1); 2133 } 2134 2135 /** 2136 * Converts a surrogate pair into a Unicode code point. This method assumes 2137 * that the pair are valid surrogates. If the pair are <i>not</i> valid 2138 * surrogates, then the result is indeterminate. The 2139 * {@link #isSurrogatePair(char, char)} method should be used prior to this 2140 * method to validate the pair. 2141 * 2142 * @param high 2143 * the high surrogate unit. 2144 * @param low 2145 * the low surrogate unit. 2146 * @return the Unicode code point corresponding to the surrogate unit pair. 2147 * @see #isSurrogatePair(char, char) 2148 * @since 1.5 2149 */ 2150 public static int toCodePoint(char high, char low) { 2151 // See RFC 2781, Section 2.2 2152 // http://www.ietf.org/rfc/rfc2781.txt 2153 int h = (high & 0x3FF) << 10; 2154 int l = low & 0x3FF; 2155 return (h | l) + 0x10000; 2156 } 2157 2158 /** 2159 * Returns the code point at {@code index} in the specified sequence of 2160 * character units. If the unit at {@code index} is a high-surrogate unit, 2161 * {@code index + 1} is less than the length of the sequence and the unit at 2162 * {@code index + 1} is a low-surrogate unit, then the supplementary code 2163 * point represented by the pair is returned; otherwise the {@code char} 2164 * value at {@code index} is returned. 2165 * 2166 * @param seq 2167 * the source sequence of {@code char} units. 2168 * @param index 2169 * the position in {@code seq} from which to retrieve the code 2170 * point. 2171 * @return the Unicode code point or {@code char} value at {@code index} in 2172 * {@code seq}. 
2173 * @throws NullPointerException 2174 * if {@code seq} is {@code null}. 2175 * @throws IndexOutOfBoundsException 2176 * if the {@code index} is negative or greater than or equal to 2177 * the length of {@code seq}. 2178 * @since 1.5 2179 */ 2180 public static int codePointAt(CharSequence seq, int index) { 2181 if (seq == null) { 2182 throw new NullPointerException("seq == null"); 2183 } 2184 int len = seq.length(); 2185 if (index < 0 || index >= len) { 2186 throw new IndexOutOfBoundsException(); 2187 } 2188 2189 char high = seq.charAt(index++); 2190 if (index >= len) { 2191 return high; 2192 } 2193 char low = seq.charAt(index); 2194 if (isSurrogatePair(high, low)) { 2195 return toCodePoint(high, low); 2196 } 2197 return high; 2198 } 2199 2200 /** 2201 * Returns the code point at {@code index} in the specified array of 2202 * character units. If the unit at {@code index} is a high-surrogate unit, 2203 * {@code index + 1} is less than the length of the array and the unit at 2204 * {@code index + 1} is a low-surrogate unit, then the supplementary code 2205 * point represented by the pair is returned; otherwise the {@code char} 2206 * value at {@code index} is returned. 2207 * 2208 * @param seq 2209 * the source array of {@code char} units. 2210 * @param index 2211 * the position in {@code seq} from which to retrieve the code 2212 * point. 2213 * @return the Unicode code point or {@code char} value at {@code index} in 2214 * {@code seq}. 2215 * @throws NullPointerException 2216 * if {@code seq} is {@code null}. 2217 * @throws IndexOutOfBoundsException 2218 * if the {@code index} is negative or greater than or equal to 2219 * the length of {@code seq}. 
2220 * @since 1.5 2221 */ 2222 public static int codePointAt(char[] seq, int index) { 2223 if (seq == null) { 2224 throw new NullPointerException("seq == null"); 2225 } 2226 int len = seq.length; 2227 if (index < 0 || index >= len) { 2228 throw new IndexOutOfBoundsException(); 2229 } 2230 2231 char high = seq[index++]; 2232 if (index >= len) { 2233 return high; 2234 } 2235 char low = seq[index]; 2236 if (isSurrogatePair(high, low)) { 2237 return toCodePoint(high, low); 2238 } 2239 return high; 2240 } 2241 2242 /** 2243 * Returns the code point at {@code index} in the specified array of 2244 * character units, where {@code index} has to be less than {@code limit}. 2245 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 2246 * is less than {@code limit} and the unit at {@code index + 1} is a 2247 * low-surrogate unit, then the supplementary code point represented by the 2248 * pair is returned; otherwise the {@code char} value at {@code index} is 2249 * returned. 2250 * 2251 * @param seq 2252 * the source array of {@code char} units. 2253 * @param index 2254 * the position in {@code seq} from which to get the code point. 2255 * @param limit 2256 * the index after the last unit in {@code seq} that can be used. 2257 * @return the Unicode code point or {@code char} value at {@code index} in 2258 * {@code seq}. 2259 * @throws NullPointerException 2260 * if {@code seq} is {@code null}. 2261 * @throws IndexOutOfBoundsException 2262 * if {@code index < 0}, {@code index >= limit}, 2263 * {@code limit < 0} or if {@code limit} is greater than the 2264 * length of {@code seq}. 
2265 * @since 1.5 2266 */ 2267 public static int codePointAt(char[] seq, int index, int limit) { 2268 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 2269 throw new IndexOutOfBoundsException(); 2270 } 2271 2272 char high = seq[index++]; 2273 if (index >= limit) { 2274 return high; 2275 } 2276 char low = seq[index]; 2277 if (isSurrogatePair(high, low)) { 2278 return toCodePoint(high, low); 2279 } 2280 return high; 2281 } 2282 2283 /** 2284 * Returns the code point that precedes {@code index} in the specified 2285 * sequence of character units. If the unit at {@code index - 1} is a 2286 * low-surrogate unit, {@code index - 2} is not negative and the unit at 2287 * {@code index - 2} is a high-surrogate unit, then the supplementary code 2288 * point represented by the pair is returned; otherwise the {@code char} 2289 * value at {@code index - 1} is returned. 2290 * 2291 * @param seq 2292 * the source sequence of {@code char} units. 2293 * @param index 2294 * the position in {@code seq} following the code 2295 * point that should be returned. 2296 * @return the Unicode code point or {@code char} value before {@code index} 2297 * in {@code seq}. 2298 * @throws NullPointerException 2299 * if {@code seq} is {@code null}. 2300 * @throws IndexOutOfBoundsException 2301 * if the {@code index} is less than 1 or greater than the 2302 * length of {@code seq}. 
2303 * @since 1.5 2304 */ 2305 public static int codePointBefore(CharSequence seq, int index) { 2306 if (seq == null) { 2307 throw new NullPointerException("seq == null"); 2308 } 2309 int len = seq.length(); 2310 if (index < 1 || index > len) { 2311 throw new IndexOutOfBoundsException(); 2312 } 2313 2314 char low = seq.charAt(--index); 2315 if (--index < 0) { 2316 return low; 2317 } 2318 char high = seq.charAt(index); 2319 if (isSurrogatePair(high, low)) { 2320 return toCodePoint(high, low); 2321 } 2322 return low; 2323 } 2324 2325 /** 2326 * Returns the code point that precedes {@code index} in the specified 2327 * array of character units. If the unit at {@code index - 1} is a 2328 * low-surrogate unit, {@code index - 2} is not negative and the unit at 2329 * {@code index - 2} is a high-surrogate unit, then the supplementary code 2330 * point represented by the pair is returned; otherwise the {@code char} 2331 * value at {@code index - 1} is returned. 2332 * 2333 * @param seq 2334 * the source array of {@code char} units. 2335 * @param index 2336 * the position in {@code seq} following the code 2337 * point that should be returned. 2338 * @return the Unicode code point or {@code char} value before {@code index} 2339 * in {@code seq}. 2340 * @throws NullPointerException 2341 * if {@code seq} is {@code null}. 2342 * @throws IndexOutOfBoundsException 2343 * if the {@code index} is less than 1 or greater than the 2344 * length of {@code seq}. 
2345 * @since 1.5 2346 */ 2347 public static int codePointBefore(char[] seq, int index) { 2348 if (seq == null) { 2349 throw new NullPointerException("seq == null"); 2350 } 2351 int len = seq.length; 2352 if (index < 1 || index > len) { 2353 throw new IndexOutOfBoundsException(); 2354 } 2355 2356 char low = seq[--index]; 2357 if (--index < 0) { 2358 return low; 2359 } 2360 char high = seq[index]; 2361 if (isSurrogatePair(high, low)) { 2362 return toCodePoint(high, low); 2363 } 2364 return low; 2365 } 2366 2367 /** 2368 * Returns the code point that precedes the {@code index} in the specified 2369 * array of character units and is not less than {@code start}. If the unit 2370 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 2371 * less than {@code start} and the unit at {@code index - 2} is a 2372 * high-surrogate unit, then the supplementary code point represented by the 2373 * pair is returned; otherwise the {@code char} value at {@code index - 1} 2374 * is returned. 2375 * 2376 * @param seq 2377 * the source array of {@code char} units. 2378 * @param index 2379 * the position in {@code seq} following the code point that 2380 * should be returned. 2381 * @param start 2382 * the index of the first element in {@code seq}. 2383 * @return the Unicode code point or {@code char} value before {@code index} 2384 * in {@code seq}. 2385 * @throws NullPointerException 2386 * if {@code seq} is {@code null}. 2387 * @throws IndexOutOfBoundsException 2388 * if the {@code index <= start}, {@code start < 0}, 2389 * {@code index} is greater than the length of {@code seq}, or 2390 * if {@code start} is equal or greater than the length of 2391 * {@code seq}. 
2392 * @since 1.5 2393 */ 2394 public static int codePointBefore(char[] seq, int index, int start) { 2395 if (seq == null) { 2396 throw new NullPointerException("seq == null"); 2397 } 2398 int len = seq.length; 2399 if (index <= start || index > len || start < 0 || start >= len) { 2400 throw new IndexOutOfBoundsException(); 2401 } 2402 2403 char low = seq[--index]; 2404 if (--index < start) { 2405 return low; 2406 } 2407 char high = seq[index]; 2408 if (isSurrogatePair(high, low)) { 2409 return toCodePoint(high, low); 2410 } 2411 return low; 2412 } 2413 2414 /** 2415 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2416 * and copies the value(s) into the char array {@code dst}, starting at 2417 * index {@code dstIndex}. 2418 * 2419 * @param codePoint 2420 * the Unicode code point to encode. 2421 * @param dst 2422 * the destination array to copy the encoded value into. 2423 * @param dstIndex 2424 * the index in {@code dst} from where to start copying. 2425 * @return the number of {@code char} value units copied into {@code dst}. 2426 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2427 * @throws NullPointerException 2428 * if {@code dst} is {@code null}. 2429 * @throws IndexOutOfBoundsException 2430 * if {@code dstIndex} is negative, greater than or equal to 2431 * {@code dst.length} or equals {@code dst.length - 1} when 2432 * {@code codePoint} is a 2433 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2434 * @since 1.5 2435 */ 2436 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2437 checkValidCodePoint(codePoint); 2438 if (dst == null) { 2439 throw new NullPointerException("dst == null"); 2440 } 2441 if (dstIndex < 0 || dstIndex >= dst.length) { 2442 throw new IndexOutOfBoundsException(); 2443 } 2444 2445 if (isSupplementaryCodePoint(codePoint)) { 2446 if (dstIndex == dst.length - 1) { 2447 throw new IndexOutOfBoundsException(); 2448 } 2449 // See RFC 2781, Section 2.1 2450 // http://www.ietf.org/rfc/rfc2781.txt 2451 int cpPrime = codePoint - 0x10000; 2452 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2453 int low = 0xDC00 | (cpPrime & 0x3FF); 2454 dst[dstIndex] = (char) high; 2455 dst[dstIndex + 1] = (char) low; 2456 return 2; 2457 } 2458 2459 dst[dstIndex] = (char) codePoint; 2460 return 1; 2461 } 2462 2463 /** 2464 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2465 * and returns it as a char array. 2466 * 2467 * @param codePoint 2468 * the Unicode code point to encode. 2469 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2470 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2471 * then the returned array contains two characters, otherwise it 2472 * contains just one character. 2473 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2474 * @since 1.5 2475 */ 2476 public static char[] toChars(int codePoint) { 2477 checkValidCodePoint(codePoint); 2478 if (isSupplementaryCodePoint(codePoint)) { 2479 int cpPrime = codePoint - 0x10000; 2480 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2481 int low = 0xDC00 | (cpPrime & 0x3FF); 2482 return new char[] { (char) high, (char) low }; 2483 } 2484 return new char[] { (char) codePoint }; 2485 } 2486 2487 /** 2488 * Counts the number of Unicode code points in the subsequence of the 2489 * specified character sequence, as delineated by {@code beginIndex} and 2490 * {@code endIndex}. 
Any surrogate values with missing pair values will be 2491 * counted as one code point. 2492 * 2493 * @param seq 2494 * the {@code CharSequence} to look through. 2495 * @param beginIndex 2496 * the inclusive index to begin counting at. 2497 * @param endIndex 2498 * the exclusive index to stop counting at. 2499 * @return the number of Unicode code points. 2500 * @throws NullPointerException 2501 * if {@code seq} is {@code null}. 2502 * @throws IndexOutOfBoundsException 2503 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2504 * if {@code endIndex} is greater than the length of {@code seq}. 2505 * @since 1.5 2506 */ 2507 public static int codePointCount(CharSequence seq, int beginIndex, 2508 int endIndex) { 2509 if (seq == null) { 2510 throw new NullPointerException("seq == null"); 2511 } 2512 int len = seq.length(); 2513 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2514 throw new IndexOutOfBoundsException(); 2515 } 2516 2517 int result = 0; 2518 for (int i = beginIndex; i < endIndex; i++) { 2519 char c = seq.charAt(i); 2520 if (isHighSurrogate(c)) { 2521 if (++i < endIndex) { 2522 c = seq.charAt(i); 2523 if (!isLowSurrogate(c)) { 2524 result++; 2525 } 2526 } 2527 } 2528 result++; 2529 } 2530 return result; 2531 } 2532 2533 /** 2534 * Counts the number of Unicode code points in the subsequence of the 2535 * specified char array, as delineated by {@code offset} and {@code count}. 2536 * Any surrogate values with missing pair values will be counted as one code 2537 * point. 2538 * 2539 * @param seq 2540 * the char array to look through 2541 * @param offset 2542 * the inclusive index to begin counting at. 2543 * @param count 2544 * the number of {@code char} values to look through in 2545 * {@code seq}. 2546 * @return the number of Unicode code points. 2547 * @throws NullPointerException 2548 * if {@code seq} is {@code null}. 
2549 * @throws IndexOutOfBoundsException 2550 * if {@code offset < 0}, {@code count < 0} or if 2551 * {@code offset + count} is greater than the length of 2552 * {@code seq}. 2553 * @since 1.5 2554 */ 2555 public static int codePointCount(char[] seq, int offset, int count) { 2556 Arrays.checkOffsetAndCount(seq.length, offset, count); 2557 int endIndex = offset + count; 2558 int result = 0; 2559 for (int i = offset; i < endIndex; i++) { 2560 char c = seq[i]; 2561 if (isHighSurrogate(c)) { 2562 if (++i < endIndex) { 2563 c = seq[i]; 2564 if (!isLowSurrogate(c)) { 2565 result++; 2566 } 2567 } 2568 } 2569 result++; 2570 } 2571 return result; 2572 } 2573 2574 /** 2575 * Determines the index in the specified character sequence that is offset 2576 * {@code codePointOffset} code points from {@code index}. 2577 * 2578 * @param seq 2579 * the character sequence to find the index in. 2580 * @param index 2581 * the start index in {@code seq}. 2582 * @param codePointOffset 2583 * the number of code points to look backwards or forwards; may 2584 * be a negative or positive value. 2585 * @return the index in {@code seq} that is {@code codePointOffset} code 2586 * points away from {@code index}. 2587 * @throws NullPointerException 2588 * if {@code seq} is {@code null}. 2589 * @throws IndexOutOfBoundsException 2590 * if {@code index < 0}, {@code index} is greater than the 2591 * length of {@code seq}, or if there are not enough values in 2592 * {@code seq} to skip {@code codePointOffset} code points 2593 * forwards or backwards (if {@code codePointOffset} is 2594 * negative) from {@code index}. 
2595 * @since 1.5 2596 */ 2597 public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { 2598 if (seq == null) { 2599 throw new NullPointerException("seq == null"); 2600 } 2601 int len = seq.length(); 2602 if (index < 0 || index > len) { 2603 throw new IndexOutOfBoundsException(); 2604 } 2605 2606 if (codePointOffset == 0) { 2607 return index; 2608 } 2609 2610 if (codePointOffset > 0) { 2611 int codePoints = codePointOffset; 2612 int i = index; 2613 while (codePoints > 0) { 2614 codePoints--; 2615 if (i >= len) { 2616 throw new IndexOutOfBoundsException(); 2617 } 2618 if (isHighSurrogate(seq.charAt(i))) { 2619 int next = i + 1; 2620 if (next < len && isLowSurrogate(seq.charAt(next))) { 2621 i++; 2622 } 2623 } 2624 i++; 2625 } 2626 return i; 2627 } 2628 2629 int codePoints = -codePointOffset; 2630 int i = index; 2631 while (codePoints > 0) { 2632 codePoints--; 2633 i--; 2634 if (i < 0) { 2635 throw new IndexOutOfBoundsException(); 2636 } 2637 if (isLowSurrogate(seq.charAt(i))) { 2638 int prev = i - 1; 2639 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2640 i--; 2641 } 2642 } 2643 } 2644 return i; 2645 } 2646 2647 /** 2648 * Determines the index in a subsequence of the specified character array 2649 * that is offset {@code codePointOffset} code points from {@code index}. 2650 * The subsequence is delineated by {@code start} and {@code count}. 2651 * 2652 * @param seq 2653 * the character array to find the index in. 2654 * @param start 2655 * the inclusive index that marks the beginning of the 2656 * subsequence. 2657 * @param count 2658 * the number of {@code char} values to include within the 2659 * subsequence. 2660 * @param index 2661 * the start index in the subsequence of the char array. 2662 * @param codePointOffset 2663 * the number of code points to look backwards or forwards; may 2664 * be a negative or positive value. 
2665 * @return the index in {@code seq} that is {@code codePointOffset} code 2666 * points away from {@code index}. 2667 * @throws NullPointerException 2668 * if {@code seq} is {@code null}. 2669 * @throws IndexOutOfBoundsException 2670 * if {@code start < 0}, {@code count < 0}, 2671 * {@code index < start}, {@code index > start + count}, 2672 * {@code start + count} is greater than the length of 2673 * {@code seq}, or if there are not enough values in 2674 * {@code seq} to skip {@code codePointOffset} code points 2675 * forward or backward (if {@code codePointOffset} is 2676 * negative) from {@code index}. 2677 * @since 1.5 2678 */ 2679 public static int offsetByCodePoints(char[] seq, int start, int count, 2680 int index, int codePointOffset) { 2681 Arrays.checkOffsetAndCount(seq.length, start, count); 2682 int end = start + count; 2683 if (index < start || index > end) { 2684 throw new IndexOutOfBoundsException(); 2685 } 2686 2687 if (codePointOffset == 0) { 2688 return index; 2689 } 2690 2691 if (codePointOffset > 0) { 2692 int codePoints = codePointOffset; 2693 int i = index; 2694 while (codePoints > 0) { 2695 codePoints--; 2696 if (i >= end) { 2697 throw new IndexOutOfBoundsException(); 2698 } 2699 if (isHighSurrogate(seq[i])) { 2700 int next = i + 1; 2701 if (next < end && isLowSurrogate(seq[next])) { 2702 i++; 2703 } 2704 } 2705 i++; 2706 } 2707 return i; 2708 } 2709 2710 int codePoints = -codePointOffset; 2711 int i = index; 2712 while (codePoints > 0) { 2713 codePoints--; 2714 i--; 2715 if (i < start) { 2716 throw new IndexOutOfBoundsException(); 2717 } 2718 if (isLowSurrogate(seq[i])) { 2719 int prev = i - 1; 2720 if (prev >= start && isHighSurrogate(seq[prev])) { 2721 i--; 2722 } 2723 } 2724 } 2725 return i; 2726 } 2727 2728 /** 2729 * Convenience method to determine the value of the specified character 2730 * {@code c} in the supplied radix. The value of {@code radix} must be 2731 * between MIN_RADIX and MAX_RADIX. 
2732 * 2733 * @param c 2734 * the character to determine the value of. 2735 * @param radix 2736 * the radix. 2737 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2738 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2739 */ 2740 public static int digit(char c, int radix) { 2741 return digit((int) c, radix); 2742 } 2743 2744 /** 2745 * Convenience method to determine the value of the character 2746 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2747 * be between MIN_RADIX and MAX_RADIX. 2748 * 2749 * @param codePoint 2750 * the character, including supplementary characters. 2751 * @param radix 2752 * the radix. 2753 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2754 * {@link #MAX_RADIX} then the value of the character in the radix; 2755 * -1 otherwise. 2756 */ 2757 public static int digit(int codePoint, int radix) { 2758 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2759 return -1; 2760 } 2761 if (codePoint < 128) { 2762 // Optimized for ASCII 2763 int result = -1; 2764 if ('0' <= codePoint && codePoint <= '9') { 2765 result = codePoint - '0'; 2766 } else if ('a' <= codePoint && codePoint <= 'z') { 2767 result = 10 + (codePoint - 'a'); 2768 } else if ('A' <= codePoint && codePoint <= 'Z') { 2769 result = 10 + (codePoint - 'A'); 2770 } 2771 return result < radix ? result : -1; 2772 } 2773 return digitImpl(codePoint, radix); 2774 } 2775 2776 private static native int digitImpl(int codePoint, int radix); 2777 2778 /** 2779 * Compares this object with the specified object and indicates if they are 2780 * equal. In order to be equal, {@code object} must be an instance of 2781 * {@code Character} and have the same char value as this object. 2782 * 2783 * @param object 2784 * the object to compare this double with. 2785 * @return {@code true} if the specified object is equal to this 2786 * {@code Character}; {@code false} otherwise. 
2787 */ 2788 @Override 2789 public boolean equals(Object object) { 2790 return (object instanceof Character) && (((Character) object).value == value); 2791 } 2792 2793 /** 2794 * Returns the character which represents the specified digit in the 2795 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2796 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2797 * smaller than {@code radix}. If any of these conditions does not hold, 0 2798 * is returned. 2799 * 2800 * @param digit 2801 * the integer value. 2802 * @param radix 2803 * the radix. 2804 * @return the character which represents the {@code digit} in the 2805 * {@code radix}. 2806 */ 2807 public static char forDigit(int digit, int radix) { 2808 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2809 if (digit >= 0 && digit < radix) { 2810 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2811 } 2812 } 2813 return 0; 2814 } 2815 2816 /** 2817 * Returns a human-readable name for the given code point, 2818 * or null if the code point is unassigned. 2819 * 2820 * <p>As a fallback mechanism this method returns strings consisting of the Unicode 2821 * block name (with underscores replaced by spaces), a single space, and the uppercase 2822 * hex value of the code point, using as few digits as necessary. 2823 * 2824 * <p>Examples: 2825 * <ul> 2826 * <li>{@code Character.getName(0)} returns "NULL". 2827 * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". 2828 * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". 2829 * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". 2830 * </ul> 2831 * 2832 * <p>Note that the exact strings returned will vary from release to release. 2833 * 2834 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
2835 * @since 1.7 2836 */ 2837 public static String getName(int codePoint) { 2838 checkValidCodePoint(codePoint); 2839 if (getType(codePoint) == Character.UNASSIGNED) { 2840 return null; 2841 } 2842 String result = getNameImpl(codePoint); 2843 if (result == null) { 2844 String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); 2845 result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0); 2846 } 2847 return result; 2848 } 2849 2850 private static native String getNameImpl(int codePoint); 2851 2852 /** 2853 * Returns the numeric value of the specified Unicode character. 2854 * See {@link #getNumericValue(int)}. 2855 * 2856 * @param c the character 2857 * @return a non-negative numeric integer value if a numeric value for 2858 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2859 * -2 if the numeric value can not be represented as an integer. 2860 */ 2861 public static int getNumericValue(char c) { 2862 return getNumericValue((int) c); 2863 } 2864 2865 /** 2866 * Gets the numeric value of the specified Unicode code point. For example, 2867 * the code point '\u216B' stands for the Roman number XII, which has the 2868 * numeric value 12. 2869 * 2870 * <p>There are two points of divergence between this method and the Unicode 2871 * specification. This method treats the letters a-z (in both upper and lower 2872 * cases, and their full-width variants) as numbers from 10 to 35. The 2873 * Unicode specification also supports the idea of code points with non-integer 2874 * numeric values; this method does not (except to the extent of returning -2 2875 * for such code points). 2876 * 2877 * @param codePoint the code point 2878 * @return a non-negative numeric integer value if a numeric value for 2879 * {@code codePoint} exists, -1 if there is no numeric value for 2880 * {@code codePoint}, -2 if the numeric value can not be 2881 * represented with an integer. 
2882 */ 2883 public static int getNumericValue(int codePoint) { 2884 // This is both an optimization and papers over differences between Java and ICU. 2885 if (codePoint < 128) { 2886 if (codePoint >= '0' && codePoint <= '9') { 2887 return codePoint - '0'; 2888 } 2889 if (codePoint >= 'a' && codePoint <= 'z') { 2890 return codePoint - ('a' - 10); 2891 } 2892 if (codePoint >= 'A' && codePoint <= 'Z') { 2893 return codePoint - ('A' - 10); 2894 } 2895 return -1; 2896 } 2897 // Full-width uppercase A-Z. 2898 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2899 return codePoint - 0xff17; 2900 } 2901 // Full-width lowercase a-z. 2902 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2903 return codePoint - 0xff37; 2904 } 2905 return getNumericValueImpl(codePoint); 2906 } 2907 2908 private static native int getNumericValueImpl(int codePoint); 2909 2910 /** 2911 * Gets the general Unicode category of the specified character. 2912 * 2913 * @param c 2914 * the character to get the category of. 2915 * @return the Unicode category of {@code c}. 2916 */ 2917 public static int getType(char c) { 2918 return getType((int) c); 2919 } 2920 2921 /** 2922 * Gets the general Unicode category of the specified code point. 2923 * 2924 * @param codePoint 2925 * the Unicode code point to get the category of. 2926 * @return the Unicode category of {@code codePoint}. 2927 */ 2928 public static int getType(int codePoint) { 2929 int type = getTypeImpl(codePoint); 2930 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2931 if (type <= Character.FORMAT) { 2932 return type; 2933 } 2934 return (type + 1); 2935 } 2936 2937 private static native int getTypeImpl(int codePoint); 2938 2939 /** 2940 * Gets the Unicode directionality of the specified character. 2941 * 2942 * @param c 2943 * the character to get the directionality of. 2944 * @return the Unicode directionality of {@code c}. 
2945 */ 2946 public static byte getDirectionality(char c) { 2947 return getDirectionality((int)c); 2948 } 2949 2950 /** 2951 * Gets the Unicode directionality of the specified character. 2952 * 2953 * @param codePoint 2954 * the Unicode code point to get the directionality of. 2955 * @return the Unicode directionality of {@code codePoint}. 2956 */ 2957 public static byte getDirectionality(int codePoint) { 2958 if (getType(codePoint) == Character.UNASSIGNED) { 2959 return Character.DIRECTIONALITY_UNDEFINED; 2960 } 2961 2962 byte directionality = getDirectionalityImpl(codePoint); 2963 if (directionality == -1) { 2964 return -1; 2965 } 2966 return DIRECTIONALITY[directionality]; 2967 } 2968 2969 private static native byte getDirectionalityImpl(int codePoint); 2970 2971 /** 2972 * Indicates whether the specified character is mirrored. 2973 * 2974 * @param c 2975 * the character to check. 2976 * @return {@code true} if {@code c} is mirrored; {@code false} 2977 * otherwise. 2978 */ 2979 public static boolean isMirrored(char c) { 2980 return isMirrored((int) c); 2981 } 2982 2983 /** 2984 * Indicates whether the specified code point is mirrored. 2985 * 2986 * @param codePoint 2987 * the code point to check. 2988 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2989 * otherwise. 2990 */ 2991 public static boolean isMirrored(int codePoint) { 2992 return isMirroredImpl(codePoint); 2993 } 2994 2995 private static native boolean isMirroredImpl(int codePoint); 2996 2997 @Override 2998 public int hashCode() { 2999 return value; 3000 } 3001 3002 /** 3003 * Returns the high surrogate for the given code point. The result is meaningless if 3004 * the given code point is not a supplementary character. 3005 * @since 1.7 3006 */ 3007 public static char highSurrogate(int codePoint) { 3008 return (char) ((codePoint >> 10) + 0xd7c0); 3009 } 3010 3011 /** 3012 * Returns the low surrogate for the given code point. 
The result is meaningless if 3013 * the given code point is not a supplementary character. 3014 * @since 1.7 3015 */ 3016 public static char lowSurrogate(int codePoint) { 3017 return (char) ((codePoint & 0x3ff) | 0xdc00); 3018 } 3019 3020 /** 3021 * Returns true if the given code point is alphabetic. That is, 3022 * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories. 3023 * @since 1.7 3024 */ 3025 public static native boolean isAlphabetic(int codePoint); 3026 3027 /** 3028 * Returns true if the given code point is in the Basic Multilingual Plane (BMP). 3029 * Such code points can be represented by a single {@code char}. 3030 * @since 1.7 3031 */ 3032 public static boolean isBmpCodePoint(int codePoint) { 3033 return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE; 3034 } 3035 3036 /** 3037 * Indicates whether the specified character is defined in the Unicode 3038 * specification. 3039 * 3040 * @param c 3041 * the character to check. 3042 * @return {@code true} if the general Unicode category of the character is 3043 * not {@code UNASSIGNED}; {@code false} otherwise. 3044 */ 3045 public static boolean isDefined(char c) { 3046 return isDefinedImpl(c); 3047 } 3048 3049 /** 3050 * Indicates whether the specified code point is defined in the Unicode 3051 * specification. 3052 * 3053 * @param codePoint 3054 * the code point to check. 3055 * @return {@code true} if the general Unicode category of the code point is 3056 * not {@code UNASSIGNED}; {@code false} otherwise. 3057 */ 3058 public static boolean isDefined(int codePoint) { 3059 return isDefinedImpl(codePoint); 3060 } 3061 3062 private static native boolean isDefinedImpl(int codePoint); 3063 3064 /** 3065 * Indicates whether the specified character is a digit. 3066 * 3067 * @param c 3068 * the character to check. 3069 * @return {@code true} if {@code c} is a digit; {@code false} 3070 * otherwise. 
3071 */ 3072 public static boolean isDigit(char c) { 3073 return isDigit((int) c); 3074 } 3075 3076 /** 3077 * Indicates whether the specified code point is a digit. 3078 * 3079 * @param codePoint 3080 * the code point to check. 3081 * @return {@code true} if {@code codePoint} is a digit; {@code false} 3082 * otherwise. 3083 */ 3084 public static boolean isDigit(int codePoint) { 3085 // Optimized case for ASCII 3086 if ('0' <= codePoint && codePoint <= '9') { 3087 return true; 3088 } 3089 if (codePoint < 1632) { 3090 return false; 3091 } 3092 return isDigitImpl(codePoint); 3093 } 3094 3095 private static native boolean isDigitImpl(int codePoint); 3096 3097 /** 3098 * Indicates whether the specified character is ignorable in a Java or 3099 * Unicode identifier. 3100 * 3101 * @param c 3102 * the character to check. 3103 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 3104 */ 3105 public static boolean isIdentifierIgnorable(char c) { 3106 return isIdentifierIgnorable((int) c); 3107 } 3108 3109 /** 3110 * Returns true if the given code point is a CJKV ideographic character. 3111 * @since 1.7 3112 */ 3113 public static native boolean isIdeographic(int codePoint); 3114 3115 /** 3116 * Indicates whether the specified code point is ignorable in a Java or 3117 * Unicode identifier. 3118 * 3119 * @param codePoint 3120 * the code point to check. 3121 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 3122 * otherwise. 3123 */ 3124 public static boolean isIdentifierIgnorable(int codePoint) { 3125 // This is both an optimization and papers over differences between Java and ICU. 
3126 if (codePoint < 0x600) { 3127 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 3128 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 3129 } 3130 return isIdentifierIgnorableImpl(codePoint); 3131 } 3132 3133 private static native boolean isIdentifierIgnorableImpl(int codePoint); 3134 3135 /** 3136 * Indicates whether the specified character is an ISO control character. 3137 * 3138 * @param c 3139 * the character to check. 3140 * @return {@code true} if {@code c} is an ISO control character; 3141 * {@code false} otherwise. 3142 */ 3143 public static boolean isISOControl(char c) { 3144 return isISOControl((int) c); 3145 } 3146 3147 /** 3148 * Indicates whether the specified code point is an ISO control character. 3149 * 3150 * @param c 3151 * the code point to check. 3152 * @return {@code true} if {@code c} is an ISO control character; 3153 * {@code false} otherwise. 3154 */ 3155 public static boolean isISOControl(int c) { 3156 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 3157 } 3158 3159 /** 3160 * Indicates whether the specified character is a valid part of a Java 3161 * identifier other than the first character. 3162 * 3163 * @param c 3164 * the character to check. 3165 * @return {@code true} if {@code c} is valid as part of a Java identifier; 3166 * {@code false} otherwise. 3167 */ 3168 public static boolean isJavaIdentifierPart(char c) { 3169 return isJavaIdentifierPart((int) c); 3170 } 3171 3172 /** 3173 * Indicates whether the specified code point is a valid part of a Java 3174 * identifier other than the first character. 3175 * 3176 * @param codePoint 3177 * the code point to check. 3178 * @return {@code true} if {@code c} is valid as part of a Java identifier; 3179 * {@code false} otherwise. 3180 */ 3181 public static boolean isJavaIdentifierPart(int codePoint) { 3182 // Use precomputed bitmasks to optimize the ASCII range. 
3183 if (codePoint < 64) { 3184 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 3185 } else if (codePoint < 128) { 3186 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 3187 } 3188 int type = getType(codePoint); 3189 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 3190 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 3191 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 3192 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 3193 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 3194 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 3195 } 3196 3197 /** 3198 * Indicates whether the specified character is a valid first character for 3199 * a Java identifier. 3200 * 3201 * @param c 3202 * the character to check. 3203 * @return {@code true} if {@code c} is a valid first character of a Java 3204 * identifier; {@code false} otherwise. 3205 */ 3206 public static boolean isJavaIdentifierStart(char c) { 3207 return isJavaIdentifierStart((int) c); 3208 } 3209 3210 /** 3211 * Indicates whether the specified code point is a valid first character for 3212 * a Java identifier. 3213 * 3214 * @param codePoint 3215 * the code point to check. 3216 * @return {@code true} if {@code codePoint} is a valid start of a Java 3217 * identifier; {@code false} otherwise. 3218 */ 3219 public static boolean isJavaIdentifierStart(int codePoint) { 3220 // Use precomputed bitmasks to optimize the ASCII range. 3221 if (codePoint < 64) { 3222 return (codePoint == '$'); // There's only one character in this range. 3223 } else if (codePoint < 128) { 3224 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 3225 } 3226 int type = getType(codePoint); 3227 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 3228 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 3229 } 3230 3231 /** 3232 * Indicates whether the specified character is a Java letter. 
3233 * 3234 * @param c 3235 * the character to check. 3236 * @return {@code true} if {@code c} is a Java letter; {@code false} 3237 * otherwise. 3238 * @deprecated Use {@link #isJavaIdentifierStart(char)} instead. 3239 */ 3240 @Deprecated 3241 public static boolean isJavaLetter(char c) { 3242 return isJavaIdentifierStart(c); 3243 } 3244 3245 /** 3246 * Indicates whether the specified character is a Java letter or digit 3247 * character. 3248 * 3249 * @param c 3250 * the character to check. 3251 * @return {@code true} if {@code c} is a Java letter or digit; 3252 * {@code false} otherwise. 3253 * @deprecated Use {@link #isJavaIdentifierPart(char)} instead. 3254 */ 3255 @Deprecated 3256 public static boolean isJavaLetterOrDigit(char c) { 3257 return isJavaIdentifierPart(c); 3258 } 3259 3260 /** 3261 * Indicates whether the specified character is a letter. 3262 * 3263 * @param c 3264 * the character to check. 3265 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 3266 */ 3267 public static boolean isLetter(char c) { 3268 return isLetter((int) c); 3269 } 3270 3271 /** 3272 * Indicates whether the specified code point is a letter. 3273 * 3274 * @param codePoint 3275 * the code point to check. 3276 * @return {@code true} if {@code codePoint} is a letter; {@code false} 3277 * otherwise. 3278 */ 3279 public static boolean isLetter(int codePoint) { 3280 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 3281 return true; 3282 } 3283 if (codePoint < 128) { 3284 return false; 3285 } 3286 return isLetterImpl(codePoint); 3287 } 3288 3289 private static native boolean isLetterImpl(int codePoint); 3290 3291 /** 3292 * Indicates whether the specified character is a letter or a digit. 3293 * 3294 * @param c 3295 * the character to check. 3296 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 3297 * otherwise. 
3298 */ 3299 public static boolean isLetterOrDigit(char c) { 3300 return isLetterOrDigit((int) c); 3301 } 3302 3303 /** 3304 * Indicates whether the specified code point is a letter or a digit. 3305 * 3306 * @param codePoint 3307 * the code point to check. 3308 * @return {@code true} if {@code codePoint} is a letter or a digit; 3309 * {@code false} otherwise. 3310 */ 3311 public static boolean isLetterOrDigit(int codePoint) { 3312 // Optimized case for ASCII 3313 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 3314 return true; 3315 } 3316 if ('0' <= codePoint && codePoint <= '9') { 3317 return true; 3318 } 3319 if (codePoint < 128) { 3320 return false; 3321 } 3322 return isLetterOrDigitImpl(codePoint); 3323 } 3324 3325 private static native boolean isLetterOrDigitImpl(int codePoint); 3326 3327 /** 3328 * Indicates whether the specified character is a lower case letter. 3329 * 3330 * @param c 3331 * the character to check. 3332 * @return {@code true} if {@code c} is a lower case letter; {@code false} 3333 * otherwise. 3334 */ 3335 public static boolean isLowerCase(char c) { 3336 return isLowerCase((int) c); 3337 } 3338 3339 /** 3340 * Indicates whether the specified code point is a lower case letter. 3341 * 3342 * @param codePoint 3343 * the code point to check. 3344 * @return {@code true} if {@code codePoint} is a lower case letter; 3345 * {@code false} otherwise. 3346 */ 3347 public static boolean isLowerCase(int codePoint) { 3348 // Optimized case for ASCII 3349 if ('a' <= codePoint && codePoint <= 'z') { 3350 return true; 3351 } 3352 if (codePoint < 128) { 3353 return false; 3354 } 3355 return isLowerCaseImpl(codePoint); 3356 } 3357 3358 private static native boolean isLowerCaseImpl(int codePoint); 3359 3360 /** 3361 * Use {@link #isWhitespace(char)} instead. 3362 * @deprecated Use {@link #isWhitespace(char)} instead. 
3363 */ 3364 @Deprecated 3365 public static boolean isSpace(char c) { 3366 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 3367 } 3368 3369 /** 3370 * See {@link #isSpaceChar(int)}. 3371 */ 3372 public static boolean isSpaceChar(char c) { 3373 return isSpaceChar((int) c); 3374 } 3375 3376 /** 3377 * Returns true if the given code point is a Unicode space character. 3378 * The exact set of characters considered as whitespace varies with Unicode version. 3379 * Note that non-breaking spaces are considered whitespace. 3380 * Note also that line separators are not considered whitespace; see {@link #isWhitespace} 3381 * for an alternative. 3382 */ 3383 public static boolean isSpaceChar(int codePoint) { 3384 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 3385 // SPACE or NO-BREAK SPACE? 3386 if (codePoint == 0x20 || codePoint == 0xa0) { 3387 return true; 3388 } 3389 if (codePoint < 0x1000) { 3390 return false; 3391 } 3392 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 3393 if (codePoint == 0x1680 || codePoint == 0x180e) { 3394 return true; 3395 } 3396 if (codePoint < 0x2000) { 3397 return false; 3398 } 3399 if (codePoint <= 0xffff) { 3400 // Other whitespace from General Punctuation... 3401 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || 3402 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 3403 } 3404 // Let icu4c worry about non-BMP code points. 3405 return isSpaceCharImpl(codePoint); 3406 } 3407 3408 private static native boolean isSpaceCharImpl(int codePoint); 3409 3410 /** 3411 * Indicates whether the specified character is a titlecase character. 3412 * 3413 * @param c 3414 * the character to check. 3415 * @return {@code true} if {@code c} is a titlecase character, {@code false} 3416 * otherwise. 
3417 */ 3418 public static boolean isTitleCase(char c) { 3419 return isTitleCaseImpl(c); 3420 } 3421 3422 /** 3423 * Indicates whether the specified code point is a titlecase character. 3424 * 3425 * @param codePoint 3426 * the code point to check. 3427 * @return {@code true} if {@code codePoint} is a titlecase character, 3428 * {@code false} otherwise. 3429 */ 3430 public static boolean isTitleCase(int codePoint) { 3431 return isTitleCaseImpl(codePoint); 3432 } 3433 3434 private static native boolean isTitleCaseImpl(int codePoint); 3435 3436 /** 3437 * Indicates whether the specified character is valid as part of a Unicode 3438 * identifier other than the first character. 3439 * 3440 * @param c 3441 * the character to check. 3442 * @return {@code true} if {@code c} is valid as part of a Unicode 3443 * identifier; {@code false} otherwise. 3444 */ 3445 public static boolean isUnicodeIdentifierPart(char c) { 3446 return isUnicodeIdentifierPartImpl(c); 3447 } 3448 3449 /** 3450 * Indicates whether the specified code point is valid as part of a Unicode 3451 * identifier other than the first character. 3452 * 3453 * @param codePoint 3454 * the code point to check. 3455 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3456 * identifier; {@code false} otherwise. 3457 */ 3458 public static boolean isUnicodeIdentifierPart(int codePoint) { 3459 return isUnicodeIdentifierPartImpl(codePoint); 3460 } 3461 3462 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3463 3464 /** 3465 * Indicates whether the specified character is a valid initial character 3466 * for a Unicode identifier. 3467 * 3468 * @param c 3469 * the character to check. 3470 * @return {@code true} if {@code c} is a valid first character for a 3471 * Unicode identifier; {@code false} otherwise. 
3472 */ 3473 public static boolean isUnicodeIdentifierStart(char c) { 3474 return isUnicodeIdentifierStartImpl(c); 3475 } 3476 3477 /** 3478 * Indicates whether the specified code point is a valid initial character 3479 * for a Unicode identifier. 3480 * 3481 * @param codePoint 3482 * the code point to check. 3483 * @return {@code true} if {@code codePoint} is a valid first character for 3484 * a Unicode identifier; {@code false} otherwise. 3485 */ 3486 public static boolean isUnicodeIdentifierStart(int codePoint) { 3487 return isUnicodeIdentifierStartImpl(codePoint); 3488 } 3489 3490 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3491 3492 /** 3493 * Indicates whether the specified character is an upper case letter. 3494 * 3495 * @param c 3496 * the character to check. 3497 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3498 * otherwise. 3499 */ 3500 public static boolean isUpperCase(char c) { 3501 return isUpperCase((int) c); 3502 } 3503 3504 /** 3505 * Indicates whether the specified code point is an upper case letter. 3506 * 3507 * @param codePoint 3508 * the code point to check. 3509 * @return {@code true} if {@code codePoint} is a upper case letter; 3510 * {@code false} otherwise. 3511 */ 3512 public static boolean isUpperCase(int codePoint) { 3513 // Optimized case for ASCII 3514 if ('A' <= codePoint && codePoint <= 'Z') { 3515 return true; 3516 } 3517 if (codePoint < 128) { 3518 return false; 3519 } 3520 return isUpperCaseImpl(codePoint); 3521 } 3522 3523 private static native boolean isUpperCaseImpl(int codePoint); 3524 3525 /** 3526 * See {@link #isWhitespace(int)}. 3527 */ 3528 public static boolean isWhitespace(char c) { 3529 return isWhitespace((int) c); 3530 } 3531 3532 /** 3533 * Returns true if the given code point is a Unicode whitespace character. 3534 * The exact set of characters considered as whitespace varies with Unicode version. 
3535 * Note that non-breaking spaces are not considered whitespace. 3536 * Note also that line separators are considered whitespace; see {@link #isSpaceChar} 3537 * for an alternative. 3538 */ 3539 public static boolean isWhitespace(int codePoint) { 3540 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 3541 // Any ASCII whitespace character? 3542 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { 3543 return true; 3544 } 3545 if (codePoint < 0x1000) { 3546 return false; 3547 } 3548 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 3549 if (codePoint == 0x1680 || codePoint == 0x180e) { 3550 return true; 3551 } 3552 if (codePoint < 0x2000) { 3553 return false; 3554 } 3555 // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). 3556 if (codePoint == 0x2007 || codePoint == 0x202f) { 3557 return false; 3558 } 3559 if (codePoint <= 0xffff) { 3560 // Other whitespace from General Punctuation... 3561 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || 3562 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 3563 } 3564 // Let icu4c worry about non-BMP code points. 3565 return isWhitespaceImpl(codePoint); 3566 } 3567 3568 private static native boolean isWhitespaceImpl(int codePoint); 3569 3570 /** 3571 * Reverses the order of the first and second byte in the specified 3572 * character. 3573 * 3574 * @param c 3575 * the character to reverse. 3576 * @return the character with reordered bytes. 3577 */ 3578 public static char reverseBytes(char c) { 3579 return (char)((c<<8) | (c>>8)); 3580 } 3581 3582 /** 3583 * Returns the lower case equivalent for the specified character if the 3584 * character is an upper case letter. Otherwise, the specified character is 3585 * returned unchanged. 
3586 * 3587 * @param c 3588 * the character 3589 * @return if {@code c} is an upper case character then its lower case 3590 * counterpart, otherwise just {@code c}. 3591 */ 3592 public static char toLowerCase(char c) { 3593 return (char) toLowerCase((int) c); 3594 } 3595 3596 /** 3597 * Returns the lower case equivalent for the specified code point if it is 3598 * an upper case letter. Otherwise, the specified code point is returned 3599 * unchanged. 3600 * 3601 * @param codePoint 3602 * the code point to check. 3603 * @return if {@code codePoint} is an upper case character then its lower 3604 * case counterpart, otherwise just {@code codePoint}. 3605 */ 3606 public static int toLowerCase(int codePoint) { 3607 // Optimized case for ASCII 3608 if ('A' <= codePoint && codePoint <= 'Z') { 3609 return (char) (codePoint + ('a' - 'A')); 3610 } 3611 if (codePoint < 192) { 3612 return codePoint; 3613 } 3614 return toLowerCaseImpl(codePoint); 3615 } 3616 3617 private static native int toLowerCaseImpl(int codePoint); 3618 3619 @Override 3620 public String toString() { 3621 return String.valueOf(value); 3622 } 3623 3624 /** 3625 * Converts the specified character to its string representation. 3626 * 3627 * @param value 3628 * the character to convert. 3629 * @return the character converted to a string. 3630 */ 3631 public static String toString(char value) { 3632 return String.valueOf(value); 3633 } 3634 3635 /** 3636 * Returns the title case equivalent for the specified character if it 3637 * exists. Otherwise, the specified character is returned unchanged. 3638 * 3639 * @param c 3640 * the character to convert. 3641 * @return the title case equivalent of {@code c} if it exists, otherwise 3642 * {@code c}. 3643 */ 3644 public static char toTitleCase(char c) { 3645 return (char) toTitleCaseImpl(c); 3646 } 3647 3648 /** 3649 * Returns the title case equivalent for the specified code point if it 3650 * exists. Otherwise, the specified code point is returned unchanged. 
3651 * 3652 * @param codePoint 3653 * the code point to convert. 3654 * @return the title case equivalent of {@code codePoint} if it exists, 3655 * otherwise {@code codePoint}. 3656 */ 3657 public static int toTitleCase(int codePoint) { 3658 return toTitleCaseImpl(codePoint); 3659 } 3660 3661 private static native int toTitleCaseImpl(int codePoint); 3662 3663 /** 3664 * Returns the upper case equivalent for the specified character if the 3665 * character is a lower case letter. Otherwise, the specified character is 3666 * returned unchanged. 3667 * 3668 * @param c 3669 * the character to convert. 3670 * @return if {@code c} is a lower case character then its upper case 3671 * counterpart, otherwise just {@code c}. 3672 */ 3673 public static char toUpperCase(char c) { 3674 return (char) toUpperCase((int) c); 3675 } 3676 3677 /** 3678 * Returns the upper case equivalent for the specified code point if the 3679 * code point is a lower case letter. Otherwise, the specified code point is 3680 * returned unchanged. 3681 * 3682 * @param codePoint 3683 * the code point to convert. 3684 * @return if {@code codePoint} is a lower case character then its upper 3685 * case counterpart, otherwise just {@code codePoint}. 3686 */ 3687 public static int toUpperCase(int codePoint) { 3688 // Optimized case for ASCII 3689 if ('a' <= codePoint && codePoint <= 'z') { 3690 return (char) (codePoint - ('a' - 'A')); 3691 } 3692 if (codePoint < 181) { 3693 return codePoint; 3694 } 3695 return toUpperCaseImpl(codePoint); 3696 } 3697 3698 private static native int toUpperCaseImpl(int codePoint); 3699} 3700