Character.java revision c574d81e3de6cb92cf68bd44f7e50ac52fe2fd87
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21// BEGIN android-removed 22// import java.util.SortedMap; 23// import java.util.TreeMap; 24// 25// import org.apache.harmony.luni.util.BinarySearch; 26// END android-removed 27 28// BEGIN android-changed 29import com.ibm.icu4jni.lang.UCharacter; 30// END android-changed 31 32/** 33 * The wrapper for the primitive type {@code char}. This class also provides a 34 * number of utility methods for working with characters. 35 * <p> 36 * Character data is based upon the Unicode Standard, 4.0. The Unicode 37 * specification, character tables and other information are available at <a 38 * href="http://www.unicode.org/">http://www.unicode.org/</a>. 39 * <p> 40 * Unicode characters are referred to as <i>code points</i>. The range of valid 41 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 42 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 43 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 44 * encoding and {@code char} pairs are used to represent code points in the 45 * supplementary range. 
A pair of {@code char} values that represent a 46 * supplementary character are made up of a <i>high surrogate</i> with a value 47 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 48 * 0xDC00 to 0xDFFF. 49 * <p> 50 * On the Java platform a {@code char} value represents either a single BMP code 51 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 52 * is used to represent all Unicode code points. 53 * 54 * @since 1.0 55 */ 56public final class Character implements Serializable, Comparable<Character> { 57 private static final long serialVersionUID = 3786198910865385080L; 58 59 private final char value; 60 61 /** 62 * The minimum {@code Character} value. 63 */ 64 public static final char MIN_VALUE = '\u0000'; 65 66 /** 67 * The maximum {@code Character} value. 68 */ 69 public static final char MAX_VALUE = '\uffff'; 70 71 /** 72 * The minimum radix used for conversions between characters and integers. 73 */ 74 public static final int MIN_RADIX = 2; 75 76 /** 77 * The maximum radix used for conversions between characters and integers. 78 */ 79 public static final int MAX_RADIX = 36; 80 81 /** 82 * The {@link Class} object that represents the primitive type {@code char}. 83 */ 84 @SuppressWarnings("unchecked") 85 public static final Class<Character> TYPE 86 = (Class<Character>) char[].class.getComponentType(); 87 88 // Note: This can't be set to "char.class", since *that* is 89 // defined to be "java.lang.Character.TYPE"; 90 91 /** 92 * Unicode category constant Cn. 93 */ 94 public static final byte UNASSIGNED = 0; 95 96 /** 97 * Unicode category constant Lu. 98 */ 99 public static final byte UPPERCASE_LETTER = 1; 100 101 /** 102 * Unicode category constant Ll. 103 */ 104 public static final byte LOWERCASE_LETTER = 2; 105 106 /** 107 * Unicode category constant Lt. 108 */ 109 public static final byte TITLECASE_LETTER = 3; 110 111 /** 112 * Unicode category constant Lm. 
113 */ 114 public static final byte MODIFIER_LETTER = 4; 115 116 /** 117 * Unicode category constant Lo. 118 */ 119 public static final byte OTHER_LETTER = 5; 120 121 /** 122 * Unicode category constant Mn. 123 */ 124 public static final byte NON_SPACING_MARK = 6; 125 126 /** 127 * Unicode category constant Me. 128 */ 129 public static final byte ENCLOSING_MARK = 7; 130 131 /** 132 * Unicode category constant Mc. 133 */ 134 public static final byte COMBINING_SPACING_MARK = 8; 135 136 /** 137 * Unicode category constant Nd. 138 */ 139 public static final byte DECIMAL_DIGIT_NUMBER = 9; 140 141 /** 142 * Unicode category constant Nl. 143 */ 144 public static final byte LETTER_NUMBER = 10; 145 146 /** 147 * Unicode category constant No. 148 */ 149 public static final byte OTHER_NUMBER = 11; 150 151 /** 152 * Unicode category constant Zs. 153 */ 154 public static final byte SPACE_SEPARATOR = 12; 155 156 /** 157 * Unicode category constant Zl. 158 */ 159 public static final byte LINE_SEPARATOR = 13; 160 161 /** 162 * Unicode category constant Zp. 163 */ 164 public static final byte PARAGRAPH_SEPARATOR = 14; 165 166 /** 167 * Unicode category constant Cc. 168 */ 169 public static final byte CONTROL = 15; 170 171 /** 172 * Unicode category constant Cf. 173 */ 174 public static final byte FORMAT = 16; 175 176 /** 177 * Unicode category constant Co. 178 */ 179 public static final byte PRIVATE_USE = 18; 180 181 /** 182 * Unicode category constant Cs. 183 */ 184 public static final byte SURROGATE = 19; 185 186 /** 187 * Unicode category constant Pd. 188 */ 189 public static final byte DASH_PUNCTUATION = 20; 190 191 /** 192 * Unicode category constant Ps. 193 */ 194 public static final byte START_PUNCTUATION = 21; 195 196 /** 197 * Unicode category constant Pe. 198 */ 199 public static final byte END_PUNCTUATION = 22; 200 201 /** 202 * Unicode category constant Pc. 203 */ 204 public static final byte CONNECTOR_PUNCTUATION = 23; 205 206 /** 207 * Unicode category constant Po. 
208 */ 209 public static final byte OTHER_PUNCTUATION = 24; 210 211 /** 212 * Unicode category constant Sm. 213 */ 214 public static final byte MATH_SYMBOL = 25; 215 216 /** 217 * Unicode category constant Sc. 218 */ 219 public static final byte CURRENCY_SYMBOL = 26; 220 221 /** 222 * Unicode category constant Sk. 223 */ 224 public static final byte MODIFIER_SYMBOL = 27; 225 226 /** 227 * Unicode category constant So. 228 */ 229 public static final byte OTHER_SYMBOL = 28; 230 231 /** 232 * Unicode category constant Pi. 233 * 234 * @since 1.4 235 */ 236 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 237 238 /** 239 * Unicode category constant Pf. 240 * 241 * @since 1.4 242 */ 243 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 244 245 /** 246 * Unicode bidirectional constant. 247 * 248 * @since 1.4 249 */ 250 public static final byte DIRECTIONALITY_UNDEFINED = -1; 251 252 /** 253 * Unicode bidirectional constant L. 254 * 255 * @since 1.4 256 */ 257 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 258 259 /** 260 * Unicode bidirectional constant R. 261 * 262 * @since 1.4 263 */ 264 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 265 266 /** 267 * Unicode bidirectional constant AL. 268 * 269 * @since 1.4 270 */ 271 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 272 273 /** 274 * Unicode bidirectional constant EN. 275 * 276 * @since 1.4 277 */ 278 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 279 280 /** 281 * Unicode bidirectional constant ES. 282 * 283 * @since 1.4 284 */ 285 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 286 287 /** 288 * Unicode bidirectional constant ET. 289 * 290 * @since 1.4 291 */ 292 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 293 294 /** 295 * Unicode bidirectional constant AN. 296 * 297 * @since 1.4 298 */ 299 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 300 301 /** 302 * Unicode bidirectional constant CS. 
303 * 304 * @since 1.4 305 */ 306 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 307 308 /** 309 * Unicode bidirectional constant NSM. 310 * 311 * @since 1.4 312 */ 313 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 314 315 /** 316 * Unicode bidirectional constant BN. 317 * 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 321 322 /** 323 * Unicode bidirectional constant B. 324 * 325 * @since 1.4 326 */ 327 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 328 329 /** 330 * Unicode bidirectional constant S. 331 * 332 * @since 1.4 333 */ 334 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 335 336 /** 337 * Unicode bidirectional constant WS. 338 * 339 * @since 1.4 340 */ 341 public static final byte DIRECTIONALITY_WHITESPACE = 12; 342 343 /** 344 * Unicode bidirectional constant ON. 345 * 346 * @since 1.4 347 */ 348 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 349 350 /** 351 * Unicode bidirectional constant LRE. 352 * 353 * @since 1.4 354 */ 355 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 356 357 /** 358 * Unicode bidirectional constant LRO. 359 * 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 363 364 /** 365 * Unicode bidirectional constant RLE. 366 * 367 * @since 1.4 368 */ 369 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 370 371 /** 372 * Unicode bidirectional constant RLO. 373 * 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 377 378 /** 379 * Unicode bidirectional constant PDF. 380 * 381 * @since 1.4 382 */ 383 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 384 385 /** 386 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 387 * encoding, {@code '\uD800'}. 
388 * 389 * @since 1.5 390 */ 391 public static final char MIN_HIGH_SURROGATE = '\uD800'; 392 393 /** 394 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 395 * encoding, {@code '\uDBFF'}. 396 * 397 * @since 1.5 398 */ 399 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 400 401 /** 402 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 403 * encoding, {@code '\uDC00'}. 404 * 405 * @since 1.5 406 */ 407 public static final char MIN_LOW_SURROGATE = '\uDC00'; 408 409 /** 410 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 411 * encoding, {@code '\uDFFF'}. 412 * 413 * @since 1.5 414 */ 415 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 416 417 /** 418 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 419 * 420 * @since 1.5 421 */ 422 public static final char MIN_SURROGATE = '\uD800'; 423 424 /** 425 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 426 * 427 * @since 1.5 428 */ 429 public static final char MAX_SURROGATE = '\uDFFF'; 430 431 /** 432 * The minimum value of a supplementary code point, {@code U+010000}. 433 * 434 * @since 1.5 435 */ 436 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 437 438 /** 439 * The minimum code point value, {@code U+0000}. 440 * 441 * @since 1.5 442 */ 443 public static final int MIN_CODE_POINT = 0x000000; 444 445 /** 446 * The maximum code point value, {@code U+10FFFF}. 447 * 448 * @since 1.5 449 */ 450 public static final int MAX_CODE_POINT = 0x10FFFF; 451 452 /** 453 * The number of bits required to represent a {@code Character} value 454 * unsigned form. 455 * 456 * @since 1.5 457 */ 458 public static final int SIZE = 16; 459 460 // BEGIN android-removed 461 // Unicode 3.0.1 (same as Unicode 3.0.0) 462 // private static final String bidiKeys = ... 463 464 // private static final char[] bidiValues = ... 465 466 // private static final char[] mirrored = ... 
467 468 // Unicode 3.0.1 (same as Unicode 3.0.0) 469 // private static final String typeKeys = ... 470 471 // private static final char[] typeValues = ... 472 473 // private static final int[] typeValuesCache = ... 474 475 // Unicode 3.0.1 (same as Unicode 3.0.0) 476 // private static final String uppercaseKeys = ... 477 478 // private static final char[] uppercaseValues = ... 479 480 // private static final int[] uppercaseValuesCache = ... 481 482 // private static final String lowercaseKeys = ... 483 484 // private static final char[] lowercaseValues = ... 485 486 // private static final int[] lowercaseValuesCache = ... 487 488 // private static final String digitKeys = ... 489 490 // private static final char[] digitValues = ... 491 492 // private static final char[] typeTags = ... 493 // END android-removed 494 495 // BEGIN android-note 496 // put this in a helper class so that it's only initialized on demand? 497 // END android-note 498 private static final byte[] DIRECTIONALITY = new byte[] { 499 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 500 DIRECTIONALITY_EUROPEAN_NUMBER, 501 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 502 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 503 DIRECTIONALITY_ARABIC_NUMBER, 504 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 505 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 506 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 507 DIRECTIONALITY_OTHER_NEUTRALS, 508 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 509 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 510 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 511 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 512 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 513 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 514 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 515 516 // BEGIN android-removed 517 // private static final int ISJAVASTART = 1; 518 519 // private static final int ISJAVAPART = 2; 520 521 // Unicode 3.0.1 (same as Unicode 3.0.0) 522 // private static final String titlecaseKeys = ... 
523 524 // private static final char[] titlecaseValues = ... 525 526 // Unicode 3.0.0 (NOT the same as Unicode 3.0.1) 527 // private static final String numericKeys = ... 528 529 // private static final char[] numericValues = ... 530 // END android-removed 531 532 /* 533 * Represents a subset of the Unicode character set. 534 */ 535 public static class Subset { 536 String name; 537 538 /** 539 * Constructs a new {@code Subset}. 540 * 541 * @param string 542 * this subset's name. 543 */ 544 protected Subset(String string) { 545 if (string == null) { 546 throw new NullPointerException(); 547 } 548 name = string; 549 } 550 551 /** 552 * Compares this character subset with the specified object. Uses 553 * {@link java.lang.Object#equals(Object)} to do the comparison. 554 * 555 * @param object 556 * the object to compare this character subset with. 557 * @return {@code true} if {@code object} is this subset, that is, if 558 * {@code object == this}; {@code false} otherwise. 559 */ 560 @Override 561 public final boolean equals(Object object) { 562 return super.equals(object); 563 } 564 565 /** 566 * Returns the integer hash code for this character subset. 567 * 568 * @return this subset's hash code, which is the hash code computed by 569 * {@link java.lang.Object#hashCode()}. 570 */ 571 @Override 572 public final int hashCode() { 573 return super.hashCode(); 574 } 575 576 /** 577 * Returns the string representation of this subset. 578 * 579 * @return this subset's name. 580 */ 581 @Override 582 public final String toString() { 583 return name; 584 } 585 } 586 587 /** 588 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 589 * specification. 590 * 591 * @since 1.2 592 */ 593 public static final class UnicodeBlock extends Subset { 594 /** 595 * The "Surrogates Area" Unicode Block. 
596 * 597 * @deprecated As of Java 5, this block has been replaced by 598 * {@link #HIGH_SURROGATES}, 599 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 600 * {@link #LOW_SURROGATES}. 601 */ 602 @Deprecated 603 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0); 604 /** 605 * The "Basic Latin" Unicode Block. 606 * 607 * @since 1.2 608 */ 609 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f); 610 /** 611 * The "Latin-1 Supplement" Unicode Block. 612 * 613 * @since 1.2 614 */ 615 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff); 616 /** 617 * The "Latin Extended-A" Unicode Block. 618 * 619 * @since 1.2 620 */ 621 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f); 622 /** 623 * The "Latin Extended-B" Unicode Block. 624 * 625 * @since 1.2 626 */ 627 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 628 /** 629 * The "IPA Extensions" Unicode Block. 630 * 631 * @since 1.2 632 */ 633 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 634 /** 635 * The "Spacing Modifier Letters" Unicode Block. 636 * 637 * @since 1.2 638 */ 639 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 640 /** 641 * The "Combining Diacritical Marks" Unicode Block. 642 * 643 * @since 1.2 644 */ 645 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 646 /** 647 * The "Greek and Coptic" Unicode Block. Previously referred 648 * to as "Greek". 649 * 650 * @since 1.2 651 */ 652 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 653 /** 654 * The "Cyrillic" Unicode Block. 
655 * 656 * @since 1.2 657 */ 658 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 659 /** 660 * The "Cyrillic Supplement" Unicode Block. Previously 661 * referred to as "Cyrillic Supplementary". 662 * 663 * @since 1.5 664 */ 665 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 666 /** 667 * The "Armenian" Unicode Block. 668 * 669 * @since 1.2 670 */ 671 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 672 /** 673 * The "Hebrew" Unicode Block. 674 * 675 * @since 1.2 676 */ 677 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 678 /** 679 * The "Arabic" Unicode Block. 680 * 681 * @since 1.2 682 */ 683 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 684 /** 685 * The "Syriac" Unicode Block. 686 * 687 * @since 1.4 688 */ 689 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 690 /** 691 * The "Thaana" Unicode Block. 692 * 693 * @since 1.4 694 */ 695 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 696 /** 697 * The "Devanagari" Unicode Block. 698 * 699 * @since 1.2 700 */ 701 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 702 /** 703 * The "Bengali" Unicode Block. 704 * 705 * @since 1.2 706 */ 707 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 708 /** 709 * The "Gurmukhi" Unicode Block. 710 * 711 * @since 1.2 712 */ 713 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 714 /** 715 * The "Gujarati" Unicode Block. 716 * 717 * @since 1.2 718 */ 719 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 720 /** 721 * The "Oriya" Unicode Block. 
722 * 723 * @since 1.2 724 */ 725 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 726 /** 727 * The "Tamil" Unicode Block. 728 * 729 * @since 1.2 730 */ 731 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 732 /** 733 * The "Telugu" Unicode Block. 734 * 735 * @since 1.2 736 */ 737 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 738 /** 739 * The "Kannada" Unicode Block. 740 * 741 * @since 1.2 742 */ 743 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 744 /** 745 * The "Malayalam" Unicode Block. 746 * 747 * @since 1.2 748 */ 749 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 750 /** 751 * The "Sinhala" Unicode Block. 752 * 753 * @since 1.4 754 */ 755 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 756 /** 757 * The "Thai" Unicode Block. 758 * 759 * @since 1.2 760 */ 761 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 762 /** 763 * The "Lao" Unicode Block. 764 * 765 * @since 1.2 766 */ 767 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 768 /** 769 * The "Tibetan" Unicode Block. 770 * 771 * @since 1.2 772 */ 773 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 774 /** 775 * The "Myanmar" Unicode Block. 776 * 777 * @since 1.4 778 */ 779 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 780 /** 781 * The "Georgian" Unicode Block. 782 * 783 * @since 1.2 784 */ 785 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 786 /** 787 * The "Hangul Jamo" Unicode Block. 788 * 789 * @since 1.2 790 */ 791 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 792 /** 793 * The "Ethiopic" Unicode Block. 
794 * 795 * @since 1.4 796 */ 797 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 798 /** 799 * The "Cherokee" Unicode Block. 800 * 801 * @since 1.4 802 */ 803 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 804 /** 805 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 806 * 807 * @since 1.4 808 */ 809 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 810 /** 811 * The "Ogham" Unicode Block. 812 * 813 * @since 1.4 814 */ 815 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 816 /** 817 * The "Runic" Unicode Block. 818 * 819 * @since 1.4 820 */ 821 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 822 /** 823 * The "Tagalog" Unicode Block. 824 * 825 * @since 1.5 826 */ 827 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 828 /** 829 * The "Hanunoo" Unicode Block. 830 * 831 * @since 1.5 832 */ 833 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 834 /** 835 * The "Buhid" Unicode Block. 836 * 837 * @since 1.5 838 */ 839 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 840 /** 841 * The "Tagbanwa" Unicode Block. 842 * 843 * @since 1.5 844 */ 845 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 846 /** 847 * The "Khmer" Unicode Block. 848 * 849 * @since 1.4 850 */ 851 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 852 /** 853 * The "Mongolian" Unicode Block. 854 * 855 * @since 1.4 856 */ 857 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 858 /** 859 * The "Limbu" Unicode Block. 
860 * 861 * @since 1.5 862 */ 863 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 864 /** 865 * The "Tai Le" Unicode Block. 866 * 867 * @since 1.5 868 */ 869 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 870 /** 871 * The "Khmer Symbols" Unicode Block. 872 * 873 * @since 1.5 874 */ 875 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 876 /** 877 * The "Phonetic Extensions" Unicode Block. 878 * 879 * @since 1.5 880 */ 881 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 882 /** 883 * The "Latin Extended Additional" Unicode Block. 884 * 885 * @since 1.2 886 */ 887 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 888 /** 889 * The "Greek Extended" Unicode Block. 890 * 891 * @since 1.2 892 */ 893 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 894 /** 895 * The "General Punctuation" Unicode Block. 896 * 897 * @since 1.2 898 */ 899 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 900 /** 901 * The "Superscripts and Subscripts" Unicode Block. 902 * 903 * @since 1.2 904 */ 905 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 906 /** 907 * The "Currency Symbols" Unicode Block. 908 * 909 * @since 1.2 910 */ 911 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 912 /** 913 * The "Combining Diacritical Marks for Symbols" Unicode 914 * Block. Previously referred to as "Combining Marks for 915 * Symbols". 
916 * 917 * @since 1.2 918 */ 919 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 920 /** 921 * The "Letterlike Symbols" Unicode Block. 922 * 923 * @since 1.2 924 */ 925 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 926 /** 927 * The "Number Forms" Unicode Block. 928 * 929 * @since 1.2 930 */ 931 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 932 /** 933 * The "Arrows" Unicode Block. 934 * 935 * @since 1.2 936 */ 937 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 938 /** 939 * The "Mathematical Operators" Unicode Block. 940 * 941 * @since 1.2 942 */ 943 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 944 /** 945 * The "Miscellaneous Technical" Unicode Block. 946 * 947 * @since 1.2 948 */ 949 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 950 /** 951 * The "Control Pictures" Unicode Block. 952 * 953 * @since 1.2 954 */ 955 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 956 /** 957 * The "Optical Character Recognition" Unicode Block. 958 * 959 * @since 1.2 960 */ 961 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 962 /** 963 * The "Enclosed Alphanumerics" Unicode Block. 964 * 965 * @since 1.2 966 */ 967 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 968 /** 969 * The "Box Drawing" Unicode Block. 970 * 971 * @since 1.2 972 */ 973 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 974 /** 975 * The "Block Elements" Unicode Block. 
976 * 977 * @since 1.2 978 */ 979 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 980 /** 981 * The "Geometric Shapes" Unicode Block. 982 * 983 * @since 1.2 984 */ 985 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 986 /** 987 * The "Miscellaneous Symbols" Unicode Block. 988 * 989 * @since 1.2 990 */ 991 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 992 /** 993 * The "Dingbats" Unicode Block. 994 * 995 * @since 1.2 996 */ 997 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 998 /** 999 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 1000 * 1001 * @since 1.5 1002 */ 1003 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef); 1004 /** 1005 * The "Supplemental Arrows-A" Unicode Block. 1006 * 1007 * @since 1.5 1008 */ 1009 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff); 1010 /** 1011 * The "Braille Patterns" Unicode Block. 1012 * 1013 * @since 1.4 1014 */ 1015 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff); 1016 /** 1017 * The "Supplemental Arrows-B" Unicode Block. 1018 * 1019 * @since 1.5 1020 */ 1021 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f); 1022 /** 1023 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 1024 * 1025 * @since 1.5 1026 */ 1027 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff); 1028 /** 1029 * The "Supplemental Mathematical Operators" Unicode Block. 
1030 * 1031 * @since 1.5 1032 */ 1033 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff); 1034 /** 1035 * The "Miscellaneous Symbols and Arrows" Unicode Block. 1036 * 1037 * @since 1.2 1038 */ 1039 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff); 1040 /** 1041 * The "CJK Radicals Supplement" Unicode Block. 1042 * 1043 * @since 1.4 1044 */ 1045 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff); 1046 /** 1047 * The "Kangxi Radicals" Unicode Block. 1048 * 1049 * @since 1.4 1050 */ 1051 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf); 1052 /** 1053 * The "Ideographic Description Characters" Unicode Block. 1054 * 1055 * @since 1.4 1056 */ 1057 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff); 1058 /** 1059 * The "CJK Symbols and Punctuation" Unicode Block. 1060 * 1061 * @since 1.2 1062 */ 1063 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f); 1064 /** 1065 * The "Hiragana" Unicode Block. 1066 * 1067 * @since 1.2 1068 */ 1069 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f); 1070 /** 1071 * The "Katakana" Unicode Block. 1072 * 1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff); 1076 /** 1077 * The "Bopomofo" Unicode Block. 1078 * 1079 * @since 1.2 1080 */ 1081 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f); 1082 /** 1083 * The "Hangul Compatibility Jamo" Unicode Block. 
1084 * 1085 * @since 1.2 1086 */ 1087 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f); 1088 /** 1089 * The "Kanbun" Unicode Block. 1090 * 1091 * @since 1.2 1092 */ 1093 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f); 1094 /** 1095 * The "Bopomofo Extended" Unicode Block. 1096 * 1097 * @since 1.4 1098 */ 1099 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf); 1100 /** 1101 * The "Katakana Phonetic Extensions" Unicode Block. 1102 * 1103 * @since 1.5 1104 */ 1105 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff); 1106 /** 1107 * The "Enclosed CJK Letters and Months" Unicode Block. 1108 * 1109 * @since 1.2 1110 */ 1111 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff); 1112 /** 1113 * The "CJK Compatibility" Unicode Block. 1114 * 1115 * @since 1.2 1116 */ 1117 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff); 1118 /** 1119 * The "CJK Unified Ideographs Extension A" Unicode Block. 1120 * 1121 * @since 1.4 1122 */ 1123 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf); 1124 /** 1125 * The "Yijing Hexagram Symbols" Unicode Block. 1126 * 1127 * @since 1.5 1128 */ 1129 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff); 1130 /** 1131 * The "CJK Unified Ideographs" Unicode Block. 1132 * 1133 * @since 1.2 1134 */ 1135 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff); 1136 /** 1137 * The "Yi Syllables" Unicode Block. 
*
         * @since 1.4
         */
        public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
        /**
         * The "Yi Radicals" Unicode Block (declared range U+A490 to U+A4CF).
         *
         * @since 1.4
         */
        public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
        /**
         * The "Hangul Syllables" Unicode Block (declared range U+AC00 to U+D7AF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
        /**
         * The "High Surrogates" Unicode Block. This block represents
         * code point values in the high surrogate range 0xD800 to 0xDB7F.
         */
        public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
        /**
         * The "High Private Use Surrogates" Unicode Block. This block
         * represents code point values in the high surrogate range 0xDB80 to
         * 0xDBFF.
         */
        public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
        /**
         * The "Low Surrogates" Unicode Block. This block represents
         * code point values in the low surrogate range 0xDC00 to 0xDFFF.
         */
        public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
        /**
         * The "Private Use Area" Unicode Block (declared range U+E000 to U+F8FF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
        /**
         * The "CJK Compatibility Ideographs" Unicode Block (declared range U+F900 to U+FAFF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
        /**
         * The "Alphabetic Presentation Forms" Unicode Block (declared range U+FB00 to U+FB4F).
*
         * @since 1.2
         */
        public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
        /**
         * The "Arabic Presentation Forms-A" Unicode Block (declared range U+FB50 to U+FDFF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
        /**
         * The "Variation Selectors" Unicode Block (declared range U+FE00 to U+FE0F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
        /**
         * The "Combining Half Marks" Unicode Block (declared range U+FE20 to U+FE2F).
         *
         * @since 1.2
         */
        public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
        /**
         * The "CJK Compatibility Forms" Unicode Block (declared range U+FE30 to U+FE4F).
         *
         * @since 1.2
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
        /**
         * The "Small Form Variants" Unicode Block (declared range U+FE50 to U+FE6F).
         *
         * @since 1.2
         */
        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
        /**
         * The "Arabic Presentation Forms-B" Unicode Block (declared range U+FE70 to U+FEFF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
        /**
         * The "Halfwidth and Fullwidth Forms" Unicode Block (declared range U+FF00 to U+FFEF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
        /**
         * The "Specials" Unicode Block (declared range U+FFF0 to U+FFFF).
         *
         * @since 1.2
         */
        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
        /**
         * The "Linear B Syllabary" Unicode Block (declared range U+10000 to U+1007F).
*
         * @since 1.2
         */
        // NOTE(review): "@since 1.2" looks inconsistent -- every other block declared above
        // U+FFFF in this class is tagged 1.5; confirm against the reference documentation.
        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
        /**
         * The "Linear B Ideograms" Unicode Block (declared range U+10080 to U+100FF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
        /**
         * The "Aegean Numbers" Unicode Block (declared range U+10100 to U+1013F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
        /**
         * The "Old Italic" Unicode Block (declared range U+10300 to U+1032F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
        /**
         * The "Gothic" Unicode Block (declared range U+10330 to U+1034F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
        /**
         * The "Ugaritic" Unicode Block (declared range U+10380 to U+1039F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
        /**
         * The "Deseret" Unicode Block (declared range U+10400 to U+1044F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
        /**
         * The "Shavian" Unicode Block (declared range U+10450 to U+1047F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
        /**
         * The "Osmanya" Unicode Block (declared range U+10480 to U+104AF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
        /**
         * The "Cypriot Syllabary" Unicode Block (declared range U+10800 to U+1083F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
        /**
         * The "Byzantine Musical Symbols" Unicode Block (declared range U+1D000 to U+1D0FF).
*
         * @since 1.5
         */
        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
        /**
         * The "Musical Symbols" Unicode Block (declared range U+1D100 to U+1D1FF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
        /**
         * The "Tai Xuan Jing Symbols" Unicode Block (declared range U+1D300 to U+1D35F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
        /**
         * The "Mathematical Alphanumeric Symbols" Unicode Block (declared range U+1D400 to U+1D7FF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
        /**
         * The "CJK Unified Ideographs Extension B" Unicode Block (declared range U+20000 to U+2A6DF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
        /**
         * The "CJK Compatibility Ideographs Supplement" Unicode Block (declared range U+2F800 to U+2FA1F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
        /**
         * The "Tags" Unicode Block (declared range U+E0000 to U+E007F).
         *
         * @since 1.5
         */
        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
        /**
         * The "Variation Selectors Supplement" Unicode Block (declared range U+E0100 to U+E01EF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
        /**
         * The "Supplementary Private Use Area-A" Unicode Block (declared range U+F0000 to U+FFFFF).
*
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
        /**
         * The "Supplementary Private Use Area-B" Unicode Block (declared range U+100000 to U+10FFFF).
         *
         * @since 1.5
         */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);

        /*
         * All of the UnicodeBlocks with valid ranges in ascending order.
         *
         * Starts out null; forName(String) and of(int) each assign it from
         * UCharacter.getBlockTable() when they find it unset. That assignment
         * is not synchronised in either method.
         */
        private static UnicodeBlock[] BLOCKS;

        // BEGIN android-changed
        // /*
        //  * A SortedMap (String.CASE_INSENSITIVE_ORDER) with keys that represents
        //  * valid block names and values of the UnicodeBlock constant they map
        //  * to.
        //  */
        // private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = ...;
        // END android-changed

        /**
         * Retrieves the constant that corresponds to the specified block name.
         * The block names are defined by the Unicode 4.0.1 specification in the
         * {@code Blocks-4.0.1.txt} file.
         * <p>
         * Block names may be one of the following:
         * <ul>
         * <li>Canonical block name, as defined by the Unicode specification;
         * case-insensitive.</li>
         * <li>Canonical block name without any spaces, as defined by the
         * Unicode specification; case-insensitive.</li>
         * <li>{@code UnicodeBlock} constant identifier. This is determined by
         * uppercasing the canonical name and replacing all spaces and hyphens
         * with underscores.</li>
         * </ul>
         *
         * @param blockName
         *            the name of the block to retrieve.
         * @return the UnicodeBlock constant corresponding to {@code blockName}.
         * @throws NullPointerException
         *             if {@code blockName} is {@code null}.
         * @throws IllegalArgumentException
         *             if {@code blockName} is not a valid block name.
1394 * @since 1.5 1395 */ 1396 public static final UnicodeBlock forName(String blockName) { 1397 // BEGIN android-note 1398 // trying to get closer to the RI which defines this as final. 1399 // END android-note 1400 if (blockName == null) { 1401 throw new NullPointerException(); 1402 } 1403 // BEGIN android-changed 1404 if (BLOCKS == null) { 1405 BLOCKS = UCharacter.getBlockTable(); 1406 } 1407 int block = UCharacter.forName(blockName); 1408 if (block == -1) { 1409 if(blockName.equals("SURROGATES_AREA")) { 1410 return SURROGATES_AREA; 1411 } else if(blockName.equalsIgnoreCase("greek")) { 1412 return GREEK; 1413 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1414 blockName.equals("Combining Marks for Symbols") || 1415 blockName.equals("CombiningMarksforSymbols")) { 1416 return COMBINING_MARKS_FOR_SYMBOLS; 1417 } 1418 throw new IllegalArgumentException(); 1419 } 1420 return BLOCKS[block]; 1421 // END android-changed 1422 } 1423 1424 /** 1425 * Gets the constant for the Unicode block that contains the specified 1426 * character. 1427 * 1428 * @param c 1429 * the character for which to get the {@code UnicodeBlock} 1430 * constant. 1431 * @return the {@code UnicodeBlock} constant for the block that contains 1432 * {@code c}, or {@code null} if {@code c} does not belong to 1433 * any defined block. 1434 */ 1435 public static UnicodeBlock of(char c) { 1436 return of((int) c); 1437 } 1438 1439 /** 1440 * Gets the constant for the Unicode block that contains the specified 1441 * Unicode code point. 1442 * 1443 * @param codePoint 1444 * the Unicode code point for which to get the 1445 * {@code UnicodeBlock} constant. 1446 * @return the {@code UnicodeBlock} constant for the block that contains 1447 * {@code codePoint}, or {@code null} if {@code codePoint} does 1448 * not belong to any defined block. 1449 * @throws IllegalArgumentException 1450 * if {@code codePoint} is not a valid Unicode code point. 
1451 * @since 1.5 1452 */ 1453 public static UnicodeBlock of(int codePoint) { 1454 if (!isValidCodePoint(codePoint)) { 1455 throw new IllegalArgumentException(); 1456 } 1457 // BEGIN android-changed 1458 if (BLOCKS == null) { 1459 BLOCKS = UCharacter.getBlockTable(); 1460 } 1461 int block = UCharacter.of(codePoint); 1462 if(block == -1 || block >= BLOCKS.length) { 1463 return null; 1464 } 1465 return BLOCKS[block]; 1466 // END android-changed 1467 } 1468 1469 // BEGIN android-changed 1470 private UnicodeBlock(String blockName, int start, int end) { 1471 super(blockName); 1472 } 1473 // END android-changed 1474 } 1475 1476 /** 1477 * Constructs a new {@code Character} with the specified primitive char 1478 * value. 1479 * 1480 * @param value 1481 * the primitive char value to store in the new instance. 1482 */ 1483 public Character(char value) { 1484 this.value = value; 1485 } 1486 1487 /** 1488 * Gets the primitive value of this character. 1489 * 1490 * @return this object's primitive value. 1491 */ 1492 public char charValue() { 1493 return value; 1494 } 1495 1496 /** 1497 * Compares this object to the specified character object to determine their 1498 * relative order. 1499 * 1500 * @param c 1501 * the character object to compare this object to. 1502 * @return {@code 0} if the value of this character and the value of 1503 * {@code c} are equal; a positive value if the value of this 1504 * character is greater than the value of {@code c}; a negative 1505 * value if the value of this character is less than the value of 1506 * {@code c}. 1507 * @see java.lang.Comparable 1508 * @since 1.2 1509 */ 1510 public int compareTo(Character c) { 1511 return value - c.value; 1512 } 1513 1514 /** 1515 * Returns a {@code Character} instance for the {@code char} value passed. 
1516 * <p> 1517 * If it is not necessary to get a new {@code Character} instance, it is 1518 * recommended to use this method instead of the constructor, since it 1519 * maintains a cache of instances which may result in better performance. 1520 * 1521 * @param c 1522 * the char value for which to get a {@code Character} instance. 1523 * @return the {@code Character} instance for {@code c}. 1524 * @since 1.5 1525 */ 1526 public static Character valueOf(char c) { 1527 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1528 } 1529 1530 /** 1531 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1532 */ 1533 private static final Character[] SMALL_VALUES = new Character[128]; 1534 1535 static { 1536 for(int i = 0; i < 128; i++) { 1537 SMALL_VALUES[i] = new Character((char) i); 1538 } 1539 } 1540 /** 1541 * Indicates whether {@code codePoint} is a valid Unicode code point. 1542 * 1543 * @param codePoint 1544 * the code point to test. 1545 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1546 * {@code false} otherwise. 1547 * @since 1.5 1548 */ 1549 public static boolean isValidCodePoint(int codePoint) { 1550 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1551 } 1552 1553 /** 1554 * Indicates whether {@code codePoint} is within the supplementary code 1555 * point range. 1556 * 1557 * @param codePoint 1558 * the code point to test. 1559 * @return {@code true} if {@code codePoint} is within the supplementary 1560 * code point range; {@code false} otherwise. 1561 * @since 1.5 1562 */ 1563 public static boolean isSupplementaryCodePoint(int codePoint) { 1564 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1565 } 1566 1567 /** 1568 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1569 * that is used for representing supplementary characters in UTF-16 1570 * encoding. 1571 * 1572 * @param ch 1573 * the character to test. 
1574 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1575 * {@code false} otherwise. 1576 * @see #isLowSurrogate(char) 1577 * @since 1.5 1578 */ 1579 public static boolean isHighSurrogate(char ch) { 1580 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1581 } 1582 1583 /** 1584 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1585 * that is used for representing supplementary characters in UTF-16 1586 * encoding. 1587 * 1588 * @param ch 1589 * the character to test. 1590 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1591 * {@code false} otherwise. 1592 * @see #isHighSurrogate(char) 1593 * @since 1.5 1594 */ 1595 public static boolean isLowSurrogate(char ch) { 1596 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1597 } 1598 1599 /** 1600 * Indicates whether the specified character pair is a valid surrogate pair. 1601 * 1602 * @param high 1603 * the high surrogate unit to test. 1604 * @param low 1605 * the low surrogate unit to test. 1606 * @return {@code true} if {@code high} is a high-surrogate code unit and 1607 * {@code low} is a low-surrogate code unit; {@code false} 1608 * otherwise. 1609 * @see #isHighSurrogate(char) 1610 * @see #isLowSurrogate(char) 1611 * @since 1.5 1612 */ 1613 public static boolean isSurrogatePair(char high, char low) { 1614 return (isHighSurrogate(high) && isLowSurrogate(low)); 1615 } 1616 1617 /** 1618 * Calculates the number of {@code char} values required to represent the 1619 * specified Unicode code point. This method checks if the {@code codePoint} 1620 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1621 * returned, otherwise {@code 1}. To test if the code point is valid, use 1622 * the {@link #isValidCodePoint(int)} method. 1623 * 1624 * @param codePoint 1625 * the code point for which to calculate the number of required 1626 * chars. 1627 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 
1628 * @see #isValidCodePoint(int) 1629 * @see #isSupplementaryCodePoint(int) 1630 * @since 1.5 1631 */ 1632 public static int charCount(int codePoint) { 1633 return (codePoint >= 0x10000 ? 2 : 1); 1634 } 1635 1636 /** 1637 * Converts a surrogate pair into a Unicode code point. This method assumes 1638 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1639 * surrogates, then the result is indeterminate. The 1640 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1641 * method to validate the pair. 1642 * 1643 * @param high 1644 * the high surrogate unit. 1645 * @param low 1646 * the low surrogate unit. 1647 * @return the Unicode code point corresponding to the surrogate unit pair. 1648 * @see #isSurrogatePair(char, char) 1649 * @since 1.5 1650 */ 1651 public static int toCodePoint(char high, char low) { 1652 // See RFC 2781, Section 2.2 1653 // http://www.faqs.org/rfcs/rfc2781.html 1654 int h = (high & 0x3FF) << 10; 1655 int l = low & 0x3FF; 1656 return (h | l) + 0x10000; 1657 } 1658 1659 /** 1660 * Returns the code point at {@code index} in the specified sequence of 1661 * character units. If the unit at {@code index} is a high-surrogate unit, 1662 * {@code index + 1} is less than the length of the sequence and the unit at 1663 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1664 * point represented by the pair is returned; otherwise the {@code char} 1665 * value at {@code index} is returned. 1666 * 1667 * @param seq 1668 * the source sequence of {@code char} units. 1669 * @param index 1670 * the position in {@code seq} from which to retrieve the code 1671 * point. 1672 * @return the Unicode code point or {@code char} value at {@code index} in 1673 * {@code seq}. 1674 * @throws NullPointerException 1675 * if {@code seq} is {@code null}. 1676 * @throws IndexOutOfBoundsException 1677 * if the {@code index} is negative or greater than or equal to 1678 * the length of {@code seq}. 
1679 * @since 1.5 1680 */ 1681 public static int codePointAt(CharSequence seq, int index) { 1682 if (seq == null) { 1683 throw new NullPointerException(); 1684 } 1685 int len = seq.length(); 1686 if (index < 0 || index >= len) { 1687 throw new IndexOutOfBoundsException(); 1688 } 1689 1690 char high = seq.charAt(index++); 1691 if (index >= len) { 1692 return high; 1693 } 1694 char low = seq.charAt(index); 1695 if (isSurrogatePair(high, low)) { 1696 return toCodePoint(high, low); 1697 } 1698 return high; 1699 } 1700 1701 /** 1702 * Returns the code point at {@code index} in the specified array of 1703 * character units. If the unit at {@code index} is a high-surrogate unit, 1704 * {@code index + 1} is less than the length of the array and the unit at 1705 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1706 * point represented by the pair is returned; otherwise the {@code char} 1707 * value at {@code index} is returned. 1708 * 1709 * @param seq 1710 * the source array of {@code char} units. 1711 * @param index 1712 * the position in {@code seq} from which to retrieve the code 1713 * point. 1714 * @return the Unicode code point or {@code char} value at {@code index} in 1715 * {@code seq}. 1716 * @throws NullPointerException 1717 * if {@code seq} is {@code null}. 1718 * @throws IndexOutOfBoundsException 1719 * if the {@code index} is negative or greater than or equal to 1720 * the length of {@code seq}. 
1721 * @since 1.5 1722 */ 1723 public static int codePointAt(char[] seq, int index) { 1724 if (seq == null) { 1725 throw new NullPointerException(); 1726 } 1727 int len = seq.length; 1728 if (index < 0 || index >= len) { 1729 throw new IndexOutOfBoundsException(); 1730 } 1731 1732 char high = seq[index++]; 1733 if (index >= len) { 1734 return high; 1735 } 1736 char low = seq[index]; 1737 if (isSurrogatePair(high, low)) { 1738 return toCodePoint(high, low); 1739 } 1740 return high; 1741 } 1742 1743 /** 1744 * Returns the code point at {@code index} in the specified array of 1745 * character units, where {@code index} has to be less than {@code limit}. 1746 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1747 * is less than {@code limit} and the unit at {@code index + 1} is a 1748 * low-surrogate unit, then the supplementary code point represented by the 1749 * pair is returned; otherwise the {@code char} value at {@code index} is 1750 * returned. 1751 * 1752 * @param seq 1753 * the source array of {@code char} units. 1754 * @param index 1755 * the position in {@code seq} from which to get the code point. 1756 * @param limit 1757 * the index after the last unit in {@code seq} that can be used. 1758 * @return the Unicode code point or {@code char} value at {@code index} in 1759 * {@code seq}. 1760 * @throws NullPointerException 1761 * if {@code seq} is {@code null}. 1762 * @throws IndexOutOfBoundsException 1763 * if {@code index < 0}, {@code index >= limit}, 1764 * {@code limit < 0} or if {@code limit} is greater than the 1765 * length of {@code seq}. 
1766 * @since 1.5 1767 */ 1768 public static int codePointAt(char[] seq, int index, int limit) { 1769 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1770 throw new IndexOutOfBoundsException(); 1771 } 1772 1773 char high = seq[index++]; 1774 if (index >= limit) { 1775 return high; 1776 } 1777 char low = seq[index]; 1778 if (isSurrogatePair(high, low)) { 1779 return toCodePoint(high, low); 1780 } 1781 return high; 1782 } 1783 1784 /** 1785 * Returns the code point that precedes {@code index} in the specified 1786 * sequence of character units. If the unit at {@code index - 1} is a 1787 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1788 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1789 * point represented by the pair is returned; otherwise the {@code char} 1790 * value at {@code index - 1} is returned. 1791 * 1792 * @param seq 1793 * the source sequence of {@code char} units. 1794 * @param index 1795 * the position in {@code seq} following the code 1796 * point that should be returned. 1797 * @return the Unicode code point or {@code char} value before {@code index} 1798 * in {@code seq}. 1799 * @throws NullPointerException 1800 * if {@code seq} is {@code null}. 1801 * @throws IndexOutOfBoundsException 1802 * if the {@code index} is less than 1 or greater than the 1803 * length of {@code seq}. 
1804 * @since 1.5 1805 */ 1806 public static int codePointBefore(CharSequence seq, int index) { 1807 if (seq == null) { 1808 throw new NullPointerException(); 1809 } 1810 int len = seq.length(); 1811 if (index < 1 || index > len) { 1812 throw new IndexOutOfBoundsException(); 1813 } 1814 1815 char low = seq.charAt(--index); 1816 if (--index < 0) { 1817 return low; 1818 } 1819 char high = seq.charAt(index); 1820 if (isSurrogatePair(high, low)) { 1821 return toCodePoint(high, low); 1822 } 1823 return low; 1824 } 1825 1826 /** 1827 * Returns the code point that precedes {@code index} in the specified 1828 * array of character units. If the unit at {@code index - 1} is a 1829 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1830 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1831 * point represented by the pair is returned; otherwise the {@code char} 1832 * value at {@code index - 1} is returned. 1833 * 1834 * @param seq 1835 * the source array of {@code char} units. 1836 * @param index 1837 * the position in {@code seq} following the code 1838 * point that should be returned. 1839 * @return the Unicode code point or {@code char} value before {@code index} 1840 * in {@code seq}. 1841 * @throws NullPointerException 1842 * if {@code seq} is {@code null}. 1843 * @throws IndexOutOfBoundsException 1844 * if the {@code index} is less than 1 or greater than the 1845 * length of {@code seq}. 
1846 * @since 1.5 1847 */ 1848 public static int codePointBefore(char[] seq, int index) { 1849 if (seq == null) { 1850 throw new NullPointerException(); 1851 } 1852 int len = seq.length; 1853 if (index < 1 || index > len) { 1854 throw new IndexOutOfBoundsException(); 1855 } 1856 1857 char low = seq[--index]; 1858 if (--index < 0) { 1859 return low; 1860 } 1861 char high = seq[index]; 1862 if (isSurrogatePair(high, low)) { 1863 return toCodePoint(high, low); 1864 } 1865 return low; 1866 } 1867 1868 /** 1869 * Returns the code point that precedes the {@code index} in the specified 1870 * array of character units and is not less than {@code start}. If the unit 1871 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1872 * less than {@code start} and the unit at {@code index - 2} is a 1873 * high-surrogate unit, then the supplementary code point represented by the 1874 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1875 * is returned. 1876 * 1877 * @param seq 1878 * the source array of {@code char} units. 1879 * @param index 1880 * the position in {@code seq} following the code point that 1881 * should be returned. 1882 * @param start 1883 * the index of the first element in {@code seq}. 1884 * @return the Unicode code point or {@code char} value before {@code index} 1885 * in {@code seq}. 1886 * @throws NullPointerException 1887 * if {@code seq} is {@code null}. 1888 * @throws IndexOutOfBoundsException 1889 * if the {@code index <= start}, {@code start < 0}, 1890 * {@code index} is greater than the length of {@code seq}, or 1891 * if {@code start} is equal or greater than the length of 1892 * {@code seq}. 
1893 * @since 1.5 1894 */ 1895 public static int codePointBefore(char[] seq, int index, int start) { 1896 if (seq == null) { 1897 throw new NullPointerException(); 1898 } 1899 int len = seq.length; 1900 if (index <= start || index > len || start < 0 || start >= len) { 1901 throw new IndexOutOfBoundsException(); 1902 } 1903 1904 char low = seq[--index]; 1905 if (--index < start) { 1906 return low; 1907 } 1908 char high = seq[index]; 1909 if (isSurrogatePair(high, low)) { 1910 return toCodePoint(high, low); 1911 } 1912 return low; 1913 } 1914 1915 /** 1916 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1917 * and copies the value(s) into the char array {@code dst}, starting at 1918 * index {@code dstIndex}. 1919 * 1920 * @param codePoint 1921 * the Unicode code point to encode. 1922 * @param dst 1923 * the destination array to copy the encoded value into. 1924 * @param dstIndex 1925 * the index in {@code dst} from where to start copying. 1926 * @return the number of {@code char} value units copied into {@code dst}. 1927 * @throws IllegalArgumentException 1928 * if {@code codePoint} is not a valid Unicode code point. 1929 * @throws NullPointerException 1930 * if {@code dst} is {@code null}. 1931 * @throws IndexOutOfBoundsException 1932 * if {@code dstIndex} is negative, greater than or equal to 1933 * {@code dst.length} or equals {@code dst.length - 1} when 1934 * {@code codePoint} is a 1935 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
1936 * @since 1.5 1937 */ 1938 public static int toChars(int codePoint, char[] dst, int dstIndex) { 1939 if (!isValidCodePoint(codePoint)) { 1940 throw new IllegalArgumentException(); 1941 } 1942 if (dst == null) { 1943 throw new NullPointerException(); 1944 } 1945 if (dstIndex < 0 || dstIndex >= dst.length) { 1946 throw new IndexOutOfBoundsException(); 1947 } 1948 1949 if (isSupplementaryCodePoint(codePoint)) { 1950 if (dstIndex == dst.length - 1) { 1951 throw new IndexOutOfBoundsException(); 1952 } 1953 // See RFC 2781, Section 2.1 1954 // http://www.faqs.org/rfcs/rfc2781.html 1955 int cpPrime = codePoint - 0x10000; 1956 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1957 int low = 0xDC00 | (cpPrime & 0x3FF); 1958 dst[dstIndex] = (char) high; 1959 dst[dstIndex + 1] = (char) low; 1960 return 2; 1961 } 1962 1963 dst[dstIndex] = (char) codePoint; 1964 return 1; 1965 } 1966 1967 /** 1968 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1969 * and returns it as a char array. 1970 * 1971 * @param codePoint 1972 * the Unicode code point to encode. 1973 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 1974 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 1975 * then the returned array contains two characters, otherwise it 1976 * contains just one character. 1977 * @throws IllegalArgumentException 1978 * if {@code codePoint} is not a valid Unicode code point. 
1979 * @since 1.5 1980 */ 1981 public static char[] toChars(int codePoint) { 1982 if (!isValidCodePoint(codePoint)) { 1983 throw new IllegalArgumentException(); 1984 } 1985 1986 if (isSupplementaryCodePoint(codePoint)) { 1987 int cpPrime = codePoint - 0x10000; 1988 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1989 int low = 0xDC00 | (cpPrime & 0x3FF); 1990 return new char[] { (char) high, (char) low }; 1991 } 1992 return new char[] { (char) codePoint }; 1993 } 1994 1995 /** 1996 * Counts the number of Unicode code points in the subsequence of the 1997 * specified character sequence, as delineated by {@code beginIndex} and 1998 * {@code endIndex}. Any surrogate values with missing pair values will be 1999 * counted as one code point. 2000 * 2001 * @param seq 2002 * the {@code CharSequence} to look through. 2003 * @param beginIndex 2004 * the inclusive index to begin counting at. 2005 * @param endIndex 2006 * the exclusive index to stop counting at. 2007 * @return the number of Unicode code points. 2008 * @throws NullPointerException 2009 * if {@code seq} is {@code null}. 2010 * @throws IndexOutOfBoundsException 2011 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2012 * if {@code endIndex} is greater than the length of {@code seq}. 
2013 * @since 1.5 2014 */ 2015 public static int codePointCount(CharSequence seq, int beginIndex, 2016 int endIndex) { 2017 if (seq == null) { 2018 throw new NullPointerException(); 2019 } 2020 int len = seq.length(); 2021 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2022 throw new IndexOutOfBoundsException(); 2023 } 2024 2025 int result = 0; 2026 for (int i = beginIndex; i < endIndex; i++) { 2027 char c = seq.charAt(i); 2028 if (isHighSurrogate(c)) { 2029 if (++i < endIndex) { 2030 c = seq.charAt(i); 2031 if (!isLowSurrogate(c)) { 2032 result++; 2033 } 2034 } 2035 } 2036 result++; 2037 } 2038 return result; 2039 } 2040 2041 /** 2042 * Counts the number of Unicode code points in the subsequence of the 2043 * specified char array, as delineated by {@code offset} and {@code count}. 2044 * Any surrogate values with missing pair values will be counted as one code 2045 * point. 2046 * 2047 * @param seq 2048 * the char array to look through 2049 * @param offset 2050 * the inclusive index to begin counting at. 2051 * @param count 2052 * the number of {@code char} values to look through in 2053 * {@code seq}. 2054 * @return the number of Unicode code points. 2055 * @throws NullPointerException 2056 * if {@code seq} is {@code null}. 2057 * @throws IndexOutOfBoundsException 2058 * if {@code offset < 0}, {@code count < 0} or if 2059 * {@code offset + count} is greater than the length of 2060 * {@code seq}. 
2061 * @since 1.5 2062 */ 2063 public static int codePointCount(char[] seq, int offset, int count) { 2064 if (seq == null) { 2065 throw new NullPointerException(); 2066 } 2067 int len = seq.length; 2068 int endIndex = offset + count; 2069 if (offset < 0 || count < 0 || endIndex > len) { 2070 throw new IndexOutOfBoundsException(); 2071 } 2072 2073 int result = 0; 2074 for (int i = offset; i < endIndex; i++) { 2075 char c = seq[i]; 2076 if (isHighSurrogate(c)) { 2077 if (++i < endIndex) { 2078 c = seq[i]; 2079 if (!isLowSurrogate(c)) { 2080 result++; 2081 } 2082 } 2083 } 2084 result++; 2085 } 2086 return result; 2087 } 2088 2089 /** 2090 * Determines the index in the specified character sequence that is offset 2091 * {@code codePointOffset} code points from {@code index}. 2092 * 2093 * @param seq 2094 * the character sequence to find the index in. 2095 * @param index 2096 * the start index in {@code seq}. 2097 * @param codePointOffset 2098 * the number of code points to look backwards or forwards; may 2099 * be a negative or positive value. 2100 * @return the index in {@code seq} that is {@code codePointOffset} code 2101 * points away from {@code index}. 2102 * @throws NullPointerException 2103 * if {@code seq} is {@code null}. 2104 * @throws IndexOutOfBoundsException 2105 * if {@code index < 0}, {@code index} is greater than the 2106 * length of {@code seq}, or if there are not enough values in 2107 * {@code seq} to skip {@code codePointOffset} code points 2108 * forwards or backwards (if {@code codePointOffset} is 2109 * negative) from {@code index}. 
2110 * @since 1.5 2111 */ 2112 public static int offsetByCodePoints(CharSequence seq, int index, 2113 int codePointOffset) { 2114 if (seq == null) { 2115 throw new NullPointerException(); 2116 } 2117 int len = seq.length(); 2118 if (index < 0 || index > len) { 2119 throw new IndexOutOfBoundsException(); 2120 } 2121 2122 if (codePointOffset == 0) { 2123 return index; 2124 } 2125 2126 if (codePointOffset > 0) { 2127 int codePoints = codePointOffset; 2128 int i = index; 2129 while (codePoints > 0) { 2130 codePoints--; 2131 if (i >= len) { 2132 throw new IndexOutOfBoundsException(); 2133 } 2134 if (isHighSurrogate(seq.charAt(i))) { 2135 int next = i + 1; 2136 if (next < len && isLowSurrogate(seq.charAt(next))) { 2137 i++; 2138 } 2139 } 2140 i++; 2141 } 2142 return i; 2143 } 2144 2145 assert codePointOffset < 0; 2146 int codePoints = -codePointOffset; 2147 int i = index; 2148 while (codePoints > 0) { 2149 codePoints--; 2150 i--; 2151 if (i < 0) { 2152 throw new IndexOutOfBoundsException(); 2153 } 2154 if (isLowSurrogate(seq.charAt(i))) { 2155 int prev = i - 1; 2156 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2157 i--; 2158 } 2159 } 2160 } 2161 return i; 2162 } 2163 2164 /** 2165 * Determines the index in a subsequence of the specified character array 2166 * that is offset {@code codePointOffset} code points from {@code index}. 2167 * The subsequence is delineated by {@code start} and {@code count}. 2168 * 2169 * @param seq 2170 * the character array to find the index in. 2171 * @param start 2172 * the inclusive index that marks the beginning of the 2173 * subsequence. 2174 * @param count 2175 * the number of {@code char} values to include within the 2176 * subsequence. 2177 * @param index 2178 * the start index in the subsequence of the char array. 2179 * @param codePointOffset 2180 * the number of code points to look backwards or forwards; may 2181 * be a negative or positive value. 
2182 * @return the index in {@code seq} that is {@code codePointOffset} code 2183 * points away from {@code index}. 2184 * @throws NullPointerException 2185 * if {@code seq} is {@code null}. 2186 * @throws IndexOutOfBoundsException 2187 * if {@code start < 0}, {@code count < 0}, 2188 * {@code index < start}, {@code index > start + count}, 2189 * {@code start + count} is greater than the length of 2190 * {@code seq}, or if there are not enough values in 2191 * {@code seq} to skip {@code codePointOffset} code points 2192 * forward or backward (if {@code codePointOffset} is 2193 * negative) from {@code index}. 2194 * @since 1.5 2195 */ 2196 public static int offsetByCodePoints(char[] seq, int start, int count, 2197 int index, int codePointOffset) { 2198 if (seq == null) { 2199 throw new NullPointerException(); 2200 } 2201 int end = start + count; 2202 if (start < 0 || count < 0 || end > seq.length || index < start 2203 || index > end) { 2204 throw new IndexOutOfBoundsException(); 2205 } 2206 2207 if (codePointOffset == 0) { 2208 return index; 2209 } 2210 2211 if (codePointOffset > 0) { 2212 int codePoints = codePointOffset; 2213 int i = index; 2214 while (codePoints > 0) { 2215 codePoints--; 2216 if (i >= end) { 2217 throw new IndexOutOfBoundsException(); 2218 } 2219 if (isHighSurrogate(seq[i])) { 2220 int next = i + 1; 2221 if (next < end && isLowSurrogate(seq[next])) { 2222 i++; 2223 } 2224 } 2225 i++; 2226 } 2227 return i; 2228 } 2229 2230 assert codePointOffset < 0; 2231 int codePoints = -codePointOffset; 2232 int i = index; 2233 while (codePoints > 0) { 2234 codePoints--; 2235 i--; 2236 if (i < start) { 2237 throw new IndexOutOfBoundsException(); 2238 } 2239 if (isLowSurrogate(seq[i])) { 2240 int prev = i - 1; 2241 if (prev >= start && isHighSurrogate(seq[prev])) { 2242 i--; 2243 } 2244 } 2245 } 2246 return i; 2247 } 2248 2249 /** 2250 * Convenience method to determine the value of the specified character 2251 * {@code c} in the supplied radix. 
The value of {@code radix} must be 2252 * between MIN_RADIX and MAX_RADIX. 2253 * 2254 * @param c 2255 * the character to determine the value of. 2256 * @param radix 2257 * the radix. 2258 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2259 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2260 */ 2261 public static int digit(char c, int radix) { 2262 // BEGIN android-changed 2263 return digit((int) c, radix); 2264 // END android-changed 2265 } 2266 2267 /** 2268 * Convenience method to determine the value of the character 2269 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2270 * be between MIN_RADIX and MAX_RADIX. 2271 * 2272 * @param codePoint 2273 * the character, including supplementary characters. 2274 * @param radix 2275 * the radix. 2276 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2277 * {@link #MAX_RADIX} then the value of the character in the radix; 2278 * -1 otherwise. 2279 */ 2280 public static int digit(int codePoint, int radix) { 2281 // BEGIN android-changed 2282 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2283 return -1; 2284 } 2285 if (codePoint < 128) { 2286 // Optimized for ASCII 2287 int result = -1; 2288 if ('0' <= codePoint && codePoint <= '9') { 2289 result = codePoint - '0'; 2290 } else if ('a' <= codePoint && codePoint <= 'z') { 2291 result = 10 + (codePoint - 'a'); 2292 } else if ('A' <= codePoint && codePoint <= 'Z') { 2293 result = 10 + (codePoint - 'A'); 2294 } 2295 return result < radix ? result : -1; 2296 } 2297 return UCharacter.digit(codePoint, radix); 2298 // END android-changed 2299 } 2300 2301 /** 2302 * Compares this object with the specified object and indicates if they are 2303 * equal. In order to be equal, {@code object} must be an instance of 2304 * {@code Character} and have the same char value as this object. 2305 * 2306 * @param object 2307 * the object to compare this double with. 
2308 * @return {@code true} if the specified object is equal to this 2309 * {@code Character}; {@code false} otherwise. 2310 */ 2311 @Override 2312 public boolean equals(Object object) { 2313 return (object instanceof Character) 2314 && (value == ((Character) object).value); 2315 } 2316 2317 /** 2318 * Returns the character which represents the specified digit in the 2319 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2320 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2321 * smaller than {@code radix}. If any of these conditions does not hold, 0 2322 * is returned. 2323 * 2324 * @param digit 2325 * the integer value. 2326 * @param radix 2327 * the radix. 2328 * @return the character which represents the {@code digit} in the 2329 * {@code radix}. 2330 */ 2331 public static char forDigit(int digit, int radix) { 2332 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2333 if (0 <= digit && digit < radix) { 2334 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2335 } 2336 } 2337 return 0; 2338 } 2339 2340 /** 2341 * Gets the numeric value of the specified Unicode character. 2342 * 2343 * @param c 2344 * the Unicode character to get the numeric value of. 2345 * @return a non-negative numeric integer value if a numeric value for 2346 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2347 * -2 if the numeric value can not be represented with an integer. 2348 */ 2349 public static int getNumericValue(char c) { 2350 // BEGIN android-changed 2351 return getNumericValue((int) c); 2352 // END android-changed 2353 } 2354 2355 /** 2356 * Gets the numeric value of the specified Unicode code point. For example, 2357 * the code point '\u216B' stands for the Roman number XII, which has the 2358 * numeric value 12. 2359 * 2360 * @param codePoint 2361 * the Unicode code point to get the numeric value of. 
2362 * @return a non-negative numeric integer value if a numeric value for 2363 * {@code codePoint} exists, -1 if there is no numeric value for 2364 * {@code codePoint}, -2 if the numeric value can not be 2365 * represented with an integer. 2366 */ 2367 public static int getNumericValue(int codePoint) { 2368 // BEGIN android-changed 2369 if (codePoint < 128) { 2370 // Optimized for ASCII 2371 if (codePoint >= '0' && codePoint <= '9') { 2372 return codePoint - '0'; 2373 } 2374 if (codePoint >= 'a' && codePoint <= 'z') { 2375 return codePoint - ('a' - 10); 2376 } 2377 if (codePoint >= 'A' && codePoint <= 'Z') { 2378 return codePoint - ('A' - 10); 2379 } 2380 return -1; 2381 } 2382 return UCharacter.getNumericValue(codePoint); 2383 // END android-changed 2384 } 2385 2386 /** 2387 * Gets the general Unicode category of the specified character. 2388 * 2389 * @param c 2390 * the character to get the category of. 2391 * @return the Unicode category of {@code c}. 2392 */ 2393 public static int getType(char c) { 2394 // BEGIN android-changed 2395 return getType((int) c); 2396 // END android-changed 2397 } 2398 2399 /** 2400 * Gets the general Unicode category of the specified code point. 2401 * 2402 * @param codePoint 2403 * the Unicode code point to get the category of. 2404 * @return the Unicode category of {@code codePoint}. 2405 */ 2406 public static int getType(int codePoint) { 2407 // BEGIN android-changed 2408 // if (codePoint < 1000 && codePoint > 0) { 2409 // return typeValuesCache[codePoint]; 2410 // } 2411 // END android-changed 2412 int type = UCharacter.getType(codePoint); 2413 2414 // the type values returned by UCharacter are not compatible with what 2415 // the spec says.RI's Character type values skip the value 17. 2416 if (type <= Character.FORMAT) { 2417 return type; 2418 } 2419 return (type + 1); 2420 } 2421 2422 /** 2423 * Gets the Unicode directionality of the specified character. 
2424 * 2425 * @param c 2426 * the character to get the directionality of. 2427 * @return the Unicode directionality of {@code c}. 2428 */ 2429 public static byte getDirectionality(char c) { 2430 // BEGIN android-changed 2431 // int result = BinarySearch.binarySearchRange(bidiKeys, c); 2432 // int high = bidiValues[result * 2]; 2433 // if (c <= high) { 2434 // int code = bidiValues[result * 2 + 1]; 2435 // if (code < 0x100) { 2436 // return (byte) (code - 1); 2437 // } 2438 // return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1); 2439 // } 2440 // return DIRECTIONALITY_UNDEFINED; 2441 return getDirectionality((int)c); 2442 // END android-changed 2443 } 2444 2445 /** 2446 * Gets the Unicode directionality of the specified character. 2447 * 2448 * @param codePoint 2449 * the Unicode code point to get the directionality of. 2450 * @return the Unicode directionality of {@code codePoint}. 2451 */ 2452 public static byte getDirectionality(int codePoint) { 2453 if (getType(codePoint) == Character.UNASSIGNED) { 2454 return Character.DIRECTIONALITY_UNDEFINED; 2455 } 2456 2457 byte UCDirectionality = UCharacter.getDirectionality(codePoint); 2458 if (UCDirectionality == -1) { 2459 return -1; 2460 } 2461 return DIRECTIONALITY[UCDirectionality]; 2462 } 2463 2464 /** 2465 * Indicates whether the specified character is mirrored. 2466 * 2467 * @param c 2468 * the character to check. 2469 * @return {@code true} if {@code c} is mirrored; {@code false} 2470 * otherwise. 2471 */ 2472 public static boolean isMirrored(char c) { 2473 // BEGIN android-changed 2474 // int value = c / 16; 2475 // if (value >= mirrored.length) { 2476 // return false; 2477 // } 2478 // int bit = 1 << (c % 16); 2479 // return (mirrored[value] & bit) != 0; 2480 return isMirrored((int)c); 2481 // ENd android-changed 2482 } 2483 2484 /** 2485 * Indicates whether the specified code point is mirrored. 2486 * 2487 * @param codePoint 2488 * the code point to check. 
2489 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2490 * otherwise. 2491 */ 2492 public static boolean isMirrored(int codePoint) { 2493 return UCharacter.isMirrored(codePoint); 2494 } 2495 2496 @Override 2497 public int hashCode() { 2498 return value; 2499 } 2500 2501 /** 2502 * Indicates whether the specified character is defined in the Unicode 2503 * specification. 2504 * 2505 * @param c 2506 * the character to check. 2507 * @return {@code true} if the general Unicode category of the character is 2508 * not {@code UNASSIGNED}; {@code false} otherwise. 2509 */ 2510 public static boolean isDefined(char c) { 2511 // BEGIN android-changed 2512 // return getType(c) != UNASSIGNED; 2513 return UCharacter.isDefined(c); 2514 // END android-changed 2515 } 2516 2517 /** 2518 * Indicates whether the specified code point is defined in the Unicode 2519 * specification. 2520 * 2521 * @param codePoint 2522 * the code point to check. 2523 * @return {@code true} if the general Unicode category of the code point is 2524 * not {@code UNASSIGNED}; {@code false} otherwise. 2525 */ 2526 public static boolean isDefined(int codePoint) { 2527 return UCharacter.isDefined(codePoint); 2528 } 2529 2530 /** 2531 * Indicates whether the specified character is a digit. 2532 * 2533 * @param c 2534 * the character to check. 2535 * @return {@code true} if {@code c} is a digit; {@code false} 2536 * otherwise. 2537 */ 2538 public static boolean isDigit(char c) { 2539 // BEGIN android-changed 2540 return isDigit((int) c); 2541 // END android-changed 2542 } 2543 2544 /** 2545 * Indicates whether the specified code point is a digit. 2546 * 2547 * @param codePoint 2548 * the code point to check. 2549 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2550 * otherwise. 
2551 */ 2552 public static boolean isDigit(int codePoint) { 2553 // BEGIN android-changed 2554 // Optimized case for ASCII 2555 if ('0' <= codePoint && codePoint <= '9') { 2556 return true; 2557 } 2558 if (codePoint < 1632) { 2559 return false; 2560 } 2561 return UCharacter.isDigit(codePoint); 2562 // END android-changed 2563 } 2564 2565 /** 2566 * Indicates whether the specified character is ignorable in a Java or 2567 * Unicode identifier. 2568 * 2569 * @param c 2570 * the character to check. 2571 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2572 */ 2573 public static boolean isIdentifierIgnorable(char c) { 2574 // BEGIN android-changed 2575 return isIdentifierIgnorable((int) c); 2576 // END android-changed 2577 } 2578 2579 /** 2580 * Indicates whether the specified code point is ignorable in a Java or 2581 * Unicode identifier. 2582 * 2583 * @param codePoint 2584 * the code point to check. 2585 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2586 * otherwise. 2587 */ 2588 public static boolean isIdentifierIgnorable(int codePoint) { 2589 // BEGIN android-changed 2590 if (codePoint < 0x600) { 2591 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2592 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2593 } 2594 return UCharacter.isIdentifierIgnorable(codePoint); 2595 // END android-changed 2596 } 2597 2598 /** 2599 * Indicates whether the specified character is an ISO control character. 2600 * 2601 * @param c 2602 * the character to check. 2603 * @return {@code true} if {@code c} is an ISO control character; 2604 * {@code false} otherwise. 2605 */ 2606 public static boolean isISOControl(char c) { 2607 return isISOControl((int)c); 2608 } 2609 2610 /** 2611 * Indicates whether the specified code point is an ISO control character. 2612 * 2613 * @param c 2614 * the code point to check. 
2615 * @return {@code true} if {@code c} is an ISO control character; 2616 * {@code false} otherwise. 2617 */ 2618 public static boolean isISOControl(int c) { 2619 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2620 } 2621 2622 /** 2623 * Indicates whether the specified character is a valid part of a Java 2624 * identifier other than the first character. 2625 * 2626 * @param c 2627 * the character to check. 2628 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2629 * {@code false} otherwise. 2630 */ 2631 public static boolean isJavaIdentifierPart(char c) { 2632 // BEGIN android-changed 2633 return isJavaIdentifierPart((int) c); 2634 // END android-changed 2635 } 2636 2637 /** 2638 * Indicates whether the specified code point is a valid part of a Java 2639 * identifier other than the first character. 2640 * 2641 * @param codePoint 2642 * the code point to check. 2643 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2644 * {@code false} otherwise. 2645 */ 2646 public static boolean isJavaIdentifierPart(int codePoint) { 2647 // BEGIN android-changed: use precomputed bitmasks for the ASCII range. 
2648 // Optimized case for ASCII 2649 if (codePoint < 64) { 2650 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2651 } else if (codePoint < 128) { 2652 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2653 } 2654 int type = getType(codePoint); 2655 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2656 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2657 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2658 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2659 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2660 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2661 // END android-changed 2662 } 2663 2664 /** 2665 * Indicates whether the specified character is a valid first character for 2666 * a Java identifier. 2667 * 2668 * @param c 2669 * the character to check. 2670 * @return {@code true} if {@code c} is a valid first character of a Java 2671 * identifier; {@code false} otherwise. 2672 */ 2673 public static boolean isJavaIdentifierStart(char c) { 2674 // BEGIN android-changed 2675 return isJavaIdentifierStart((int) c); 2676 // END android-changed 2677 } 2678 2679 /** 2680 * Indicates whether the specified code point is a valid first character for 2681 * a Java identifier. 2682 * 2683 * @param codePoint 2684 * the code point to check. 2685 * @return {@code true} if {@code codePoint} is a valid start of a Java 2686 * identifier; {@code false} otherwise. 2687 */ 2688 public static boolean isJavaIdentifierStart(int codePoint) { 2689 // BEGIN android-changed: use precomputed bitmasks for the ASCII range. 2690 // Optimized case for ASCII 2691 if (codePoint < 64) { 2692 return (codePoint == '$'); // There's only one character in this range. 
2693 } else if (codePoint < 128) { 2694 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2695 } 2696 int type = getType(codePoint); 2697 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2698 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2699 // END android-changed 2700 } 2701 2702 /** 2703 * Indicates whether the specified character is a Java letter. 2704 * 2705 * @param c 2706 * the character to check. 2707 * @return {@code true} if {@code c} is a Java letter; {@code false} 2708 * otherwise. 2709 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2710 */ 2711 @Deprecated 2712 public static boolean isJavaLetter(char c) { 2713 return isJavaIdentifierStart(c); 2714 } 2715 2716 /** 2717 * Indicates whether the specified character is a Java letter or digit 2718 * character. 2719 * 2720 * @param c 2721 * the character to check. 2722 * @return {@code true} if {@code c} is a Java letter or digit; 2723 * {@code false} otherwise. 2724 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2725 */ 2726 @Deprecated 2727 public static boolean isJavaLetterOrDigit(char c) { 2728 return isJavaIdentifierPart(c); 2729 } 2730 2731 /** 2732 * Indicates whether the specified character is a letter. 2733 * 2734 * @param c 2735 * the character to check. 2736 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2737 */ 2738 public static boolean isLetter(char c) { 2739 // BEGIN android-changed 2740 return isLetter((int) c); 2741 // END android-changed 2742 } 2743 2744 /** 2745 * Indicates whether the specified code point is a letter. 2746 * 2747 * @param codePoint 2748 * the code point to check. 2749 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2750 * otherwise. 
2751 */ 2752 public static boolean isLetter(int codePoint) { 2753 // BEGIN android-changed 2754 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2755 return true; 2756 } 2757 if (codePoint < 128) { 2758 return false; 2759 } 2760 return UCharacter.isLetter(codePoint); 2761 // END android-changed 2762 } 2763 2764 /** 2765 * Indicates whether the specified character is a letter or a digit. 2766 * 2767 * @param c 2768 * the character to check. 2769 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2770 * otherwise. 2771 */ 2772 public static boolean isLetterOrDigit(char c) { 2773 // BEGIN android-changed 2774 return isLetterOrDigit((int) c); 2775 // END android-changed 2776 } 2777 2778 /** 2779 * Indicates whether the specified code point is a letter or a digit. 2780 * 2781 * @param codePoint 2782 * the code point to check. 2783 * @return {@code true} if {@code codePoint} is a letter or a digit; 2784 * {@code false} otherwise. 2785 */ 2786 public static boolean isLetterOrDigit(int codePoint) { 2787 // BEGIN android-changed 2788 // Optimized case for ASCII 2789 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2790 return true; 2791 } 2792 if ('0' <= codePoint && codePoint <= '9') { 2793 return true; 2794 } 2795 if (codePoint < 128) { 2796 return false; 2797 } 2798 return UCharacter.isLetterOrDigit(codePoint); 2799 // END android-changed 2800 } 2801 2802 /** 2803 * Indicates whether the specified character is a lower case letter. 2804 * 2805 * @param c 2806 * the character to check. 2807 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2808 * otherwise. 2809 */ 2810 public static boolean isLowerCase(char c) { 2811 // BEGIN android-changed 2812 return isLowerCase((int) c); 2813 // END android-changed 2814 } 2815 2816 /** 2817 * Indicates whether the specified code point is a lower case letter. 
2818 * 2819 * @param codePoint 2820 * the code point to check. 2821 * @return {@code true} if {@code codePoint} is a lower case letter; 2822 * {@code false} otherwise. 2823 */ 2824 public static boolean isLowerCase(int codePoint) { 2825 // BEGIN android-changed 2826 // Optimized case for ASCII 2827 if ('a' <= codePoint && codePoint <= 'z') { 2828 return true; 2829 } 2830 if (codePoint < 128) { 2831 return false; 2832 } 2833 // END android-changed 2834 return UCharacter.isLowerCase(codePoint); 2835 } 2836 2837 /** 2838 * Indicates whether the specified character is a Java space. 2839 * 2840 * @param c 2841 * the character to check. 2842 * @return {@code true} if {@code c} is a Java space; {@code false} 2843 * otherwise. 2844 * @deprecated Use {@link #isWhitespace(char)} 2845 */ 2846 @Deprecated 2847 public static boolean isSpace(char c) { 2848 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2849 } 2850 2851 /** 2852 * Indicates whether the specified character is a Unicode space character. 2853 * That is, if it is a member of one of the Unicode categories Space 2854 * Separator, Line Separator, or Paragraph Separator. 2855 * 2856 * @param c 2857 * the character to check. 2858 * @return {@code true} if {@code c} is a Unicode space character, 2859 * {@code false} otherwise. 2860 */ 2861 public static boolean isSpaceChar(char c) { 2862 // BEGIN android-changed 2863 return isSpaceChar((int) c); 2864 // END android-changed 2865 } 2866 2867 /** 2868 * Indicates whether the specified code point is a Unicode space character. 2869 * That is, if it is a member of one of the Unicode categories Space 2870 * Separator, Line Separator, or Paragraph Separator. 2871 * 2872 * @param codePoint 2873 * the code point to check. 2874 * @return {@code true} if {@code codePoint} is a Unicode space character, 2875 * {@code false} otherwise. 
2876 */ 2877 public static boolean isSpaceChar(int codePoint) { 2878 // BEGIN android-changed 2879 if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) { 2880 return true; 2881 } 2882 if (codePoint < 0x2000) { 2883 return false; 2884 } 2885 if (codePoint <= 0xffff) { 2886 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 2887 codePoint == 0x202f || codePoint == 0x3000; 2888 } 2889 return UCharacter.isSpaceChar(codePoint); 2890 // END android-changed 2891 } 2892 2893 /** 2894 * Indicates whether the specified character is a titlecase character. 2895 * 2896 * @param c 2897 * the character to check. 2898 * @return {@code true} if {@code c} is a titlecase character, {@code false} 2899 * otherwise. 2900 */ 2901 public static boolean isTitleCase(char c) { 2902 // BEGIN android-changed 2903 return UCharacter.isTitleCase(c); 2904 // END android-changed 2905 } 2906 2907 /** 2908 * Indicates whether the specified code point is a titlecase character. 2909 * 2910 * @param codePoint 2911 * the code point to check. 2912 * @return {@code true} if {@code codePoint} is a titlecase character, 2913 * {@code false} otherwise. 2914 */ 2915 public static boolean isTitleCase(int codePoint) { 2916 return UCharacter.isTitleCase(codePoint); 2917 } 2918 2919 /** 2920 * Indicates whether the specified character is valid as part of a Unicode 2921 * identifier other than the first character. 2922 * 2923 * @param c 2924 * the character to check. 2925 * @return {@code true} if {@code c} is valid as part of a Unicode 2926 * identifier; {@code false} otherwise. 
2927 */ 2928 public static boolean isUnicodeIdentifierPart(char c) { 2929 // BEGIN android-changed 2930 // int type = getType(c); 2931 // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2932 // || type == CONNECTOR_PUNCTUATION 2933 // || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2934 // || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK 2935 // || isIdentifierIgnorable(c); 2936 return UCharacter.isUnicodeIdentifierPart(c); 2937 // END android-changed 2938 } 2939 2940 /** 2941 * Indicates whether the specified code point is valid as part of a Unicode 2942 * identifier other than the first character. 2943 * 2944 * @param codePoint 2945 * the code point to check. 2946 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 2947 * identifier; {@code false} otherwise. 2948 */ 2949 public static boolean isUnicodeIdentifierPart(int codePoint) { 2950 return UCharacter.isUnicodeIdentifierPart(codePoint); 2951 } 2952 2953 /** 2954 * Indicates whether the specified character is a valid initial character 2955 * for a Unicode identifier. 2956 * 2957 * @param c 2958 * the character to check. 2959 * @return {@code true} if {@code c} is a valid first character for a 2960 * Unicode identifier; {@code false} otherwise. 2961 */ 2962 public static boolean isUnicodeIdentifierStart(char c) { 2963 // BEGIN android-changed 2964 // int type = getType(c); 2965 // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2966 // || type == LETTER_NUMBER; 2967 return UCharacter.isUnicodeIdentifierStart(c); 2968 // END android-changed 2969 } 2970 2971 /** 2972 * Indicates whether the specified code point is a valid initial character 2973 * for a Unicode identifier. 2974 * 2975 * @param codePoint 2976 * the code point to check. 2977 * @return {@code true} if {@code codePoint} is a valid first character for 2978 * a Unicode identifier; {@code false} otherwise. 
*/
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        return UCharacter.isUnicodeIdentifierStart(codePoint);
    }

    /**
     * Indicates whether the specified character is an upper case letter.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is an upper case letter; {@code false}
     *         otherwise.
     */
    public static boolean isUpperCase(char c) {
        // BEGIN android-changed
        return isUpperCase((int) c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is an upper case letter.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is an upper case letter;
     *         {@code false} otherwise.
     */
    public static boolean isUpperCase(int codePoint) {
        // BEGIN android-changed
        // Optimized case for ASCII
        if ('A' <= codePoint && codePoint <= 'Z') {
            return true;
        }
        // Every other code point below 128 (including negative values) is
        // answered here without calling into UCharacter.
        if (codePoint < 128) {
            return false;
        }
        return UCharacter.isUpperCase(codePoint);
        // END android-changed
    }

    /**
     * Indicates whether the specified character is a whitespace character in
     * Java.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if the supplied {@code c} is a whitespace character
     *         in Java; {@code false} otherwise.
     */
    public static boolean isWhitespace(char c) {
        // BEGIN android-changed
        return isWhitespace((int) c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a whitespace character in
     * Java.
     * <p>
     * Code points in the Basic Multilingual Plane are answered from a
     * hard-coded list; only supplementary code points are delegated to
     * {@code UCharacter}.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if the supplied {@code codePoint} is a whitespace
     *         character in Java; {@code false} otherwise.
     */
    public static boolean isWhitespace(int codePoint) {
        // BEGIN android-changed
        // Optimized case for ASCII: U+001C..U+0020 and U+0009..U+000D.
        if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) {
            return true;
        }
        // OGHAM SPACE MARK is the only whitespace this method recognizes
        // between the ASCII range and U+2000.
        // NOTE(review): U+180E (MONGOLIAN VOWEL SEPARATOR) and U+200B (ZERO
        // WIDTH SPACE) have changed general category between Unicode
        // versions; confirm the hard-coded answers for them against the
        // Unicode version this class is meant to track.
        if (codePoint == 0x1680) {
            return true;
        }
        // U+2007 FIGURE SPACE is a non-breaking space and therefore not Java
        // whitespace.
        if (codePoint < 0x2000 || codePoint == 0x2007) {
            return false;
        }
        if (codePoint <= 0xffff) {
            // General Punctuation spaces, LINE SEPARATOR, PARAGRAPH SEPARATOR,
            // MEDIUM MATHEMATICAL SPACE and IDEOGRAPHIC SPACE.
            return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029
                    || codePoint == 0x205f || codePoint == 0x3000;
        }
        // Supplementary code points are left to UCharacter.
        return UCharacter.isWhitespace(codePoint);
        // END android-changed
    }

    /**
     * Reverses the order of the first and second byte in the specified
     * character.
     *
     * @param c
     *            the character to reverse.
     * @return the character with reordered bytes.
     */
    public static char reverseBytes(char c) {
        // c is promoted to a non-negative int, so the right shift brings in
        // zeros and the cast back to char drops the bits shifted above bit 15.
        return (char) ((c << 8) | (c >> 8));
    }

    /**
     * Returns the lower case equivalent for the specified character if the
     * character is an upper case letter. Otherwise, the specified character is
     * returned unchanged.
     *
     * @param c
     *            the character
     * @return if {@code c} is an upper case character then its lower case
     *         counterpart, otherwise just {@code c}.
     */
    public static char toLowerCase(char c) {
        // BEGIN android-changed
        return (char) toLowerCase((int) c);
        // END android-changed
    }

    /**
     * Returns the lower case equivalent for the specified code point if it is
     * an upper case letter. Otherwise, the specified code point is returned
     * unchanged.
     *
     * @param codePoint
     *            the code point to check.
     * @return if {@code codePoint} is an upper case character then its lower
     *         case counterpart, otherwise just {@code codePoint}.
     */
    public static int toLowerCase(int codePoint) {
        // BEGIN android-changed
        // Optimized case for ASCII
        if ('A' <= codePoint && codePoint <= 'Z') {
            return codePoint + ('a' - 'A');
        }
        // Everything else below 192 (U+00C0) is returned unchanged without
        // consulting UCharacter; presumably nothing else in that range has a
        // lower case mapping.
        if (codePoint < 192) {
            return codePoint;
        }
        return UCharacter.toLowerCase(codePoint);
        // END android-changed
    }

    @Override
    public String toString() {
        return String.valueOf(value);
    }

    /**
     * Converts the specified character to its string representation.
     *
     * @param value
     *            the character to convert.
     * @return the character converted to a string.
     */
    public static String toString(char value) {
        return String.valueOf(value);
    }

    /**
     * Returns the title case equivalent for the specified character if it
     * exists. Otherwise, the specified character is returned unchanged.
     *
     * @param c
     *            the character to convert.
     * @return the title case equivalent of {@code c} if it exists, otherwise
     *         {@code c}.
     */
    public static char toTitleCase(char c) {
        // BEGIN android-changed
        // if (isTitleCase(c)) {
        //     return c;
        // }
        // int result = BinarySearch.binarySearch(titlecaseKeys, c);
        // if (result >= 0) {
        //     return titlecaseValues[result];
        // }
        // return toUpperCase(c);
        return (char) UCharacter.toTitleCase(c);
        // END android-changed
    }

    /**
     * Returns the title case equivalent for the specified code point if it
     * exists. Otherwise, the specified code point is returned unchanged.
     *
     * @param codePoint
     *            the code point to convert.
     * @return the title case equivalent of {@code codePoint} if it exists,
     *         otherwise {@code codePoint}.
     */
    public static int toTitleCase(int codePoint) {
        return UCharacter.toTitleCase(codePoint);
    }

    /**
     * Returns the upper case equivalent for the specified character if the
     * character is a lower case letter. Otherwise, the specified character is
     * returned unchanged.
     *
     * @param c
     *            the character to convert.
     * @return if {@code c} is a lower case character then its upper case
     *         counterpart, otherwise just {@code c}.
     */
    public static char toUpperCase(char c) {
        // BEGIN android-changed
        return (char) toUpperCase((int) c);
        // END android-changed
    }

    /**
     * Returns the upper case equivalent for the specified code point if the
     * code point is a lower case letter. Otherwise, the specified code point is
     * returned unchanged.
     *
     * @param codePoint
     *            the code point to convert.
     * @return if {@code codePoint} is a lower case character then its upper
     *         case counterpart, otherwise just {@code codePoint}.
     */
    public static int toUpperCase(int codePoint) {
        // BEGIN android-changed
        // Optimized case for ASCII
        if ('a' <= codePoint && codePoint <= 'z') {
            return codePoint - ('a' - 'A');
        }
        // Everything else below 181 (U+00B5 MICRO SIGN) is returned unchanged
        // without consulting UCharacter; presumably U+00B5 is the first
        // non-ASCII code point with an upper case mapping.
        if (codePoint < 181) {
            return codePoint;
        }
        return UCharacter.toUpperCase(codePoint);
        // END android-changed
    }
}