Character.java revision 726ac583d69b37db03c6279af5b36df7b837ede1
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21// BEGIN android-removed 22// import java.util.SortedMap; 23// import java.util.TreeMap; 24// 25// import org.apache.harmony.luni.util.BinarySearch; 26// END android-removed 27 28// BEGIN android-changed 29import com.ibm.icu4jni.lang.UCharacter; 30// END android-changed 31 32/** 33 * The wrapper for the primitive type {@code char}. This class also provides a 34 * number of utility methods for working with characters. 35 * <p> 36 * Character data is based upon the Unicode Standard, 4.0. The Unicode 37 * specification, character tables and other information are available at <a 38 * href="http://www.unicode.org/">http://www.unicode.org/</a>. 39 * <p> 40 * Unicode characters are referred to as <i>code points</i>. The range of valid 41 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 42 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 43 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 44 * encoding and {@code char} pairs are used to represent code points in the 45 * supplementary range. 
A pair of {@code char} values that represent a 46 * supplementary character are made up of a <i>high surrogate</i> with a value 47 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 48 * 0xDC00 to 0xDFFF. 49 * <p> 50 * On the Java platform a {@code char} value represents either a single BMP code 51 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 52 * is used to represent all Unicode code points. 53 * 54 * @since 1.0 55 */ 56public final class Character implements Serializable, Comparable<Character> { 57 private static final long serialVersionUID = 3786198910865385080L; 58 59 private final char value; 60 61 /** 62 * The minimum {@code Character} value. 63 */ 64 public static final char MIN_VALUE = '\u0000'; 65 66 /** 67 * The maximum {@code Character} value. 68 */ 69 public static final char MAX_VALUE = '\uffff'; 70 71 /** 72 * The minimum radix used for conversions between characters and integers. 73 */ 74 public static final int MIN_RADIX = 2; 75 76 /** 77 * The maximum radix used for conversions between characters and integers. 78 */ 79 public static final int MAX_RADIX = 36; 80 81 /** 82 * The {@link Class} object that represents the primitive type {@code char}. 83 */ 84 @SuppressWarnings("unchecked") 85 public static final Class<Character> TYPE 86 = (Class<Character>) char[].class.getComponentType(); 87 88 // Note: This can't be set to "char.class", since *that* is 89 // defined to be "java.lang.Character.TYPE"; 90 91 /** 92 * Unicode category constant Cn. 93 */ 94 public static final byte UNASSIGNED = 0; 95 96 /** 97 * Unicode category constant Lu. 98 */ 99 public static final byte UPPERCASE_LETTER = 1; 100 101 /** 102 * Unicode category constant Ll. 103 */ 104 public static final byte LOWERCASE_LETTER = 2; 105 106 /** 107 * Unicode category constant Lt. 108 */ 109 public static final byte TITLECASE_LETTER = 3; 110 111 /** 112 * Unicode category constant Lm. 
113 */ 114 public static final byte MODIFIER_LETTER = 4; 115 116 /** 117 * Unicode category constant Lo. 118 */ 119 public static final byte OTHER_LETTER = 5; 120 121 /** 122 * Unicode category constant Mn. 123 */ 124 public static final byte NON_SPACING_MARK = 6; 125 126 /** 127 * Unicode category constant Me. 128 */ 129 public static final byte ENCLOSING_MARK = 7; 130 131 /** 132 * Unicode category constant Mc. 133 */ 134 public static final byte COMBINING_SPACING_MARK = 8; 135 136 /** 137 * Unicode category constant Nd. 138 */ 139 public static final byte DECIMAL_DIGIT_NUMBER = 9; 140 141 /** 142 * Unicode category constant Nl. 143 */ 144 public static final byte LETTER_NUMBER = 10; 145 146 /** 147 * Unicode category constant No. 148 */ 149 public static final byte OTHER_NUMBER = 11; 150 151 /** 152 * Unicode category constant Zs. 153 */ 154 public static final byte SPACE_SEPARATOR = 12; 155 156 /** 157 * Unicode category constant Zl. 158 */ 159 public static final byte LINE_SEPARATOR = 13; 160 161 /** 162 * Unicode category constant Zp. 163 */ 164 public static final byte PARAGRAPH_SEPARATOR = 14; 165 166 /** 167 * Unicode category constant Cc. 168 */ 169 public static final byte CONTROL = 15; 170 171 /** 172 * Unicode category constant Cf. 173 */ 174 public static final byte FORMAT = 16; 175 176 /** 177 * Unicode category constant Co. 178 */ 179 public static final byte PRIVATE_USE = 18; 180 181 /** 182 * Unicode category constant Cs. 183 */ 184 public static final byte SURROGATE = 19; 185 186 /** 187 * Unicode category constant Pd. 188 */ 189 public static final byte DASH_PUNCTUATION = 20; 190 191 /** 192 * Unicode category constant Ps. 193 */ 194 public static final byte START_PUNCTUATION = 21; 195 196 /** 197 * Unicode category constant Pe. 198 */ 199 public static final byte END_PUNCTUATION = 22; 200 201 /** 202 * Unicode category constant Pc. 203 */ 204 public static final byte CONNECTOR_PUNCTUATION = 23; 205 206 /** 207 * Unicode category constant Po. 
208 */ 209 public static final byte OTHER_PUNCTUATION = 24; 210 211 /** 212 * Unicode category constant Sm. 213 */ 214 public static final byte MATH_SYMBOL = 25; 215 216 /** 217 * Unicode category constant Sc. 218 */ 219 public static final byte CURRENCY_SYMBOL = 26; 220 221 /** 222 * Unicode category constant Sk. 223 */ 224 public static final byte MODIFIER_SYMBOL = 27; 225 226 /** 227 * Unicode category constant So. 228 */ 229 public static final byte OTHER_SYMBOL = 28; 230 231 /** 232 * Unicode category constant Pi. 233 * 234 * @since 1.4 235 */ 236 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 237 238 /** 239 * Unicode category constant Pf. 240 * 241 * @since 1.4 242 */ 243 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 244 245 /** 246 * Unicode bidirectional constant. 247 * 248 * @since 1.4 249 */ 250 public static final byte DIRECTIONALITY_UNDEFINED = -1; 251 252 /** 253 * Unicode bidirectional constant L. 254 * 255 * @since 1.4 256 */ 257 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 258 259 /** 260 * Unicode bidirectional constant R. 261 * 262 * @since 1.4 263 */ 264 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 265 266 /** 267 * Unicode bidirectional constant AL. 268 * 269 * @since 1.4 270 */ 271 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 272 273 /** 274 * Unicode bidirectional constant EN. 275 * 276 * @since 1.4 277 */ 278 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 279 280 /** 281 * Unicode bidirectional constant ES. 282 * 283 * @since 1.4 284 */ 285 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 286 287 /** 288 * Unicode bidirectional constant ET. 289 * 290 * @since 1.4 291 */ 292 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 293 294 /** 295 * Unicode bidirectional constant AN. 296 * 297 * @since 1.4 298 */ 299 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 300 301 /** 302 * Unicode bidirectional constant CS. 
303 * 304 * @since 1.4 305 */ 306 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 307 308 /** 309 * Unicode bidirectional constant NSM. 310 * 311 * @since 1.4 312 */ 313 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 314 315 /** 316 * Unicode bidirectional constant BN. 317 * 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 321 322 /** 323 * Unicode bidirectional constant B. 324 * 325 * @since 1.4 326 */ 327 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 328 329 /** 330 * Unicode bidirectional constant S. 331 * 332 * @since 1.4 333 */ 334 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 335 336 /** 337 * Unicode bidirectional constant WS. 338 * 339 * @since 1.4 340 */ 341 public static final byte DIRECTIONALITY_WHITESPACE = 12; 342 343 /** 344 * Unicode bidirectional constant ON. 345 * 346 * @since 1.4 347 */ 348 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 349 350 /** 351 * Unicode bidirectional constant LRE. 352 * 353 * @since 1.4 354 */ 355 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 356 357 /** 358 * Unicode bidirectional constant LRO. 359 * 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 363 364 /** 365 * Unicode bidirectional constant RLE. 366 * 367 * @since 1.4 368 */ 369 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 370 371 /** 372 * Unicode bidirectional constant RLO. 373 * 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 377 378 /** 379 * Unicode bidirectional constant PDF. 380 * 381 * @since 1.4 382 */ 383 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 384 385 /** 386 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 387 * encoding, {@code '\uD800'}. 
388 * 389 * @since 1.5 390 */ 391 public static final char MIN_HIGH_SURROGATE = '\uD800'; 392 393 /** 394 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 395 * encoding, {@code '\uDBFF'}. 396 * 397 * @since 1.5 398 */ 399 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 400 401 /** 402 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 403 * encoding, {@code '\uDC00'}. 404 * 405 * @since 1.5 406 */ 407 public static final char MIN_LOW_SURROGATE = '\uDC00'; 408 409 /** 410 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 411 * encoding, {@code '\uDFFF'}. 412 * 413 * @since 1.5 414 */ 415 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 416 417 /** 418 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 419 * 420 * @since 1.5 421 */ 422 public static final char MIN_SURROGATE = '\uD800'; 423 424 /** 425 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 426 * 427 * @since 1.5 428 */ 429 public static final char MAX_SURROGATE = '\uDFFF'; 430 431 /** 432 * The minimum value of a supplementary code point, {@code U+010000}. 433 * 434 * @since 1.5 435 */ 436 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 437 438 /** 439 * The minimum code point value, {@code U+0000}. 440 * 441 * @since 1.5 442 */ 443 public static final int MIN_CODE_POINT = 0x000000; 444 445 /** 446 * The maximum code point value, {@code U+10FFFF}. 447 * 448 * @since 1.5 449 */ 450 public static final int MAX_CODE_POINT = 0x10FFFF; 451 452 /** 453 * The number of bits required to represent a {@code Character} value 454 * unsigned form. 455 * 456 * @since 1.5 457 */ 458 public static final int SIZE = 16; 459 460 // BEGIN android-removed 461 // Unicode 3.0.1 (same as Unicode 3.0.0) 462 // private static final String bidiKeys = ... 463 464 // private static final char[] bidiValues = ... 465 466 // private static final char[] mirrored = ... 
// Unicode 3.0.1 (same as Unicode 3.0.0)
    // private static final String typeKeys = ...

    // private static final char[] typeValues = ...

    // private static final int[] typeValuesCache = ...

    // Unicode 3.0.1 (same as Unicode 3.0.0)
    // private static final String uppercaseKeys = ...

    // private static final char[] uppercaseValues = ...

    // private static final int[] uppercaseValuesCache = ...

    // private static final String lowercaseKeys = ...

    // private static final char[] lowercaseValues = ...

    // private static final int[] lowercaseValuesCache = ...

    // private static final String digitKeys = ...

    // private static final char[] digitValues = ...
    // END android-removed

    // BEGIN android-note
    // put this in a helper class so that it's only initialized on demand?
    // END android-note
    /*
     * Tag table with 128 entries; every entry is 0, 2 or 3.
     *
     * The table is written as a string literal and converted to a char[]
     * through String.getValue(), which is not part of the public String API.
     * NOTE(review): presumably a package-private accessor that hands back the
     * string's backing array without copying -- confirm in java.lang.String.
     *
     * NOTE(review): the entries look like ISJAVASTART (1) / ISJAVAPART (2) bit
     * flags indexed by ASCII code point: index 36 ('$'), 65-90, 95 ('_') and
     * 97-122 hold 3, while 48-57 (the digits) hold 2. Confirm against the
     * methods that read this table, which are not visible in this part of the
     * file.
     */
    private static final char[] typeTags = "\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0003\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0002"
            .getValue();

    // BEGIN android-note
    // put this in a helper class so that it's only initialized on demand?
// END android-note
    /*
     * Translation table holding 19 of the DIRECTIONALITY_* constants declared
     * in this class. The array order differs from the numeric order of those
     * constants: for example, index 2 holds DIRECTIONALITY_EUROPEAN_NUMBER
     * (value 3) and index 13 holds DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
     * (value 2).
     *
     * NOTE(review): presumably indexed by the direction code reported by the
     * ICU-backed UCharacter helper -- confirm in the directionality lookup,
     * which is not visible in this part of the file.
     */
    private static final byte[] DIRECTIONALITY = new byte[] {
            DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
            DIRECTIONALITY_EUROPEAN_NUMBER,
            DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
            DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
            DIRECTIONALITY_ARABIC_NUMBER,
            DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
            DIRECTIONALITY_PARAGRAPH_SEPARATOR,
            DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
            DIRECTIONALITY_OTHER_NEUTRALS,
            DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
            DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
            DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
            DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
            DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
            DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
            DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };

    // NOTE(review): ISJAVASTART and ISJAVAPART look like bit flags combined in
    // the typeTags entries (which hold 0, 2 or 3) -- confirm at the use sites.
    private static final int ISJAVASTART = 1;

    private static final int ISJAVAPART = 2;

    // BEGIN android-removed
    // Unicode 3.0.1 (same as Unicode 3.0.0)
    // private static final String titlecaseKeys = ...

    // private static final char[] titlecaseValues = ...

    // Unicode 3.0.0 (NOT the same as Unicode 3.0.1)
    // private static final String numericKeys = ...

    // private static final char[] numericValues = ...
    // END android-removed

    /**
     * Represents a subset of the Unicode character set.
     * <p>
     * A subset is identified solely by object identity: {@link #equals(Object)}
     * and {@link #hashCode()} are final and delegate to {@link Object}, and
     * {@link #toString()} returns the name given at construction.
     */
    public static class Subset {
        // Name of this subset; assigned by the constructor (which rejects
        // null) and returned by toString().
        String name;

        /**
         * Constructs a new {@code Subset}.
         *
         * @param string
         *            this subset's name.
         * @throws NullPointerException
         *             if {@code string} is {@code null}.
         */
        protected Subset(String string) {
            if (string == null) {
                throw new NullPointerException();
            }
            name = string;
        }

        /**
         * Compares this character subset with the specified object. Uses
         * {@link java.lang.Object#equals(Object)} to do the comparison.
         *
         * @param object
         *            the object to compare this character subset with.
         * @return {@code true} if {@code object} is this subset, that is, if
         *         {@code object == this}; {@code false} otherwise.
         */
        @Override
        public final boolean equals(Object object) {
            return super.equals(object);
        }

        /**
         * Returns the integer hash code for this character subset.
         *
         * @return this subset's hash code, which is the hash code computed by
         *         {@link java.lang.Object#hashCode()}.
         */
        @Override
        public final int hashCode() {
            return super.hashCode();
        }

        /**
         * Returns the string representation of this subset.
         *
         * @return this subset's name.
         */
        @Override
        public final String toString() {
            return name;
        }
    }

    /**
     * Represents a block of Unicode characters, as defined by the Unicode 4.0.1
     * specification.
     *
     * @since 1.2
     */
    public static final class UnicodeBlock extends Subset {
        // NOTE(review): each constant below is built from its name and two
        // integers that appear to be the inclusive first and last code points
        // of the block (e.g. BASIC_LATIN uses 0x0 and 0x7f); the constructor is
        // not visible in this part of the file -- confirm there.

        /**
         * The "Surrogates Area" Unicode Block.
         *
         * @deprecated As of Java 5, this block has been replaced by
         *             {@link #HIGH_SURROGATES},
         *             {@link #HIGH_PRIVATE_USE_SURROGATES} and
         *             {@link #LOW_SURROGATES}.
         */
        // NOTE(review): constructed with the range 0x0-0x0, which overlaps the
        // start of BASIC_LATIN; presumably a placeholder because the block is
        // deprecated -- confirm how range lookups treat it.
        @Deprecated
        public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0);
        /**
         * The "Basic Latin" Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f);
        /**
         * The "Latin-1 Supplement" Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff);
        /**
         * The "Latin Extended-A" Unicode Block.
         *
         * @since 1.2
         */
        public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f);
        /**
         * The "Latin Extended-B" Unicode Block.
628 * 629 * @since 1.2 630 */ 631 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 632 /** 633 * The "IPA Extensions" Unicode Block. 634 * 635 * @since 1.2 636 */ 637 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 638 /** 639 * The "Spacing Modifier Letters" Unicode Block. 640 * 641 * @since 1.2 642 */ 643 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 644 /** 645 * The "Combining Diacritical Marks" Unicode Block. 646 * 647 * @since 1.2 648 */ 649 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 650 /** 651 * The "Greek and Coptic" Unicode Block. Previously referred 652 * to as "Greek". 653 * 654 * @since 1.2 655 */ 656 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 657 /** 658 * The "Cyrillic" Unicode Block. 659 * 660 * @since 1.2 661 */ 662 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 663 /** 664 * The "Cyrillic Supplement" Unicode Block. Previously 665 * referred to as "Cyrillic Supplementary". 666 * 667 * @since 1.5 668 */ 669 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 670 /** 671 * The "Armenian" Unicode Block. 672 * 673 * @since 1.2 674 */ 675 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 676 /** 677 * The "Hebrew" Unicode Block. 678 * 679 * @since 1.2 680 */ 681 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 682 /** 683 * The "Arabic" Unicode Block. 684 * 685 * @since 1.2 686 */ 687 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 688 /** 689 * The "Syriac" Unicode Block. 
690 * 691 * @since 1.4 692 */ 693 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 694 /** 695 * The "Thaana" Unicode Block. 696 * 697 * @since 1.4 698 */ 699 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 700 /** 701 * The "Devanagari" Unicode Block. 702 * 703 * @since 1.2 704 */ 705 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 706 /** 707 * The "Bengali" Unicode Block. 708 * 709 * @since 1.2 710 */ 711 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 712 /** 713 * The "Gurmukhi" Unicode Block. 714 * 715 * @since 1.2 716 */ 717 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 718 /** 719 * The "Gujarati" Unicode Block. 720 * 721 * @since 1.2 722 */ 723 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 724 /** 725 * The "Oriya" Unicode Block. 726 * 727 * @since 1.2 728 */ 729 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 730 /** 731 * The "Tamil" Unicode Block. 732 * 733 * @since 1.2 734 */ 735 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 736 /** 737 * The "Telugu" Unicode Block. 738 * 739 * @since 1.2 740 */ 741 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 742 /** 743 * The "Kannada" Unicode Block. 744 * 745 * @since 1.2 746 */ 747 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 748 /** 749 * The "Malayalam" Unicode Block. 750 * 751 * @since 1.2 752 */ 753 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 754 /** 755 * The "Sinhala" Unicode Block. 756 * 757 * @since 1.4 758 */ 759 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 760 /** 761 * The "Thai" Unicode Block. 
762 * 763 * @since 1.2 764 */ 765 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 766 /** 767 * The "Lao" Unicode Block. 768 * 769 * @since 1.2 770 */ 771 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 772 /** 773 * The "Tibetan" Unicode Block. 774 * 775 * @since 1.2 776 */ 777 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 778 /** 779 * The "Myanmar" Unicode Block. 780 * 781 * @since 1.4 782 */ 783 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 784 /** 785 * The "Georgian" Unicode Block. 786 * 787 * @since 1.2 788 */ 789 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 790 /** 791 * The "Hangul Jamo" Unicode Block. 792 * 793 * @since 1.2 794 */ 795 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 796 /** 797 * The "Ethiopic" Unicode Block. 798 * 799 * @since 1.4 800 */ 801 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 802 /** 803 * The "Cherokee" Unicode Block. 804 * 805 * @since 1.4 806 */ 807 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 808 /** 809 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 810 * 811 * @since 1.4 812 */ 813 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 814 /** 815 * The "Ogham" Unicode Block. 816 * 817 * @since 1.4 818 */ 819 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 820 /** 821 * The "Runic" Unicode Block. 822 * 823 * @since 1.4 824 */ 825 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 826 /** 827 * The "Tagalog" Unicode Block. 
828 * 829 * @since 1.5 830 */ 831 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 832 /** 833 * The "Hanunoo" Unicode Block. 834 * 835 * @since 1.5 836 */ 837 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 838 /** 839 * The "Buhid" Unicode Block. 840 * 841 * @since 1.5 842 */ 843 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 844 /** 845 * The "Tagbanwa" Unicode Block. 846 * 847 * @since 1.5 848 */ 849 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 850 /** 851 * The "Khmer" Unicode Block. 852 * 853 * @since 1.4 854 */ 855 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 856 /** 857 * The "Mongolian" Unicode Block. 858 * 859 * @since 1.4 860 */ 861 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 862 /** 863 * The "Limbu" Unicode Block. 864 * 865 * @since 1.5 866 */ 867 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 868 /** 869 * The "Tai Le" Unicode Block. 870 * 871 * @since 1.5 872 */ 873 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 874 /** 875 * The "Khmer Symbols" Unicode Block. 876 * 877 * @since 1.5 878 */ 879 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 880 /** 881 * The "Phonetic Extensions" Unicode Block. 882 * 883 * @since 1.5 884 */ 885 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 886 /** 887 * The "Latin Extended Additional" Unicode Block. 888 * 889 * @since 1.2 890 */ 891 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 892 /** 893 * The "Greek Extended" Unicode Block. 
894 * 895 * @since 1.2 896 */ 897 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 898 /** 899 * The "General Punctuation" Unicode Block. 900 * 901 * @since 1.2 902 */ 903 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 904 /** 905 * The "Superscripts and Subscripts" Unicode Block. 906 * 907 * @since 1.2 908 */ 909 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 910 /** 911 * The "Currency Symbols" Unicode Block. 912 * 913 * @since 1.2 914 */ 915 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 916 /** 917 * The "Combining Diacritical Marks for Symbols" Unicode 918 * Block. Previously referred to as "Combining Marks for 919 * Symbols". 920 * 921 * @since 1.2 922 */ 923 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 924 /** 925 * The "Letterlike Symbols" Unicode Block. 926 * 927 * @since 1.2 928 */ 929 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 930 /** 931 * The "Number Forms" Unicode Block. 932 * 933 * @since 1.2 934 */ 935 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 936 /** 937 * The "Arrows" Unicode Block. 938 * 939 * @since 1.2 940 */ 941 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 942 /** 943 * The "Mathematical Operators" Unicode Block. 944 * 945 * @since 1.2 946 */ 947 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 948 /** 949 * The "Miscellaneous Technical" Unicode Block. 
950 * 951 * @since 1.2 952 */ 953 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 954 /** 955 * The "Control Pictures" Unicode Block. 956 * 957 * @since 1.2 958 */ 959 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 960 /** 961 * The "Optical Character Recognition" Unicode Block. 962 * 963 * @since 1.2 964 */ 965 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 966 /** 967 * The "Enclosed Alphanumerics" Unicode Block. 968 * 969 * @since 1.2 970 */ 971 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 972 /** 973 * The "Box Drawing" Unicode Block. 974 * 975 * @since 1.2 976 */ 977 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 978 /** 979 * The "Block Elements" Unicode Block. 980 * 981 * @since 1.2 982 */ 983 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 984 /** 985 * The "Geometric Shapes" Unicode Block. 986 * 987 * @since 1.2 988 */ 989 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 990 /** 991 * The "Miscellaneous Symbols" Unicode Block. 992 * 993 * @since 1.2 994 */ 995 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 996 /** 997 * The "Dingbats" Unicode Block. 998 * 999 * @since 1.2 1000 */ 1001 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 1002 /** 1003 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 
/**
 * The "Miscellaneous Mathematical Symbols-A" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef);
/**
 * The "Supplemental Arrows-A" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff);
/**
 * The "Braille Patterns" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff);
/**
 * The "Supplemental Arrows-B" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f);
/**
 * The "Miscellaneous Mathematical Symbols-B" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff);
/**
 * The "Supplemental Mathematical Operators" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff);
/**
 * The "Miscellaneous Symbols and Arrows" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff);
/**
 * The "CJK Radicals Supplement" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff);
/**
 * The "Kangxi Radicals" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf);
/**
 * The "Ideographic Description Characters" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff);
/**
 * The "CJK Symbols and Punctuation" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
/**
 * The "Hiragana" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
/**
 * The "Katakana" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
/**
 * The "Bopomofo" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
/**
 * The "Hangul Compatibility Jamo" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
/**
 * The "Kanbun" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
/**
 * The "Bopomofo Extended" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
/**
 * The "Katakana Phonetic Extensions" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
/**
 * The "Enclosed CJK Letters and Months" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
/**
 * The "CJK Compatibility" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
/**
 * The "CJK Unified Ideographs Extension A" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
/**
 * The "Yijing Hexagram Symbols" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
/**
 * The "CJK Unified Ideographs" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
/**
 * The "Yi Syllables" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
/**
 * The "Yi Radicals" Unicode Block.
 *
 * @since 1.4
 */
public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
/**
 * The "Hangul Syllables" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
/**
 * The "High Surrogates" Unicode Block. This block represents
 * code point values in the high surrogate range 0xD800 to 0xDB7F.
 *
 * @since 1.5
 */
public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
/**
 * The "High Private Use Surrogates" Unicode Block. This block
 * represents code point values in the high surrogate range 0xDB80 to
 * 0xDBFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
/**
 * The "Low Surrogates" Unicode Block. This block represents
 * code point values in the low surrogate range 0xDC00 to 0xDFFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
/**
 * The "Private Use Area" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
/**
 * The "CJK Compatibility Ideographs" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
/**
 * The "Alphabetic Presentation Forms" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
/**
 * The "Arabic Presentation Forms-A" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
/**
 * The "Variation Selectors" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
/**
 * The "Combining Half Marks" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
/**
 * The "CJK Compatibility Forms" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
/**
 * The "Small Form Variants" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
/**
 * The "Arabic Presentation Forms-B" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
/**
 * The "Halfwidth and Fullwidth Forms" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
/**
 * The "Specials" Unicode Block.
 *
 * @since 1.2
 */
public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
/**
 * The "Linear B Syllabary" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
/**
 * The "Linear B Ideograms" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
/**
 * The "Aegean Numbers" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
/**
 * The "Old Italic" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
/**
 * The "Gothic" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
/**
 * The "Ugaritic" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
/**
 * The "Deseret" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
/**
 * The "Shavian" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
/**
 * The "Osmanya" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
/**
 * The "Cypriot Syllabary" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
/**
 * The "Byzantine Musical Symbols" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
/**
 * The "Musical Symbols" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
/**
 * The "Tai Xuan Jing Symbols" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
/**
 * The "Mathematical Alphanumeric Symbols" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
/**
 * The "CJK Unified Ideographs Extension B" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
/**
 * The "CJK Compatibility Ideographs Supplement" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
/**
 * The "Tags" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
/**
 * The "Variation Selectors Supplement" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
/**
 * The "Supplementary Private Use Area-A" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
/**
 * The "Supplementary Private Use Area-B" Unicode Block.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);

/*
 * All of the UnicodeBlocks with valid ranges in ascending order.
 * Stays null until the first call to forName(String) or of(int), which
 * fill it from UCharacter.getBlockTable().
 */
private static UnicodeBlock[] BLOCKS;

// BEGIN android-changed
// /*
// * A SortedMap (String.CASE_INSENSITIVE_ORDER) with keys that represents
// * valid block names and values of the UnicodeBlock constant they map
// * to.
1371 // */ 1372 // private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = ...; 1373 // END android-changed 1374 1375 /** 1376 * Retrieves the constant that corresponds to the specified block name. 1377 * The block names are defined by the Unicode 4.0.1 specification in the 1378 * {@code Blocks-4.0.1.txt} file. 1379 * <p> 1380 * Block names may be one of the following: 1381 * <ul> 1382 * <li>Canonical block name, as defined by the Unicode specification; 1383 * case-insensitive.</li> 1384 * <li>Canonical block name without any spaces, as defined by the 1385 * Unicode specification; case-insensitive.</li> 1386 * <li>{@code UnicodeBlock} constant identifier. This is determined by 1387 * uppercasing the canonical name and replacing all spaces and hyphens 1388 * with underscores.</li> 1389 * </ul> 1390 * 1391 * @param blockName 1392 * the name of the block to retrieve. 1393 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1394 * @throws NullPointerException 1395 * if {@code blockName} is {@code null}. 1396 * @throws IllegalArgumentException 1397 * if {@code blockName} is not a valid block name. 1398 * @since 1.5 1399 */ 1400 public static final UnicodeBlock forName(String blockName) { 1401 // BEGIN android-note 1402 // trying to get closer to the RI which defines this as final. 
1403 // END android-note 1404 if (blockName == null) { 1405 throw new NullPointerException(); 1406 } 1407 // BEGIN android-changed 1408 if (BLOCKS == null) { 1409 BLOCKS = UCharacter.getBlockTable(); 1410 } 1411 int block = UCharacter.forName(blockName); 1412 if (block == -1) { 1413 if(blockName.equals("SURROGATES_AREA")) { 1414 return SURROGATES_AREA; 1415 } else if(blockName.equalsIgnoreCase("greek")) { 1416 return GREEK; 1417 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1418 blockName.equals("Combining Marks for Symbols") || 1419 blockName.equals("CombiningMarksforSymbols")) { 1420 return COMBINING_MARKS_FOR_SYMBOLS; 1421 } 1422 throw new IllegalArgumentException(); 1423 } 1424 return BLOCKS[block]; 1425 // END android-changed 1426 } 1427 1428 /** 1429 * Gets the constant for the Unicode block that contains the specified 1430 * character. 1431 * 1432 * @param c 1433 * the character for which to get the {@code UnicodeBlock} 1434 * constant. 1435 * @return the {@code UnicodeBlock} constant for the block that contains 1436 * {@code c}, or {@code null} if {@code c} does not belong to 1437 * any defined block. 1438 */ 1439 public static UnicodeBlock of(char c) { 1440 return of((int) c); 1441 } 1442 1443 /** 1444 * Gets the constant for the Unicode block that contains the specified 1445 * Unicode code point. 1446 * 1447 * @param codePoint 1448 * the Unicode code point for which to get the 1449 * {@code UnicodeBlock} constant. 1450 * @return the {@code UnicodeBlock} constant for the block that contains 1451 * {@code codePoint}, or {@code null} if {@code codePoint} does 1452 * not belong to any defined block. 1453 * @throws IllegalArgumentException 1454 * if {@code codePoint} is not a valid Unicode code point. 
/**
 * Gets the constant for the Unicode block that contains the specified
 * Unicode code point.
 *
 * @param codePoint
 *            the Unicode code point for which to get the
 *            {@code UnicodeBlock} constant.
 * @return the {@code UnicodeBlock} constant for the block that contains
 *         {@code codePoint}, or {@code null} if {@code codePoint} does
 *         not belong to any defined block.
 * @throws IllegalArgumentException
 *             if {@code codePoint} is not a valid Unicode code point.
 * @since 1.5
 */
public static UnicodeBlock of(int codePoint) {
    if (!isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException();
    }
    // BEGIN android-changed
    // The block table is loaded on first use.
    if (BLOCKS == null) {
        BLOCKS = UCharacter.getBlockTable();
    }
    int block = UCharacter.of(codePoint);
    // -1, or an index past the end of the table, is reported as
    // "no block" instead of being used as an array index.
    if(block == -1 || block >= BLOCKS.length) {
        return null;
    }
    return BLOCKS[block];
    // END android-changed
}

// BEGIN android-changed
/*
 * Creates a block constant. Only blockName is used (it is handed to the
 * superclass constructor); start and end are accepted but not stored.
 */
private UnicodeBlock(String blockName, int start, int end) {
    super(blockName);
}
// END android-changed
}

/**
 * Constructs a new {@code Character} with the specified primitive char
 * value.
 *
 * @param value
 *            the primitive char value to store in the new instance.
 */
public Character(char value) {
    this.value = value;
}

/**
 * Gets the primitive value of this character.
 *
 * @return this object's primitive value.
 */
public char charValue() {
    return value;
}

/**
 * Compares this object to the specified character object to determine their
 * relative order.
 *
 * @param c
 *            the character object to compare this object to.
 * @return {@code 0} if the value of this character and the value of
 *         {@code c} are equal; a positive value if the value of this
 *         character is greater than the value of {@code c}; a negative
 *         value if the value of this character is less than the value of
 *         {@code c}.
 * @throws NullPointerException
 *             if {@code c} is {@code null}.
 * @see java.lang.Comparable
 * @since 1.2
 */
public int compareTo(Character c) {
    // Both operands are chars promoted to int, so the difference cannot
    // overflow.
    return value - c.value;
}
1520 * <p> 1521 * If it is not necessary to get a new {@code Character} instance, it is 1522 * recommended to use this method instead of the constructor, since it 1523 * maintains a cache of instances which may result in better performance. 1524 * 1525 * @param c 1526 * the char value for which to get a {@code Character} instance. 1527 * @return the {@code Character} instance for {@code c}. 1528 * @since 1.5 1529 */ 1530 public static Character valueOf(char c) { 1531 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1532 } 1533 1534 /** 1535 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1536 */ 1537 private static final Character[] SMALL_VALUES = new Character[128]; 1538 1539 static { 1540 for(int i = 0; i < 128; i++) { 1541 SMALL_VALUES[i] = new Character((char) i); 1542 } 1543 } 1544 /** 1545 * Indicates whether {@code codePoint} is a valid Unicode code point. 1546 * 1547 * @param codePoint 1548 * the code point to test. 1549 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1550 * {@code false} otherwise. 1551 * @since 1.5 1552 */ 1553 public static boolean isValidCodePoint(int codePoint) { 1554 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1555 } 1556 1557 /** 1558 * Indicates whether {@code codePoint} is within the supplementary code 1559 * point range. 1560 * 1561 * @param codePoint 1562 * the code point to test. 1563 * @return {@code true} if {@code codePoint} is within the supplementary 1564 * code point range; {@code false} otherwise. 1565 * @since 1.5 1566 */ 1567 public static boolean isSupplementaryCodePoint(int codePoint) { 1568 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1569 } 1570 1571 /** 1572 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1573 * that is used for representing supplementary characters in UTF-16 1574 * encoding. 1575 * 1576 * @param ch 1577 * the character to test. 
1578 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1579 * {@code false} otherwise. 1580 * @see #isLowSurrogate(char) 1581 * @since 1.5 1582 */ 1583 public static boolean isHighSurrogate(char ch) { 1584 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1585 } 1586 1587 /** 1588 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1589 * that is used for representing supplementary characters in UTF-16 1590 * encoding. 1591 * 1592 * @param ch 1593 * the character to test. 1594 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1595 * {@code false} otherwise. 1596 * @see #isHighSurrogate(char) 1597 * @since 1.5 1598 */ 1599 public static boolean isLowSurrogate(char ch) { 1600 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1601 } 1602 1603 /** 1604 * Indicates whether the specified character pair is a valid surrogate pair. 1605 * 1606 * @param high 1607 * the high surrogate unit to test. 1608 * @param low 1609 * the low surrogate unit to test. 1610 * @return {@code true} if {@code high} is a high-surrogate code unit and 1611 * {@code low} is a low-surrogate code unit; {@code false} 1612 * otherwise. 1613 * @see #isHighSurrogate(char) 1614 * @see #isLowSurrogate(char) 1615 * @since 1.5 1616 */ 1617 public static boolean isSurrogatePair(char high, char low) { 1618 return (isHighSurrogate(high) && isLowSurrogate(low)); 1619 } 1620 1621 /** 1622 * Calculates the number of {@code char} values required to represent the 1623 * specified Unicode code point. This method checks if the {@code codePoint} 1624 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1625 * returned, otherwise {@code 1}. To test if the code point is valid, use 1626 * the {@link #isValidCodePoint(int)} method. 1627 * 1628 * @param codePoint 1629 * the code point for which to calculate the number of required 1630 * chars. 1631 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 
1632 * @see #isValidCodePoint(int) 1633 * @see #isSupplementaryCodePoint(int) 1634 * @since 1.5 1635 */ 1636 public static int charCount(int codePoint) { 1637 return (codePoint >= 0x10000 ? 2 : 1); 1638 } 1639 1640 /** 1641 * Converts a surrogate pair into a Unicode code point. This method assumes 1642 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1643 * surrogates, then the result is indeterminate. The 1644 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1645 * method to validate the pair. 1646 * 1647 * @param high 1648 * the high surrogate unit. 1649 * @param low 1650 * the low surrogate unit. 1651 * @return the Unicode code point corresponding to the surrogate unit pair. 1652 * @see #isSurrogatePair(char, char) 1653 * @since 1.5 1654 */ 1655 public static int toCodePoint(char high, char low) { 1656 // See RFC 2781, Section 2.2 1657 // http://www.faqs.org/rfcs/rfc2781.html 1658 int h = (high & 0x3FF) << 10; 1659 int l = low & 0x3FF; 1660 return (h | l) + 0x10000; 1661 } 1662 1663 /** 1664 * Returns the code point at {@code index} in the specified sequence of 1665 * character units. If the unit at {@code index} is a high-surrogate unit, 1666 * {@code index + 1} is less than the length of the sequence and the unit at 1667 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1668 * point represented by the pair is returned; otherwise the {@code char} 1669 * value at {@code index} is returned. 1670 * 1671 * @param seq 1672 * the source sequence of {@code char} units. 1673 * @param index 1674 * the position in {@code seq} from which to retrieve the code 1675 * point. 1676 * @return the Unicode code point or {@code char} value at {@code index} in 1677 * {@code seq}. 1678 * @throws NullPointerException 1679 * if {@code seq} is {@code null}. 1680 * @throws IndexOutOfBoundsException 1681 * if the {@code index} is negative or greater than or equal to 1682 * the length of {@code seq}. 
1683 * @since 1.5 1684 */ 1685 public static int codePointAt(CharSequence seq, int index) { 1686 if (seq == null) { 1687 throw new NullPointerException(); 1688 } 1689 int len = seq.length(); 1690 if (index < 0 || index >= len) { 1691 throw new IndexOutOfBoundsException(); 1692 } 1693 1694 char high = seq.charAt(index++); 1695 if (index >= len) { 1696 return high; 1697 } 1698 char low = seq.charAt(index); 1699 if (isSurrogatePair(high, low)) { 1700 return toCodePoint(high, low); 1701 } 1702 return high; 1703 } 1704 1705 /** 1706 * Returns the code point at {@code index} in the specified array of 1707 * character units. If the unit at {@code index} is a high-surrogate unit, 1708 * {@code index + 1} is less than the length of the array and the unit at 1709 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1710 * point represented by the pair is returned; otherwise the {@code char} 1711 * value at {@code index} is returned. 1712 * 1713 * @param seq 1714 * the source array of {@code char} units. 1715 * @param index 1716 * the position in {@code seq} from which to retrieve the code 1717 * point. 1718 * @return the Unicode code point or {@code char} value at {@code index} in 1719 * {@code seq}. 1720 * @throws NullPointerException 1721 * if {@code seq} is {@code null}. 1722 * @throws IndexOutOfBoundsException 1723 * if the {@code index} is negative or greater than or equal to 1724 * the length of {@code seq}. 
1725 * @since 1.5 1726 */ 1727 public static int codePointAt(char[] seq, int index) { 1728 if (seq == null) { 1729 throw new NullPointerException(); 1730 } 1731 int len = seq.length; 1732 if (index < 0 || index >= len) { 1733 throw new IndexOutOfBoundsException(); 1734 } 1735 1736 char high = seq[index++]; 1737 if (index >= len) { 1738 return high; 1739 } 1740 char low = seq[index]; 1741 if (isSurrogatePair(high, low)) { 1742 return toCodePoint(high, low); 1743 } 1744 return high; 1745 } 1746 1747 /** 1748 * Returns the code point at {@code index} in the specified array of 1749 * character units, where {@code index} has to be less than {@code limit}. 1750 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1751 * is less than {@code limit} and the unit at {@code index + 1} is a 1752 * low-surrogate unit, then the supplementary code point represented by the 1753 * pair is returned; otherwise the {@code char} value at {@code index} is 1754 * returned. 1755 * 1756 * @param seq 1757 * the source array of {@code char} units. 1758 * @param index 1759 * the position in {@code seq} from which to get the code point. 1760 * @param limit 1761 * the index after the last unit in {@code seq} that can be used. 1762 * @return the Unicode code point or {@code char} value at {@code index} in 1763 * {@code seq}. 1764 * @throws NullPointerException 1765 * if {@code seq} is {@code null}. 1766 * @throws IndexOutOfBoundsException 1767 * if {@code index < 0}, {@code index >= limit}, 1768 * {@code limit < 0} or if {@code limit} is greater than the 1769 * length of {@code seq}. 
1770 * @since 1.5 1771 */ 1772 public static int codePointAt(char[] seq, int index, int limit) { 1773 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1774 throw new IndexOutOfBoundsException(); 1775 } 1776 1777 char high = seq[index++]; 1778 if (index >= limit) { 1779 return high; 1780 } 1781 char low = seq[index]; 1782 if (isSurrogatePair(high, low)) { 1783 return toCodePoint(high, low); 1784 } 1785 return high; 1786 } 1787 1788 /** 1789 * Returns the code point that preceds {@code index} in the specified 1790 * sequence of character units. If the unit at {@code index - 1} is a 1791 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1792 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1793 * point represented by the pair is returned; otherwise the {@code char} 1794 * value at {@code index - 1} is returned. 1795 * 1796 * @param seq 1797 * the source sequence of {@code char} units. 1798 * @param index 1799 * the position in {@code seq} following the code 1800 * point that should be returned. 1801 * @return the Unicode code point or {@code char} value before {@code index} 1802 * in {@code seq}. 1803 * @throws NullPointerException 1804 * if {@code seq} is {@code null}. 1805 * @throws IndexOutOfBoundsException 1806 * if the {@code index} is less than 1 or greater than the 1807 * length of {@code seq}. 
1808 * @since 1.5 1809 */ 1810 public static int codePointBefore(CharSequence seq, int index) { 1811 if (seq == null) { 1812 throw new NullPointerException(); 1813 } 1814 int len = seq.length(); 1815 if (index < 1 || index > len) { 1816 throw new IndexOutOfBoundsException(); 1817 } 1818 1819 char low = seq.charAt(--index); 1820 if (--index < 0) { 1821 return low; 1822 } 1823 char high = seq.charAt(index); 1824 if (isSurrogatePair(high, low)) { 1825 return toCodePoint(high, low); 1826 } 1827 return low; 1828 } 1829 1830 /** 1831 * Returns the code point that preceds {@code index} in the specified 1832 * array of character units. If the unit at {@code index - 1} is a 1833 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1834 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1835 * point represented by the pair is returned; otherwise the {@code char} 1836 * value at {@code index - 1} is returned. 1837 * 1838 * @param seq 1839 * the source array of {@code char} units. 1840 * @param index 1841 * the position in {@code seq} following the code 1842 * point that should be returned. 1843 * @return the Unicode code point or {@code char} value before {@code index} 1844 * in {@code seq}. 1845 * @throws NullPointerException 1846 * if {@code seq} is {@code null}. 1847 * @throws IndexOutOfBoundsException 1848 * if the {@code index} is less than 1 or greater than the 1849 * length of {@code seq}. 
1850 * @since 1.5 1851 */ 1852 public static int codePointBefore(char[] seq, int index) { 1853 if (seq == null) { 1854 throw new NullPointerException(); 1855 } 1856 int len = seq.length; 1857 if (index < 1 || index > len) { 1858 throw new IndexOutOfBoundsException(); 1859 } 1860 1861 char low = seq[--index]; 1862 if (--index < 0) { 1863 return low; 1864 } 1865 char high = seq[index]; 1866 if (isSurrogatePair(high, low)) { 1867 return toCodePoint(high, low); 1868 } 1869 return low; 1870 } 1871 1872 /** 1873 * Returns the code point that preceds the {@code index} in the specified 1874 * array of character units and is not less than {@code start}. If the unit 1875 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1876 * less than {@code start} and the unit at {@code index - 2} is a 1877 * high-surrogate unit, then the supplementary code point represented by the 1878 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1879 * is returned. 1880 * 1881 * @param seq 1882 * the source array of {@code char} units. 1883 * @param index 1884 * the position in {@code seq} following the code point that 1885 * should be returned. 1886 * @param start 1887 * the index of the first element in {@code seq}. 1888 * @return the Unicode code point or {@code char} value before {@code index} 1889 * in {@code seq}. 1890 * @throws NullPointerException 1891 * if {@code seq} is {@code null}. 1892 * @throws IndexOutOfBoundsException 1893 * if the {@code index <= start}, {@code start < 0}, 1894 * {@code index} is greater than the length of {@code seq}, or 1895 * if {@code start} is equal or greater than the length of 1896 * {@code seq}. 
1897 * @since 1.5 1898 */ 1899 public static int codePointBefore(char[] seq, int index, int start) { 1900 if (seq == null) { 1901 throw new NullPointerException(); 1902 } 1903 int len = seq.length; 1904 if (index <= start || index > len || start < 0 || start >= len) { 1905 throw new IndexOutOfBoundsException(); 1906 } 1907 1908 char low = seq[--index]; 1909 if (--index < start) { 1910 return low; 1911 } 1912 char high = seq[index]; 1913 if (isSurrogatePair(high, low)) { 1914 return toCodePoint(high, low); 1915 } 1916 return low; 1917 } 1918 1919 /** 1920 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1921 * and copies the value(s) into the char array {@code dst}, starting at 1922 * index {@code dstIndex}. 1923 * 1924 * @param codePoint 1925 * the Unicode code point to encode. 1926 * @param dst 1927 * the destination array to copy the encoded value into. 1928 * @param dstIndex 1929 * the index in {@code dst} from where to start copying. 1930 * @return the number of {@code char} value units copied into {@code dst}. 1931 * @throws IllegalArgumentException 1932 * if {@code codePoint} is not a valid Unicode code point. 1933 * @throws NullPointerException 1934 * if {@code dst} is {@code null}. 1935 * @throws IndexOutOfBoundsException 1936 * if {@code dstIndex} is negative, greater than or equal to 1937 * {@code dst.length} or equals {@code dst.length - 1} when 1938 * {@code codePoint} is a 1939 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
1940 * @since 1.5 1941 */ 1942 public static int toChars(int codePoint, char[] dst, int dstIndex) { 1943 if (!isValidCodePoint(codePoint)) { 1944 throw new IllegalArgumentException(); 1945 } 1946 if (dst == null) { 1947 throw new NullPointerException(); 1948 } 1949 if (dstIndex < 0 || dstIndex >= dst.length) { 1950 throw new IndexOutOfBoundsException(); 1951 } 1952 1953 if (isSupplementaryCodePoint(codePoint)) { 1954 if (dstIndex == dst.length - 1) { 1955 throw new IndexOutOfBoundsException(); 1956 } 1957 // See RFC 2781, Section 2.1 1958 // http://www.faqs.org/rfcs/rfc2781.html 1959 int cpPrime = codePoint - 0x10000; 1960 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1961 int low = 0xDC00 | (cpPrime & 0x3FF); 1962 dst[dstIndex] = (char) high; 1963 dst[dstIndex + 1] = (char) low; 1964 return 2; 1965 } 1966 1967 dst[dstIndex] = (char) codePoint; 1968 return 1; 1969 } 1970 1971 /** 1972 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1973 * and returns it as a char array. 1974 * 1975 * @param codePoint 1976 * the Unicode code point to encode. 1977 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 1978 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 1979 * then the returned array contains two characters, otherwise it 1980 * contains just one character. 1981 * @throws IllegalArgumentException 1982 * if {@code codePoint} is not a valid Unicode code point. 
1983 * @since 1.5 1984 */ 1985 public static char[] toChars(int codePoint) { 1986 if (!isValidCodePoint(codePoint)) { 1987 throw new IllegalArgumentException(); 1988 } 1989 1990 if (isSupplementaryCodePoint(codePoint)) { 1991 int cpPrime = codePoint - 0x10000; 1992 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1993 int low = 0xDC00 | (cpPrime & 0x3FF); 1994 return new char[] { (char) high, (char) low }; 1995 } 1996 return new char[] { (char) codePoint }; 1997 } 1998 1999 /** 2000 * Counts the number of Unicode code points in the subsequence of the 2001 * specified character sequence, as delineated by {@code beginIndex} and 2002 * {@code endIndex}. Any surrogate values with missing pair values will be 2003 * counted as one code point. 2004 * 2005 * @param seq 2006 * the {@code CharSequence} to look through. 2007 * @param beginIndex 2008 * the inclusive index to begin counting at. 2009 * @param endIndex 2010 * the exclusive index to stop counting at. 2011 * @return the number of Unicode code points. 2012 * @throws NullPointerException 2013 * if {@code seq} is {@code null}. 2014 * @throws IndexOutOfBoundsException 2015 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2016 * if {@code endIndex} is greater than the length of {@code seq}. 
2017 * @since 1.5 2018 */ 2019 public static int codePointCount(CharSequence seq, int beginIndex, 2020 int endIndex) { 2021 if (seq == null) { 2022 throw new NullPointerException(); 2023 } 2024 int len = seq.length(); 2025 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2026 throw new IndexOutOfBoundsException(); 2027 } 2028 2029 int result = 0; 2030 for (int i = beginIndex; i < endIndex; i++) { 2031 char c = seq.charAt(i); 2032 if (isHighSurrogate(c)) { 2033 if (++i < endIndex) { 2034 c = seq.charAt(i); 2035 if (!isLowSurrogate(c)) { 2036 result++; 2037 } 2038 } 2039 } 2040 result++; 2041 } 2042 return result; 2043 } 2044 2045 /** 2046 * Counts the number of Unicode code points in the subsequence of the 2047 * specified char array, as delineated by {@code offset} and {@code count}. 2048 * Any surrogate values with missing pair values will be counted as one code 2049 * point. 2050 * 2051 * @param seq 2052 * the char array to look through 2053 * @param offset 2054 * the inclusive index to begin counting at. 2055 * @param count 2056 * the number of {@code char} values to look through in 2057 * {@code seq}. 2058 * @return the number of Unicode code points. 2059 * @throws NullPointerException 2060 * if {@code seq} is {@code null}. 2061 * @throws IndexOutOfBoundsException 2062 * if {@code offset < 0}, {@code count < 0} or if 2063 * {@code offset + count} is greater than the length of 2064 * {@code seq}. 
2065 * @since 1.5 2066 */ 2067 public static int codePointCount(char[] seq, int offset, int count) { 2068 if (seq == null) { 2069 throw new NullPointerException(); 2070 } 2071 int len = seq.length; 2072 int endIndex = offset + count; 2073 if (offset < 0 || count < 0 || endIndex > len) { 2074 throw new IndexOutOfBoundsException(); 2075 } 2076 2077 int result = 0; 2078 for (int i = offset; i < endIndex; i++) { 2079 char c = seq[i]; 2080 if (isHighSurrogate(c)) { 2081 if (++i < endIndex) { 2082 c = seq[i]; 2083 if (!isLowSurrogate(c)) { 2084 result++; 2085 } 2086 } 2087 } 2088 result++; 2089 } 2090 return result; 2091 } 2092 2093 /** 2094 * Determines the index in the specified character sequence that is offset 2095 * {@code codePointOffset} code points from {@code index}. 2096 * 2097 * @param seq 2098 * the character sequence to find the index in. 2099 * @param index 2100 * the start index in {@code seq}. 2101 * @param codePointOffset 2102 * the number of code points to look backwards or forwards; may 2103 * be a negative or positive value. 2104 * @return the index in {@code seq} that is {@code codePointOffset} code 2105 * points away from {@code index}. 2106 * @throws NullPointerException 2107 * if {@code seq} is {@code null}. 2108 * @throws IndexOutOfBoundsException 2109 * if {@code index < 0}, {@code index} is greater than the 2110 * length of {@code seq}, or if there are not enough values in 2111 * {@code seq} to skip {@code codePointOffset} code points 2112 * forwards or backwards (if {@code codePointOffset} is 2113 * negative) from {@code index}. 
2114 * @since 1.5 2115 */ 2116 public static int offsetByCodePoints(CharSequence seq, int index, 2117 int codePointOffset) { 2118 if (seq == null) { 2119 throw new NullPointerException(); 2120 } 2121 int len = seq.length(); 2122 if (index < 0 || index > len) { 2123 throw new IndexOutOfBoundsException(); 2124 } 2125 2126 if (codePointOffset == 0) { 2127 return index; 2128 } 2129 2130 if (codePointOffset > 0) { 2131 int codePoints = codePointOffset; 2132 int i = index; 2133 while (codePoints > 0) { 2134 codePoints--; 2135 if (i >= len) { 2136 throw new IndexOutOfBoundsException(); 2137 } 2138 if (isHighSurrogate(seq.charAt(i))) { 2139 int next = i + 1; 2140 if (next < len && isLowSurrogate(seq.charAt(next))) { 2141 i++; 2142 } 2143 } 2144 i++; 2145 } 2146 return i; 2147 } 2148 2149 assert codePointOffset < 0; 2150 int codePoints = -codePointOffset; 2151 int i = index; 2152 while (codePoints > 0) { 2153 codePoints--; 2154 i--; 2155 if (i < 0) { 2156 throw new IndexOutOfBoundsException(); 2157 } 2158 if (isLowSurrogate(seq.charAt(i))) { 2159 int prev = i - 1; 2160 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2161 i--; 2162 } 2163 } 2164 } 2165 return i; 2166 } 2167 2168 /** 2169 * Determines the index in a subsequence of the specified character array 2170 * that is offset {@code codePointOffset} code points from {@code index}. 2171 * The subsequence is delineated by {@code start} and {@code count}. 2172 * 2173 * @param seq 2174 * the character array to find the index in. 2175 * @param start 2176 * the inclusive index that marks the beginning of the 2177 * subsequence. 2178 * @param count 2179 * the number of {@code char} values to include within the 2180 * subsequence. 2181 * @param index 2182 * the start index in the subsequence of the char array. 2183 * @param codePointOffset 2184 * the number of code points to look backwards or forwards; may 2185 * be a negative or positive value. 
2186 * @return the index in {@code seq} that is {@code codePointOffset} code 2187 * points away from {@code index}. 2188 * @throws NullPointerException 2189 * if {@code seq} is {@code null}. 2190 * @throws IndexOutOfBoundsException 2191 * if {@code start < 0}, {@code count < 0}, 2192 * {@code index < start}, {@code index > start + count}, 2193 * {@code start + count} is greater than the length of 2194 * {@code seq}, or if there are not enough values in 2195 * {@code seq} to skip {@code codePointOffset} code points 2196 * forward or backward (if {@code codePointOffset} is 2197 * negative) from {@code index}. 2198 * @since 1.5 2199 */ 2200 public static int offsetByCodePoints(char[] seq, int start, int count, 2201 int index, int codePointOffset) { 2202 if (seq == null) { 2203 throw new NullPointerException(); 2204 } 2205 int end = start + count; 2206 if (start < 0 || count < 0 || end > seq.length || index < start 2207 || index > end) { 2208 throw new IndexOutOfBoundsException(); 2209 } 2210 2211 if (codePointOffset == 0) { 2212 return index; 2213 } 2214 2215 if (codePointOffset > 0) { 2216 int codePoints = codePointOffset; 2217 int i = index; 2218 while (codePoints > 0) { 2219 codePoints--; 2220 if (i >= end) { 2221 throw new IndexOutOfBoundsException(); 2222 } 2223 if (isHighSurrogate(seq[i])) { 2224 int next = i + 1; 2225 if (next < end && isLowSurrogate(seq[next])) { 2226 i++; 2227 } 2228 } 2229 i++; 2230 } 2231 return i; 2232 } 2233 2234 assert codePointOffset < 0; 2235 int codePoints = -codePointOffset; 2236 int i = index; 2237 while (codePoints > 0) { 2238 codePoints--; 2239 i--; 2240 if (i < start) { 2241 throw new IndexOutOfBoundsException(); 2242 } 2243 if (isLowSurrogate(seq[i])) { 2244 int prev = i - 1; 2245 if (prev >= start && isHighSurrogate(seq[prev])) { 2246 i--; 2247 } 2248 } 2249 } 2250 return i; 2251 } 2252 2253 /** 2254 * Convenience method to determine the value of the specified character 2255 * {@code c} in the supplied radix. 
The value of {@code radix} must be 2256 * between MIN_RADIX and MAX_RADIX. 2257 * 2258 * @param c 2259 * the character to determine the value of. 2260 * @param radix 2261 * the radix. 2262 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2263 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2264 */ 2265 public static int digit(char c, int radix) { 2266 // BEGIN android-changed 2267 // if (radix >= MIN_RADIX && radix <= MAX_RADIX) { 2268 // if (c < 128) { 2269 // // Optimized for ASCII 2270 // int result = -1; 2271 // if ('0' <= c && c <= '9') { 2272 // result = c - '0'; 2273 // } else if ('a' <= c && c <= 'z') { 2274 // result = c - ('a' - 10); 2275 // } else if ('A' <= c && c <= 'Z') { 2276 // result = c - ('A' - 10); 2277 // } 2278 // return result < radix ? result : -1; 2279 // } 2280 // int result = BinarySearch.binarySearchRange(digitKeys, c); 2281 // if (result >= 0 && c <= digitValues[result * 2]) { 2282 // int value = (char) (c - digitValues[result * 2 + 1]); 2283 // if (value >= radix) { 2284 // return -1; 2285 // } 2286 // return value; 2287 // } 2288 // } 2289 // return -1; 2290 return UCharacter.digit(c, radix); 2291 // ENd android-changed 2292 } 2293 2294 /** 2295 * Convenience method to determine the value of the character 2296 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2297 * be between MIN_RADIX and MAX_RADIX. 2298 * 2299 * @param codePoint 2300 * the character, including supplementary characters. 2301 * @param radix 2302 * the radix. 2303 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2304 * {@link #MAX_RADIX} then the value of the character in the radix; 2305 * -1 otherwise. 2306 */ 2307 public static int digit(int codePoint, int radix) { 2308 return UCharacter.digit(codePoint, radix); 2309 } 2310 2311 /** 2312 * Compares this object with the specified object and indicates if they are 2313 * equal. 
In order to be equal, {@code object} must be an instance of 2314 * {@code Character} and have the same char value as this object. 2315 * 2316 * @param object 2317 * the object to compare this double with. 2318 * @return {@code true} if the specified object is equal to this 2319 * {@code Character}; {@code false} otherwise. 2320 */ 2321 @Override 2322 public boolean equals(Object object) { 2323 return (object instanceof Character) 2324 && (value == ((Character) object).value); 2325 } 2326 2327 /** 2328 * Returns the character which represents the specified digit in the 2329 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2330 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2331 * smaller than {@code radix}. If any of these conditions does not hold, 0 2332 * is returned. 2333 * 2334 * @param digit 2335 * the integer value. 2336 * @param radix 2337 * the radix. 2338 * @return the character which represents the {@code digit} in the 2339 * {@code radix}. 2340 */ 2341 public static char forDigit(int digit, int radix) { 2342 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2343 if (0 <= digit && digit < radix) { 2344 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2345 } 2346 } 2347 return 0; 2348 } 2349 2350 /** 2351 * Gets the numeric value of the specified Unicode character. 2352 * 2353 * @param c 2354 * the Unicode character to get the numeric value of. 2355 * @return a non-negative numeric integer value if a numeric value for 2356 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2357 * -2 if the numeric value can not be represented with an integer. 
2358 */ 2359 public static int getNumericValue(char c) { 2360 // BEGIN android-changed 2361 // if (c < 128) { 2362 // // Optimized for ASCII 2363 // if (c >= '0' && c <= '9') { 2364 // return c - '0'; 2365 // } 2366 // if (c >= 'a' && c <= 'z') { 2367 // return c - ('a' - 10); 2368 // } 2369 // if (c >= 'A' && c <= 'Z') { 2370 // return c - ('A' - 10); 2371 // } 2372 // return -1; 2373 // } 2374 // int result = BinarySearch.binarySearchRange(numericKeys, c); 2375 // if (result >= 0 && c <= numericValues[result * 2]) { 2376 // char difference = numericValues[result * 2 + 1]; 2377 // if (difference == 0) { 2378 // return -2; 2379 // } 2380 // // Value is always positive, must be negative value 2381 // if (difference > c) { 2382 // return c - (short) difference; 2383 // } 2384 // return c - difference; 2385 // } 2386 // return -1; 2387 return UCharacter.getNumericValue(c); 2388 // END android-changed 2389 } 2390 2391 /** 2392 * Gets the numeric value of the specified Unicode code point. For example, 2393 * the code point '\u216B' stands for the Roman number XII, which has the 2394 * numeric value 12. 2395 * 2396 * @param codePoint 2397 * the Unicode code point to get the numeric value of. 2398 * @return a non-negative numeric integer value if a numeric value for 2399 * {@code codePoint} exists, -1 if there is no numeric value for 2400 * {@code codePoint}, -2 if the numeric value can not be 2401 * represented with an integer. 2402 */ 2403 public static int getNumericValue(int codePoint) { 2404 return UCharacter.getNumericValue(codePoint); 2405 } 2406 2407 /** 2408 * Gets the general Unicode category of the specified character. 2409 * 2410 * @param c 2411 * the character to get the category of. 2412 * @return the Unicode category of {@code c}. 
2413 */ 2414 public static int getType(char c) { 2415 // BEGIN android-changed 2416 // if(c < 1000) { 2417 // return typeValuesCache[(int)c]; 2418 // } 2419 // int result = BinarySearch.binarySearchRange(typeKeys, c); 2420 // int high = typeValues[result * 2]; 2421 // if (c <= high) { 2422 // int code = typeValues[result * 2 + 1]; 2423 // if (code < 0x100) { 2424 // return code; 2425 // } 2426 // return (c & 1) == 1 ? code >> 8 : code & 0xff; 2427 // } 2428 // return UNASSIGNED; 2429 return getType((int) c); 2430 // END android-changed 2431 } 2432 2433 /** 2434 * Gets the general Unicode category of the specified code point. 2435 * 2436 * @param codePoint 2437 * the Unicode code point to get the category of. 2438 * @return the Unicode category of {@code codePoint}. 2439 */ 2440 public static int getType(int codePoint) { 2441 // BEGIN android-changed 2442 // if (codePoint < 1000 && codePoint > 0) { 2443 // return typeValuesCache[codePoint]; 2444 // } 2445 // END android-changed 2446 int type = UCharacter.getType(codePoint); 2447 2448 // the type values returned by UCharacter are not compatible with what 2449 // the spec says.RI's Character type values skip the value 17. 2450 if (type <= Character.FORMAT) { 2451 return type; 2452 } 2453 return (type + 1); 2454 } 2455 2456 /** 2457 * Gets the Unicode directionality of the specified character. 2458 * 2459 * @param c 2460 * the character to get the directionality of. 2461 * @return the Unicode directionality of {@code c}. 2462 */ 2463 public static byte getDirectionality(char c) { 2464 // BEGIN android-changed 2465 // int result = BinarySearch.binarySearchRange(bidiKeys, c); 2466 // int high = bidiValues[result * 2]; 2467 // if (c <= high) { 2468 // int code = bidiValues[result * 2 + 1]; 2469 // if (code < 0x100) { 2470 // return (byte) (code - 1); 2471 // } 2472 // return (byte) (((c & 1) == 1 ? 
code >> 8 : code & 0xff) - 1); 2473 // } 2474 // return DIRECTIONALITY_UNDEFINED; 2475 return getDirectionality((int)c); 2476 // END android-changed 2477 } 2478 2479 /** 2480 * Gets the Unicode directionality of the specified character. 2481 * 2482 * @param codePoint 2483 * the Unicode code point to get the directionality of. 2484 * @return the Unicode directionality of {@code codePoint}. 2485 */ 2486 public static byte getDirectionality(int codePoint) { 2487 if (getType(codePoint) == Character.UNASSIGNED) { 2488 return Character.DIRECTIONALITY_UNDEFINED; 2489 } 2490 2491 byte UCDirectionality = UCharacter.getDirectionality(codePoint); 2492 if (UCDirectionality == -1) { 2493 return -1; 2494 } 2495 return DIRECTIONALITY[UCDirectionality]; 2496 } 2497 2498 /** 2499 * Indicates whether the specified character is mirrored. 2500 * 2501 * @param c 2502 * the character to check. 2503 * @return {@code true} if {@code c} is mirrored; {@code false} 2504 * otherwise. 2505 */ 2506 public static boolean isMirrored(char c) { 2507 // BEGIN android-changed 2508 // int value = c / 16; 2509 // if (value >= mirrored.length) { 2510 // return false; 2511 // } 2512 // int bit = 1 << (c % 16); 2513 // return (mirrored[value] & bit) != 0; 2514 return isMirrored((int)c); 2515 // ENd android-changed 2516 } 2517 2518 /** 2519 * Indicates whether the specified code point is mirrored. 2520 * 2521 * @param codePoint 2522 * the code point to check. 2523 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2524 * otherwise. 2525 */ 2526 public static boolean isMirrored(int codePoint) { 2527 return UCharacter.isMirrored(codePoint); 2528 } 2529 2530 @Override 2531 public int hashCode() { 2532 return value; 2533 } 2534 2535 /** 2536 * Indicates whether the specified character is defined in the Unicode 2537 * specification. 2538 * 2539 * @param c 2540 * the character to check. 
2541 * @return {@code true} if the general Unicode category of the character is 2542 * not {@code UNASSIGNED}; {@code false} otherwise. 2543 */ 2544 public static boolean isDefined(char c) { 2545 // BEGIN android-changed 2546 // return getType(c) != UNASSIGNED; 2547 return UCharacter.isDefined(c); 2548 // END android-changed 2549 } 2550 2551 /** 2552 * Indicates whether the specified code point is defined in the Unicode 2553 * specification. 2554 * 2555 * @param codePoint 2556 * the code point to check. 2557 * @return {@code true} if the general Unicode category of the code point is 2558 * not {@code UNASSIGNED}; {@code false} otherwise. 2559 */ 2560 public static boolean isDefined(int codePoint) { 2561 return UCharacter.isDefined(codePoint); 2562 } 2563 2564 /** 2565 * Indicates whether the specified character is a digit. 2566 * 2567 * @param c 2568 * the character to check. 2569 * @return {@code true} if {@code c} is a digit; {@code false} 2570 * otherwise. 2571 */ 2572 public static boolean isDigit(char c) { 2573 // Optimized case for ASCII 2574 if ('0' <= c && c <= '9') { 2575 return true; 2576 } 2577 if (c < 1632) { 2578 return false; 2579 } 2580 // BEGIN android-changed 2581 return UCharacter.isDigit(c); 2582 // END android-changed 2583 } 2584 2585 /** 2586 * Indicates whether the specified code point is a digit. 2587 * 2588 * @param codePoint 2589 * the code point to check. 2590 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2591 * otherwise. 2592 */ 2593 public static boolean isDigit(int codePoint) { 2594 return UCharacter.isDigit(codePoint); 2595 } 2596 2597 /** 2598 * Indicates whether the specified character is ignorable in a Java or 2599 * Unicode identifier. 2600 * 2601 * @param c 2602 * the character to check. 2603 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 
2604 */ 2605 public static boolean isIdentifierIgnorable(char c) { 2606 // BEGIN android-changed 2607 // return (c >= 0 && c <= 8) || (c >= 0xe && c <= 0x1b) 2608 // || (c >= 0x7f && c <= 0x9f) || getType(c) == FORMAT; 2609 return UCharacter.isIdentifierIgnorable(c); 2610 // END android-changed 2611 } 2612 2613 /** 2614 * Indicates whether the specified code point is ignorable in a Java or 2615 * Unicode identifier. 2616 * 2617 * @param codePoint 2618 * the code point to check. 2619 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2620 * otherwise. 2621 */ 2622 public static boolean isIdentifierIgnorable(int codePoint) { 2623 return UCharacter.isIdentifierIgnorable(codePoint); 2624 } 2625 2626 /** 2627 * Indicates whether the specified character is an ISO control character. 2628 * 2629 * @param c 2630 * the character to check. 2631 * @return {@code true} if {@code c} is an ISO control character; 2632 * {@code false} otherwise. 2633 */ 2634 public static boolean isISOControl(char c) { 2635 return isISOControl((int)c); 2636 } 2637 2638 /** 2639 * Indicates whether the specified code point is an ISO control character. 2640 * 2641 * @param c 2642 * the code point to check. 2643 * @return {@code true} if {@code c} is an ISO control character; 2644 * {@code false} otherwise. 2645 */ 2646 public static boolean isISOControl(int c) { 2647 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2648 } 2649 2650 /** 2651 * Indicates whether the specified character is a valid part of a Java 2652 * identifier other than the first character. 2653 * 2654 * @param c 2655 * the character to check. 2656 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2657 * {@code false} otherwise. 
2658 */ 2659 public static boolean isJavaIdentifierPart(char c) { 2660 // Optimized case for ASCII 2661 if (c < 128) { 2662 return (typeTags[c] & ISJAVAPART) != 0; 2663 } 2664 2665 int type = getType(c); 2666 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2667 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2668 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2669 || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK 2670 || (c >= 0x80 && c <= 0x9f) || type == FORMAT; 2671 } 2672 2673 /** 2674 * Indicates whether the specified code point is a valid part of a Java 2675 * identifier other than the first character. 2676 * 2677 * @param codePoint 2678 * the code point to check. 2679 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2680 * {@code false} otherwise. 2681 */ 2682 public static boolean isJavaIdentifierPart(int codePoint) { 2683 int type = getType(codePoint); 2684 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2685 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2686 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2687 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2688 || isIdentifierIgnorable(codePoint); 2689 } 2690 2691 /** 2692 * Indicates whether the specified character is a valid first character for 2693 * a Java identifier. 2694 * 2695 * @param c 2696 * the character to check. 2697 * @return {@code true} if {@code c} is a valid first character of a Java 2698 * identifier; {@code false} otherwise. 
2699 */ 2700 public static boolean isJavaIdentifierStart(char c) { 2701 // Optimized case for ASCII 2702 if (c < 128) { 2703 return (typeTags[c] & ISJAVASTART) != 0; 2704 } 2705 2706 int type = getType(c); 2707 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2708 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2709 || type == LETTER_NUMBER; 2710 } 2711 2712 /** 2713 * Indicates whether the specified code point is a valid start for a Java 2714 * identifier. 2715 * 2716 * @param codePoint 2717 * the code point to check. 2718 * @return {@code true} if {@code codePoint} is a valid start of a Java 2719 * identifier; {@code false} otherwise. 2720 */ 2721 public static boolean isJavaIdentifierStart(int codePoint) { 2722 int type = getType(codePoint); 2723 return isLetter(codePoint) || type == CURRENCY_SYMBOL 2724 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2725 } 2726 2727 /** 2728 * Indicates whether the specified character is a Java letter. 2729 * 2730 * @param c 2731 * the character to check. 2732 * @return {@code true} if {@code c} is a Java letter; {@code false} 2733 * otherwise. 2734 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2735 */ 2736 @Deprecated 2737 public static boolean isJavaLetter(char c) { 2738 return isJavaIdentifierStart(c); 2739 } 2740 2741 /** 2742 * Indicates whether the specified character is a Java letter or digit 2743 * character. 2744 * 2745 * @param c 2746 * the character to check. 2747 * @return {@code true} if {@code c} is a Java letter or digit; 2748 * {@code false} otherwise. 2749 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2750 */ 2751 @Deprecated 2752 public static boolean isJavaLetterOrDigit(char c) { 2753 return isJavaIdentifierPart(c); 2754 } 2755 2756 /** 2757 * Indicates whether the specified character is a letter. 2758 * 2759 * @param c 2760 * the character to check. 2761 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 
2762 */ 2763 public static boolean isLetter(char c) { 2764 // BEGIN android-changed 2765 // if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) { 2766 // return true; 2767 // } 2768 // if (c < 128) { 2769 // return false; 2770 // } 2771 // int type = getType(c); 2772 // return type >= UPPERCASE_LETTER && type <= OTHER_LETTER; 2773 return UCharacter.isLetter(c); 2774 // END android-changed 2775 } 2776 2777 /** 2778 * Indicates whether the specified code point is a letter. 2779 * 2780 * @param codePoint 2781 * the code point to check. 2782 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2783 * otherwise. 2784 */ 2785 public static boolean isLetter(int codePoint) { 2786 return UCharacter.isLetter(codePoint); 2787 } 2788 2789 /** 2790 * Indicates whether the specified character is a letter or a digit. 2791 * 2792 * @param c 2793 * the character to check. 2794 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2795 * otherwise. 2796 */ 2797 public static boolean isLetterOrDigit(char c) { 2798 // BEGIN android-changed 2799 // int type = getType(c); 2800 // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2801 // || type == DECIMAL_DIGIT_NUMBER; 2802 return UCharacter.isLetterOrDigit(c); 2803 // END andorid-changed 2804 } 2805 2806 /** 2807 * Indicates whether the specified code point is a letter or a digit. 2808 * 2809 * @param codePoint 2810 * the code point to check. 2811 * @return {@code true} if {@code codePoint} is a letter or a digit; 2812 * {@code false} otherwise. 2813 */ 2814 public static boolean isLetterOrDigit(int codePoint) { 2815 return UCharacter.isLetterOrDigit(codePoint); 2816 } 2817 2818 /** 2819 * Indicates whether the specified character is a lower case letter. 2820 * 2821 * @param c 2822 * the character to check. 2823 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2824 * otherwise. 
2825 */ 2826 public static boolean isLowerCase(char c) { 2827 // BEGIN android-changed 2828 // // Optimized case for ASCII 2829 // if ('a' <= c && c <= 'z') { 2830 // return true; 2831 // } 2832 // if (c < 128) { 2833 // return false; 2834 // } 2835 // 2836 // return getType(c) == LOWERCASE_LETTER; 2837 return UCharacter.isLowerCase(c); 2838 // END android-changed 2839 } 2840 2841 /** 2842 * Indicates whether the specified code point is a lower case letter. 2843 * 2844 * @param codePoint 2845 * the code point to check. 2846 * @return {@code true} if {@code codePoint} is a lower case letter; 2847 * {@code false} otherwise. 2848 */ 2849 public static boolean isLowerCase(int codePoint) { 2850 return UCharacter.isLowerCase(codePoint); 2851 } 2852 2853 /** 2854 * Indicates whether the specified character is a Java space. 2855 * 2856 * @param c 2857 * the character to check. 2858 * @return {@code true} if {@code c} is a Java space; {@code false} 2859 * otherwise. 2860 * @deprecated Use {@link #isWhitespace(char)} 2861 */ 2862 @Deprecated 2863 public static boolean isSpace(char c) { 2864 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2865 } 2866 2867 /** 2868 * Indicates whether the specified character is a Unicode space character. 2869 * That is, if it is a member of one of the Unicode categories Space 2870 * Separator, Line Separator, or Paragraph Separator. 2871 * 2872 * @param c 2873 * the character to check. 2874 * @return {@code true} if {@code c} is a Unicode space character, 2875 * {@code false} otherwise. 
2876 */ 2877 public static boolean isSpaceChar(char c) { 2878 // BEGIN android-changed 2879 // if (c == 0x20 || c == 0xa0 || c == 0x1680) { 2880 // return true; 2881 // } 2882 // if (c < 0x2000) { 2883 // return false; 2884 // } 2885 // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f 2886 // || c == 0x3000; 2887 return UCharacter.isSpaceChar(c); 2888 // END android-changed 2889 } 2890 2891 /** 2892 * Indicates whether the specified code point is a Unicode space character. 2893 * That is, if it is a member of one of the Unicode categories Space 2894 * Separator, Line Separator, or Paragraph Separator. 2895 * 2896 * @param codePoint 2897 * the code point to check. 2898 * @return {@code true} if {@code codePoint} is a Unicode space character, 2899 * {@code false} otherwise. 2900 */ 2901 public static boolean isSpaceChar(int codePoint) { 2902 return UCharacter.isSpaceChar(codePoint); 2903 } 2904 2905 /** 2906 * Indicates whether the specified character is a titlecase character. 2907 * 2908 * @param c 2909 * the character to check. 2910 * @return {@code true} if {@code c} is a titlecase character, {@code false} 2911 * otherwise. 2912 */ 2913 public static boolean isTitleCase(char c) { 2914 // BEGIN android-changed 2915 // if (c == '\u01c5' || c == '\u01c8' || c == '\u01cb' || c == '\u01f2') { 2916 // return true; 2917 // } 2918 // if (c >= '\u1f88' && c <= '\u1ffc') { 2919 // // 0x1f88 - 0x1f8f, 0x1f98 - 0x1f9f, 0x1fa8 - 0x1faf 2920 // if (c > '\u1faf') { 2921 // return c == '\u1fbc' || c == '\u1fcc' || c == '\u1ffc'; 2922 // } 2923 // int last = c & 0xf; 2924 // return last >= 8 && last <= 0xf; 2925 // } 2926 // return false; 2927 return UCharacter.isTitleCase(c); 2928 // END android-changed 2929 } 2930 2931 /** 2932 * Indicates whether the specified code point is a titlecase character. 2933 * 2934 * @param codePoint 2935 * the code point to check. 2936 * @return {@code true} if {@code codePoint} is a titlecase character, 2937 * {@code false} otherwise. 
2938 */ 2939 public static boolean isTitleCase(int codePoint) { 2940 return UCharacter.isTitleCase(codePoint); 2941 } 2942 2943 /** 2944 * Indicates whether the specified character is valid as part of a Unicode 2945 * identifier other than the first character. 2946 * 2947 * @param c 2948 * the character to check. 2949 * @return {@code true} if {@code c} is valid as part of a Unicode 2950 * identifier; {@code false} otherwise. 2951 */ 2952 public static boolean isUnicodeIdentifierPart(char c) { 2953 // BEGIN android-changed 2954 // int type = getType(c); 2955 // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2956 // || type == CONNECTOR_PUNCTUATION 2957 // || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2958 // || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK 2959 // || isIdentifierIgnorable(c); 2960 return UCharacter.isUnicodeIdentifierPart(c); 2961 // END android-changed 2962 } 2963 2964 /** 2965 * Indicates whether the specified code point is valid as part of a Unicode 2966 * identifier other than the first character. 2967 * 2968 * @param codePoint 2969 * the code point to check. 2970 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 2971 * identifier; {@code false} otherwise. 2972 */ 2973 public static boolean isUnicodeIdentifierPart(int codePoint) { 2974 return UCharacter.isUnicodeIdentifierPart(codePoint); 2975 } 2976 2977 /** 2978 * Indicates whether the specified character is a valid initial character 2979 * for a Unicode identifier. 2980 * 2981 * @param c 2982 * the character to check. 2983 * @return {@code true} if {@code c} is a valid first character for a 2984 * Unicode identifier; {@code false} otherwise. 
2985 */ 2986 public static boolean isUnicodeIdentifierStart(char c) { 2987 // BEGIN android-changed 2988 // int type = getType(c); 2989 // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2990 // || type == LETTER_NUMBER; 2991 return UCharacter.isUnicodeIdentifierStart(c); 2992 // END android-changed 2993 } 2994 2995 /** 2996 * Indicates whether the specified code point is a valid initial character 2997 * for a Unicode identifier. 2998 * 2999 * @param codePoint 3000 * the code point to check. 3001 * @return {@code true} if {@code codePoint} is a valid first character for 3002 * a Unicode identifier; {@code false} otherwise. 3003 */ 3004 public static boolean isUnicodeIdentifierStart(int codePoint) { 3005 return UCharacter.isUnicodeIdentifierStart(codePoint); 3006 } 3007 3008 /** 3009 * Indicates whether the specified character is an upper case letter. 3010 * 3011 * @param c 3012 * the character to check. 3013 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3014 * otherwise. 3015 */ 3016 public static boolean isUpperCase(char c) { 3017 // Optimized case for ASCII 3018 if ('A' <= c && c <= 'Z') { 3019 return true; 3020 } 3021 if (c < 128) { 3022 return false; 3023 } 3024 // BEGIN android-changed 3025 return UCharacter.isUpperCase(c); 3026 // END android-changed 3027 } 3028 3029 /** 3030 * Indicates whether the specified code point is an upper case letter. 3031 * 3032 * @param codePoint 3033 * the code point to check. 3034 * @return {@code true} if {@code codePoint} is a upper case letter; 3035 * {@code false} otherwise. 3036 */ 3037 public static boolean isUpperCase(int codePoint) { 3038 return UCharacter.isUpperCase(codePoint); 3039 } 3040 3041 /** 3042 * Indicates whether the specified character is a whitespace character in 3043 * Java. 3044 * 3045 * @param c 3046 * the character to check. 3047 * @return {@code true} if the supplied {@code c} is a whitespace character 3048 * in Java; {@code false} otherwise. 
3049 */ 3050 public static boolean isWhitespace(char c) { 3051 // BEGIN android-changed 3052 // // Optimized case for ASCII 3053 // if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) { 3054 // return true; 3055 // } 3056 // if (c == 0x1680) { 3057 // return true; 3058 // } 3059 // if (c < 0x2000 || c == 0x2007) { 3060 // return false; 3061 // } 3062 // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000; 3063 return UCharacter.isWhitespace(c); 3064 // END android-changed 3065 } 3066 3067 /** 3068 * Indicates whether the specified code point is a whitespace character in 3069 * Java. 3070 * 3071 * @param codePoint 3072 * the code point to check. 3073 * @return {@code true} if the supplied {@code c} is a whitespace character 3074 * in Java; {@code false} otherwise. 3075 */ 3076 public static boolean isWhitespace(int codePoint) { 3077 //FIXME depends on ICU when the codePoint is '\u2007' 3078 return UCharacter.isWhitespace(codePoint); 3079 3080 } 3081 3082 /** 3083 * Reverses the order of the first and second byte in the specified 3084 * character. 3085 * 3086 * @param c 3087 * the character to reverse. 3088 * @return the character with reordered bytes. 3089 */ 3090 public static char reverseBytes(char c) { 3091 return (char)((c<<8) | (c>>8)); 3092 } 3093 3094 /** 3095 * Returns the lower case equivalent for the specified character if the 3096 * character is an upper case letter. Otherwise, the specified character is 3097 * returned unchanged. 3098 * 3099 * @param c 3100 * the character 3101 * @return if {@code c} is an upper case character then its lower case 3102 * counterpart, otherwise just {@code c}. 
3103 */ 3104 public static char toLowerCase(char c) { 3105 // BEGIN android-changed 3106 // // Optimized case for ASCII 3107 // if ('A' <= c && c <= 'Z') { 3108 // return (char) (c + ('a' - 'A')); 3109 // } 3110 // if (c < 192) {// || c == 215 || (c > 222 && c < 256)) { 3111 // return c; 3112 // } 3113 // if (c<1000) { 3114 // return (char)lowercaseValuesCache[c-192]; 3115 // } 3116 // 3117 // int result = BinarySearch.binarySearchRange(lowercaseKeys, c); 3118 // if (result >= 0) { 3119 // boolean by2 = false; 3120 // char start = lowercaseKeys.charAt(result); 3121 // char end = lowercaseValues[result * 2]; 3122 // if ((start & 0x8000) != (end & 0x8000)) { 3123 // end ^= 0x8000; 3124 // by2 = true; 3125 // } 3126 // if (c <= end) { 3127 // if (by2 && (c & 1) != (start & 1)) { 3128 // return c; 3129 // } 3130 // char mapping = lowercaseValues[result * 2 + 1]; 3131 // return (char) (c + mapping); 3132 // } 3133 // } 3134 // return c; 3135 return (char)UCharacter.toLowerCase(c); 3136 // END android-changed 3137 } 3138 3139 /** 3140 * Returns the lower case equivalent for the specified code point if it is 3141 * an upper case letter. Otherwise, the specified code point is returned 3142 * unchanged. 3143 * 3144 * @param codePoint 3145 * the code point to check. 3146 * @return if {@code codePoint} is an upper case character then its lower 3147 * case counterpart, otherwise just {@code codePoint}. 3148 */ 3149 public static int toLowerCase(int codePoint) { 3150 return UCharacter.toLowerCase(codePoint); 3151 } 3152 3153 @Override 3154 public String toString() { 3155 return String.valueOf(value); 3156 } 3157 3158 /** 3159 * Converts the specified character to its string representation. 3160 * 3161 * @param value 3162 * the character to convert. 3163 * @return the character converted to a string. 
3164 */ 3165 public static String toString(char value) { 3166 return String.valueOf(value); 3167 } 3168 3169 /** 3170 * Returns the title case equivalent for the specified character if it 3171 * exists. Otherwise, the specified character is returned unchanged. 3172 * 3173 * @param c 3174 * the character to convert. 3175 * @return the title case equivalent of {@code c} if it exists, otherwise 3176 * {@code c}. 3177 */ 3178 public static char toTitleCase(char c) { 3179 // BEGIN android-changed 3180 // if (isTitleCase(c)) { 3181 // return c; 3182 // } 3183 // int result = BinarySearch.binarySearch(titlecaseKeys, c); 3184 // if (result >= 0) { 3185 // return titlecaseValues[result]; 3186 // } 3187 // return toUpperCase(c); 3188 return (char)UCharacter.toTitleCase(c); 3189 // ENd android-changed 3190 } 3191 3192 /** 3193 * Returns the title case equivalent for the specified code point if it 3194 * exists. Otherwise, the specified code point is returned unchanged. 3195 * 3196 * @param codePoint 3197 * the code point to convert. 3198 * @return the title case equivalent of {@code codePoint} if it exists, 3199 * otherwise {@code codePoint}. 3200 */ 3201 public static int toTitleCase(int codePoint) { 3202 return UCharacter.toTitleCase(codePoint); 3203 } 3204 3205 /** 3206 * Returns the upper case equivalent for the specified character if the 3207 * character is a lower case letter. Otherwise, the specified character is 3208 * returned unchanged. 3209 * 3210 * @param c 3211 * the character to convert. 3212 * @return if {@code c} is a lower case character then its upper case 3213 * counterpart, otherwise just {@code c}. 
*/
    public static char toUpperCase(char c) {
        // BEGIN android-changed
        // // Optimized case for ASCII
        // if ('a' <= c && c <= 'z') {
        //     return (char) (c - ('a' - 'A'));
        // }
        // if (c < 181) {
        //     return c;
        // }
        // if (c<1000) {
        //     return (char)uppercaseValuesCache[(int)c-181];
        // }
        // int result = BinarySearch.binarySearchRange(uppercaseKeys, c);
        // if (result >= 0) {
        //     boolean by2 = false;
        //     char start = uppercaseKeys.charAt(result);
        //     char end = uppercaseValues[result * 2];
        //     if ((start & 0x8000) != (end & 0x8000)) {
        //         end ^= 0x8000;
        //         by2 = true;
        //     }
        //     if (c <= end) {
        //         if (by2 && (c & 1) != (start & 1)) {
        //             return c;
        //         }
        //         char mapping = uppercaseValues[result * 2 + 1];
        //         return (char) (c + mapping);
        //     }
        // }
        // return c;
        return (char) UCharacter.toUpperCase(c);
        // END android-changed
    }

    /**
     * Maps the given code point to its upper case form. A code point that is
     * not a lower case letter is returned unchanged.
     *
     * @param codePoint
     *            the code point to convert.
     * @return if {@code codePoint} is a lower case character then its upper
     *         case counterpart, otherwise just {@code codePoint}.
     */
    public static int toUpperCase(int codePoint) {
        // Thin delegation to the icu4jni helper.
        return UCharacter.toUpperCase(codePoint);
    }
}