Character.java revision 9a501d6cb2a26c3b5d77497826ea33481716ab2d
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21// BEGIN android-removed 22// import java.util.SortedMap; 23// import java.util.TreeMap; 24// 25// import org.apache.harmony.luni.util.BinarySearch; 26// END android-removed 27 28// BEGIN android-changed 29import com.ibm.icu4jni.lang.UCharacter; 30// END android-changed 31 32/** 33 * The wrapper for the primitive type {@code char}. This class also provides a 34 * number of utility methods for working with characters. 35 * <p> 36 * Character data is based upon the Unicode Standard, 4.0. The Unicode 37 * specification, character tables and other information are available at <a 38 * href="http://www.unicode.org/">http://www.unicode.org/</a>. 39 * <p> 40 * Unicode characters are referred to as <i>code points</i>. The range of valid 41 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 42 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 43 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 44 * encoding and {@code char} pairs are used to represent code points in the 45 * supplementary range. 
A pair of {@code char} values that represent a 46 * supplementary character are made up of a <i>high surrogate</i> with a value 47 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 48 * 0xDC00 to 0xDFFF. 49 * <p> 50 * On the Java platform a {@code char} value represents either a single BMP code 51 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 52 * is used to represent all Unicode code points. 53 * 54 * @since 1.0 55 */ 56public final class Character implements Serializable, Comparable<Character> { 57 private static final long serialVersionUID = 3786198910865385080L; 58 59 private final char value; 60 61 /** 62 * The minimum {@code Character} value. 63 */ 64 public static final char MIN_VALUE = '\u0000'; 65 66 /** 67 * The maximum {@code Character} value. 68 */ 69 public static final char MAX_VALUE = '\uffff'; 70 71 /** 72 * The minimum radix used for conversions between characters and integers. 73 */ 74 public static final int MIN_RADIX = 2; 75 76 /** 77 * The maximum radix used for conversions between characters and integers. 78 */ 79 public static final int MAX_RADIX = 36; 80 81 /** 82 * The {@link Class} object that represents the primitive type {@code char}. 83 */ 84 @SuppressWarnings("unchecked") 85 public static final Class<Character> TYPE = (Class<Character>) new char[0] 86 .getClass().getComponentType(); 87 88 // Note: This can't be set to "char.class", since *that* is 89 // defined to be "java.lang.Character.TYPE"; 90 91 /** 92 * Unicode category constant Cn. 93 */ 94 public static final byte UNASSIGNED = 0; 95 96 /** 97 * Unicode category constant Lu. 98 */ 99 public static final byte UPPERCASE_LETTER = 1; 100 101 /** 102 * Unicode category constant Ll. 103 */ 104 public static final byte LOWERCASE_LETTER = 2; 105 106 /** 107 * Unicode category constant Lt. 108 */ 109 public static final byte TITLECASE_LETTER = 3; 110 111 /** 112 * Unicode category constant Lm. 
113 */ 114 public static final byte MODIFIER_LETTER = 4; 115 116 /** 117 * Unicode category constant Lo. 118 */ 119 public static final byte OTHER_LETTER = 5; 120 121 /** 122 * Unicode category constant Mn. 123 */ 124 public static final byte NON_SPACING_MARK = 6; 125 126 /** 127 * Unicode category constant Me. 128 */ 129 public static final byte ENCLOSING_MARK = 7; 130 131 /** 132 * Unicode category constant Mc. 133 */ 134 public static final byte COMBINING_SPACING_MARK = 8; 135 136 /** 137 * Unicode category constant Nd. 138 */ 139 public static final byte DECIMAL_DIGIT_NUMBER = 9; 140 141 /** 142 * Unicode category constant Nl. 143 */ 144 public static final byte LETTER_NUMBER = 10; 145 146 /** 147 * Unicode category constant No. 148 */ 149 public static final byte OTHER_NUMBER = 11; 150 151 /** 152 * Unicode category constant Zs. 153 */ 154 public static final byte SPACE_SEPARATOR = 12; 155 156 /** 157 * Unicode category constant Zl. 158 */ 159 public static final byte LINE_SEPARATOR = 13; 160 161 /** 162 * Unicode category constant Zp. 163 */ 164 public static final byte PARAGRAPH_SEPARATOR = 14; 165 166 /** 167 * Unicode category constant Cc. 168 */ 169 public static final byte CONTROL = 15; 170 171 /** 172 * Unicode category constant Cf. 173 */ 174 public static final byte FORMAT = 16; 175 176 /** 177 * Unicode category constant Co. 178 */ 179 public static final byte PRIVATE_USE = 18; 180 181 /** 182 * Unicode category constant Cs. 183 */ 184 public static final byte SURROGATE = 19; 185 186 /** 187 * Unicode category constant Pd. 188 */ 189 public static final byte DASH_PUNCTUATION = 20; 190 191 /** 192 * Unicode category constant Ps. 193 */ 194 public static final byte START_PUNCTUATION = 21; 195 196 /** 197 * Unicode category constant Pe. 198 */ 199 public static final byte END_PUNCTUATION = 22; 200 201 /** 202 * Unicode category constant Pc. 203 */ 204 public static final byte CONNECTOR_PUNCTUATION = 23; 205 206 /** 207 * Unicode category constant Po. 
208 */ 209 public static final byte OTHER_PUNCTUATION = 24; 210 211 /** 212 * Unicode category constant Sm. 213 */ 214 public static final byte MATH_SYMBOL = 25; 215 216 /** 217 * Unicode category constant Sc. 218 */ 219 public static final byte CURRENCY_SYMBOL = 26; 220 221 /** 222 * Unicode category constant Sk. 223 */ 224 public static final byte MODIFIER_SYMBOL = 27; 225 226 /** 227 * Unicode category constant So. 228 */ 229 public static final byte OTHER_SYMBOL = 28; 230 231 /** 232 * Unicode category constant Pi. 233 * 234 * @since 1.4 235 */ 236 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 237 238 /** 239 * Unicode category constant Pf. 240 * 241 * @since 1.4 242 */ 243 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 244 245 /** 246 * Unicode bidirectional constant. 247 * 248 * @since 1.4 249 */ 250 public static final byte DIRECTIONALITY_UNDEFINED = -1; 251 252 /** 253 * Unicode bidirectional constant L. 254 * 255 * @since 1.4 256 */ 257 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 258 259 /** 260 * Unicode bidirectional constant R. 261 * 262 * @since 1.4 263 */ 264 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 265 266 /** 267 * Unicode bidirectional constant AL. 268 * 269 * @since 1.4 270 */ 271 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 272 273 /** 274 * Unicode bidirectional constant EN. 275 * 276 * @since 1.4 277 */ 278 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 279 280 /** 281 * Unicode bidirectional constant ES. 282 * 283 * @since 1.4 284 */ 285 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 286 287 /** 288 * Unicode bidirectional constant ET. 289 * 290 * @since 1.4 291 */ 292 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 293 294 /** 295 * Unicode bidirectional constant AN. 296 * 297 * @since 1.4 298 */ 299 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 300 301 /** 302 * Unicode bidirectional constant CS. 
303 * 304 * @since 1.4 305 */ 306 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 307 308 /** 309 * Unicode bidirectional constant NSM. 310 * 311 * @since 1.4 312 */ 313 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 314 315 /** 316 * Unicode bidirectional constant BN. 317 * 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 321 322 /** 323 * Unicode bidirectional constant B. 324 * 325 * @since 1.4 326 */ 327 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 328 329 /** 330 * Unicode bidirectional constant S. 331 * 332 * @since 1.4 333 */ 334 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 335 336 /** 337 * Unicode bidirectional constant WS. 338 * 339 * @since 1.4 340 */ 341 public static final byte DIRECTIONALITY_WHITESPACE = 12; 342 343 /** 344 * Unicode bidirectional constant ON. 345 * 346 * @since 1.4 347 */ 348 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 349 350 /** 351 * Unicode bidirectional constant LRE. 352 * 353 * @since 1.4 354 */ 355 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 356 357 /** 358 * Unicode bidirectional constant LRO. 359 * 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 363 364 /** 365 * Unicode bidirectional constant RLE. 366 * 367 * @since 1.4 368 */ 369 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 370 371 /** 372 * Unicode bidirectional constant RLO. 373 * 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 377 378 /** 379 * Unicode bidirectional constant PDF. 380 * 381 * @since 1.4 382 */ 383 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 384 385 /** 386 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 387 * encoding, {@code '\uD800'}. 
388 * 389 * @since 1.5 390 */ 391 public static final char MIN_HIGH_SURROGATE = '\uD800'; 392 393 /** 394 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 395 * encoding, {@code '\uDBFF'}. 396 * 397 * @since 1.5 398 */ 399 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 400 401 /** 402 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 403 * encoding, {@code '\uDC00'}. 404 * 405 * @since 1.5 406 */ 407 public static final char MIN_LOW_SURROGATE = '\uDC00'; 408 409 /** 410 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 411 * encoding, {@code '\uDFFF'}. 412 * 413 * @since 1.5 414 */ 415 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 416 417 /** 418 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 419 * 420 * @since 1.5 421 */ 422 public static final char MIN_SURROGATE = '\uD800'; 423 424 /** 425 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 426 * 427 * @since 1.5 428 */ 429 public static final char MAX_SURROGATE = '\uDFFF'; 430 431 /** 432 * The minimum value of a supplementary code point, {@code U+010000}. 433 * 434 * @since 1.5 435 */ 436 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 437 438 /** 439 * The minimum code point value, {@code U+0000}. 440 * 441 * @since 1.5 442 */ 443 public static final int MIN_CODE_POINT = 0x000000; 444 445 /** 446 * The maximum code point value, {@code U+10FFFF}. 447 * 448 * @since 1.5 449 */ 450 public static final int MAX_CODE_POINT = 0x10FFFF; 451 452 /** 453 * The number of bits required to represent a {@code Character} value in 454 * unsigned form. 455 * 456 * @since 1.5 457 */ 458 public static final int SIZE = 16; 459 460 // BEGIN android-removed 461 // Unicode 3.0.1 (same as Unicode 3.0.0) 462 // private static final String bidiKeys = ... 463 464 // private static final char[] bidiValues = ... 465 466 // private static final char[] mirrored = ... 
467 468 // Unicode 3.0.1 (same as Unicode 3.0.0) 469 // private static final String typeKeys = ... 470 471 // private static final char[] typeValues = ... 472 473 // private static final int[] typeValuesCache = ... 474 475 // Unicode 3.0.1 (same as Unicode 3.0.0) 476 // private static final String uppercaseKeys = ... 477 478 // private static final char[] uppercaseValues = ... 479 480 // private static final int[] uppercaseValuesCache = ... 481 482 // private static final String lowercaseKeys = ... 483 484 // private static final char[] lowercaseValues = ... 485 486 // private static final int[] lowercaseValuesCache = ... 487 488 // private static final String digitKeys = ... 489 490 // private static final char[] digitValues = ... 491 // END android-removed 492 493 // BEGIN android-note 494 // put this in a helper class so that it's only initialized on demand? 495 // END android-note 496 private static final char[] typeTags = "\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0003\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0002" 497 .getValue(); 498 499 // BEGIN android-note 500 // put this in a helper class so that it's only initialized on demand? 
501 // END android-note 502 /* Table of DIRECTIONALITY_* constants. The entries are not in the numeric order of the constants (index 2 holds DIRECTIONALITY_EUROPEAN_NUMBER, whose value is 3), so this is presumably indexed by an externally supplied direction code -- TODO confirm at the lookup site. */ private static final byte[] DIRECTIONALITY = new byte[] { 503 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 504 DIRECTIONALITY_EUROPEAN_NUMBER, 505 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 506 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 507 DIRECTIONALITY_ARABIC_NUMBER, 508 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 509 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 510 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 511 DIRECTIONALITY_OTHER_NEUTRALS, 512 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 513 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 514 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 515 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 516 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 517 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 518 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 519 520 private static final int ISJAVASTART = 1; 521 522 private static final int ISJAVAPART = 2; 523 524 // BEGIN android-removed 525 // Unicode 3.0.1 (same as Unicode 3.0.0) 526 // private static final String titlecaseKeys = ... 527 528 // private static final char[] titlecaseValues = ... 529 530 // Unicode 3.0.0 (NOT the same as Unicode 3.0.1) 531 // private static final String numericKeys = ... 532 533 // private static final char[] numericValues = ... 534 // END android-removed 535 536 /** 537 * Represents a subset of the Unicode character set. A subset is compared by * object identity ({@code equals} and {@code hashCode} are final and delegate * to {@link Object}) and is described by its name. 538 */ 539 public static class Subset { 540 /** This subset's name, as returned by {@link #toString()}; assigned by the constructor, which rejects {@code null}. */ String name; 541 542 /** 543 * Constructs a new {@code Subset}. 544 * 545 * @param string 546 * this subset's name. * @throws NullPointerException * if {@code string} is {@code null}. 547 */ 548 protected Subset(String string) { 549 if (string == null) { 550 throw new NullPointerException(); 551 } 552 name = string; 553 } 554 555 /** 556 * Compares this character subset with the specified object. Uses 557 * {@link java.lang.Object#equals(Object)} to do the comparison. 558 * 559 * @param object 560 * the object to compare this character subset with. 
561 * @return {@code true} if {@code object} is this subset, that is, if 562 * {@code object == this}; {@code false} otherwise. 563 */ 564 @Override 565 public final boolean equals(Object object) { 566 return super.equals(object); 567 } 568 569 /** 570 * Returns the integer hash code for this character subset. 571 * 572 * @return this subset's hash code, which is the hash code computed by 573 * {@link java.lang.Object#hashCode()}. 574 */ 575 @Override 576 public final int hashCode() { 577 return super.hashCode(); 578 } 579 580 /** 581 * Returns the string representation of this subset. 582 * 583 * @return this subset's name. 584 */ 585 @Override 586 public final String toString() { 587 return name; 588 } 589 } 590 591 /** 592 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 593 * specification. 594 * 595 * @since 1.2 596 */ 597 public static final class UnicodeBlock extends Subset { 598 /** 599 * The "Surrogates Area" Unicode Block. 600 * 601 * @deprecated As of Java 5, this block has been replaced by 602 * {@link #HIGH_SURROGATES}, 603 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 604 * {@link #LOW_SURROGATES}. 605 */ 606 @Deprecated 607 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0); 608 /** 609 * The "Basic Latin" Unicode Block. 610 * 611 * @since 1.2 612 */ 613 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f); 614 /** 615 * The "Latin-1 Supplement" Unicode Block. 616 * 617 * @since 1.2 618 */ 619 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff); 620 /** 621 * The "Latin Extended-A" Unicode Block. 622 * 623 * @since 1.2 624 */ 625 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f); 626 /** 627 * The "Latin Extended-B" Unicode Block. 
628 * 629 * @since 1.2 630 */ 631 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 632 /** 633 * The "IPA Extensions" Unicode Block. 634 * 635 * @since 1.2 636 */ 637 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 638 /** 639 * The "Spacing Modifier Letters" Unicode Block. 640 * 641 * @since 1.2 642 */ 643 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 644 /** 645 * The "Combining Diacritical Marks" Unicode Block. 646 * 647 * @since 1.2 648 */ 649 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 650 /** 651 * The "Greek and Coptic" Unicode Block. Previously referred 652 * to as "Greek". 653 * 654 * @since 1.2 655 */ 656 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 657 /** 658 * The "Cyrillic" Unicode Block. 659 * 660 * @since 1.2 661 */ 662 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 663 /** 664 * The "Cyrillic Supplement" Unicode Block. Previously 665 * referred to as "Cyrillic Supplementary". 666 * 667 * @since 1.5 668 */ 669 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 670 /** 671 * The "Armenian" Unicode Block. 672 * 673 * @since 1.2 674 */ 675 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 676 /** 677 * The "Hebrew" Unicode Block. 678 * 679 * @since 1.2 680 */ 681 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 682 /** 683 * The "Arabic" Unicode Block. 684 * 685 * @since 1.2 686 */ 687 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 688 /** 689 * The "Syriac" Unicode Block. 
690 * 691 * @since 1.4 692 */ 693 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 694 /** 695 * The "Thaana" Unicode Block. 696 * 697 * @since 1.4 698 */ 699 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 700 /** 701 * The "Devanagari" Unicode Block. 702 * 703 * @since 1.2 704 */ 705 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 706 /** 707 * The "Bengali" Unicode Block. 708 * 709 * @since 1.2 710 */ 711 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 712 /** 713 * The "Gurmukhi" Unicode Block. 714 * 715 * @since 1.2 716 */ 717 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 718 /** 719 * The "Gujarati" Unicode Block. 720 * 721 * @since 1.2 722 */ 723 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 724 /** 725 * The "Oriya" Unicode Block. 726 * 727 * @since 1.2 728 */ 729 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 730 /** 731 * The "Tamil" Unicode Block. 732 * 733 * @since 1.2 734 */ 735 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 736 /** 737 * The "Telugu" Unicode Block. 738 * 739 * @since 1.2 740 */ 741 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 742 /** 743 * The "Kannada" Unicode Block. 744 * 745 * @since 1.2 746 */ 747 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 748 /** 749 * The "Malayalam" Unicode Block. 750 * 751 * @since 1.2 752 */ 753 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 754 /** 755 * The "Sinhala" Unicode Block. 756 * 757 * @since 1.4 758 */ 759 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 760 /** 761 * The "Thai" Unicode Block. 
762 * 763 * @since 1.2 764 */ 765 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 766 /** 767 * The "Lao" Unicode Block. 768 * 769 * @since 1.2 770 */ 771 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 772 /** 773 * The "Tibetan" Unicode Block. 774 * 775 * @since 1.2 776 */ 777 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 778 /** 779 * The "Myanmar" Unicode Block. 780 * 781 * @since 1.4 782 */ 783 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 784 /** 785 * The "Georgian" Unicode Block. 786 * 787 * @since 1.2 788 */ 789 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 790 /** 791 * The "Hangul Jamo" Unicode Block. 792 * 793 * @since 1.2 794 */ 795 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 796 /** 797 * The "Ethiopic" Unicode Block. 798 * 799 * @since 1.4 800 */ 801 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 802 /** 803 * The "Cherokee" Unicode Block. 804 * 805 * @since 1.4 806 */ 807 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 808 /** 809 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 810 * 811 * @since 1.4 812 */ 813 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 814 /** 815 * The "Ogham" Unicode Block. 816 * 817 * @since 1.4 818 */ 819 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 820 /** 821 * The "Runic" Unicode Block. 822 * 823 * @since 1.4 824 */ 825 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 826 /** 827 * The "Tagalog" Unicode Block. 
828 * 829 * @since 1.5 830 */ 831 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 832 /** 833 * The "Hanunoo" Unicode Block. 834 * 835 * @since 1.5 836 */ 837 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 838 /** 839 * The "Buhid" Unicode Block. 840 * 841 * @since 1.5 842 */ 843 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 844 /** 845 * The "Tagbanwa" Unicode Block. 846 * 847 * @since 1.5 848 */ 849 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 850 /** 851 * The "Khmer" Unicode Block. 852 * 853 * @since 1.4 854 */ 855 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 856 /** 857 * The "Mongolian" Unicode Block. 858 * 859 * @since 1.4 860 */ 861 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 862 /** 863 * The "Limbu" Unicode Block. 864 * 865 * @since 1.5 866 */ 867 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 868 /** 869 * The "Tai Le" Unicode Block. 870 * 871 * @since 1.5 872 */ 873 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 874 /** 875 * The "Khmer Symbols" Unicode Block. 876 * 877 * @since 1.5 878 */ 879 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 880 /** 881 * The "Phonetic Extensions" Unicode Block. 882 * 883 * @since 1.5 884 */ 885 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 886 /** 887 * The "Latin Extended Additional" Unicode Block. 888 * 889 * @since 1.2 890 */ 891 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 892 /** 893 * The "Greek Extended" Unicode Block. 
894 * 895 * @since 1.2 896 */ 897 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 898 /** 899 * The "General Punctuation" Unicode Block. 900 * 901 * @since 1.2 902 */ 903 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 904 /** 905 * The "Superscripts and Subscripts" Unicode Block. 906 * 907 * @since 1.2 908 */ 909 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 910 /** 911 * The "Currency Symbols" Unicode Block. 912 * 913 * @since 1.2 914 */ 915 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 916 /** 917 * The "Combining Diacritical Marks for Symbols" Unicode 918 * Block. Previously referred to as "Combining Marks for 919 * Symbols". 920 * 921 * @since 1.2 922 */ 923 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 924 /** 925 * The "Letterlike Symbols" Unicode Block. 926 * 927 * @since 1.2 928 */ 929 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 930 /** 931 * The "Number Forms" Unicode Block. 932 * 933 * @since 1.2 934 */ 935 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 936 /** 937 * The "Arrows" Unicode Block. 938 * 939 * @since 1.2 940 */ 941 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 942 /** 943 * The "Mathematical Operators" Unicode Block. 944 * 945 * @since 1.2 946 */ 947 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 948 /** 949 * The "Miscellaneous Technical" Unicode Block. 
950 * 951 * @since 1.2 952 */ 953 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 954 /** 955 * The "Control Pictures" Unicode Block. 956 * 957 * @since 1.2 958 */ 959 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 960 /** 961 * The "Optical Character Recognition" Unicode Block. 962 * 963 * @since 1.2 964 */ 965 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 966 /** 967 * The "Enclosed Alphanumerics" Unicode Block. 968 * 969 * @since 1.2 970 */ 971 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 972 /** 973 * The "Box Drawing" Unicode Block. 974 * 975 * @since 1.2 976 */ 977 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 978 /** 979 * The "Block Elements" Unicode Block. 980 * 981 * @since 1.2 982 */ 983 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 984 /** 985 * The "Geometric Shapes" Unicode Block. 986 * 987 * @since 1.2 988 */ 989 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 990 /** 991 * The "Miscellaneous Symbols" Unicode Block. 992 * 993 * @since 1.2 994 */ 995 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 996 /** 997 * The "Dingbats" Unicode Block. 998 * 999 * @since 1.2 1000 */ 1001 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 1002 /** 1003 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 
1004 * 1005 * @since 1.5 1006 */ 1007 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef); 1008 /** 1009 * The "Supplemental Arrows-A" Unicode Block. 1010 * 1011 * @since 1.5 1012 */ 1013 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff); 1014 /** 1015 * The "Braille Patterns" Unicode Block. 1016 * 1017 * @since 1.4 1018 */ 1019 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff); 1020 /** 1021 * The "Supplemental Arrows-B" Unicode Block. 1022 * 1023 * @since 1.5 1024 */ 1025 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f); 1026 /** 1027 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 1028 * 1029 * @since 1.5 1030 */ 1031 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff); 1032 /** 1033 * The "Supplemental Mathematical Operators" Unicode Block. 1034 * 1035 * @since 1.5 1036 */ 1037 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff); 1038 /** 1039 * The "Miscellaneous Symbols and Arrows" Unicode Block. 1040 * 1041 * @since 1.2 1042 */ 1043 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff); 1044 /** 1045 * The "CJK Radicals Supplement" Unicode Block. 1046 * 1047 * @since 1.4 1048 */ 1049 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff); 1050 /** 1051 * The "Kangxi Radicals" Unicode Block. 
1052 * 1053 * @since 1.4 1054 */ 1055 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf); 1056 /** 1057 * The "Ideographic Description Characters" Unicode Block. 1058 * 1059 * @since 1.4 1060 */ 1061 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff); 1062 /** 1063 * The "CJK Symbols and Punctuation" Unicode Block. 1064 * 1065 * @since 1.2 1066 */ 1067 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f); 1068 /** 1069 * The "Hiragana" Unicode Block. 1070 * 1071 * @since 1.2 1072 */ 1073 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f); 1074 /** 1075 * The "Katakana" Unicode Block. 1076 * 1077 * @since 1.2 1078 */ 1079 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff); 1080 /** 1081 * The "Bopomofo" Unicode Block. 1082 * 1083 * @since 1.2 1084 */ 1085 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f); 1086 /** 1087 * The "Hangul Compatibility Jamo" Unicode Block. 1088 * 1089 * @since 1.2 1090 */ 1091 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f); 1092 /** 1093 * The "Kanbun" Unicode Block. 1094 * 1095 * @since 1.2 1096 */ 1097 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f); 1098 /** 1099 * The "Bopomofo Extended" Unicode Block. 1100 * 1101 * @since 1.4 1102 */ 1103 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf); 1104 /** 1105 * The "Katakana Phonetic Extensions" Unicode Block. 
1106 * 1107 * @since 1.5 1108 */ 1109 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff); 1110 /** 1111 * The "Enclosed CJK Letters and Months" Unicode Block. 1112 * 1113 * @since 1.2 1114 */ 1115 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff); 1116 /** 1117 * The "CJK Compatibility" Unicode Block. 1118 * 1119 * @since 1.2 1120 */ 1121 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff); 1122 /** 1123 * The "CJK Unified Ideographs Extension A" Unicode Block. 1124 * 1125 * @since 1.4 1126 */ 1127 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf); 1128 /** 1129 * The "Yijing Hexagram Symbols" Unicode Block. 1130 * 1131 * @since 1.5 1132 */ 1133 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff); 1134 /** 1135 * The "CJK Unified Ideographs" Unicode Block. 1136 * 1137 * @since 1.2 1138 */ 1139 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff); 1140 /** 1141 * The "Yi Syllables" Unicode Block. 1142 * 1143 * @since 1.4 1144 */ 1145 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f); 1146 /** 1147 * The "Yi Radicals" Unicode Block. 1148 * 1149 * @since 1.4 1150 */ 1151 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf); 1152 /** 1153 * The "Hangul Syllables" Unicode Block. 1154 * 1155 * @since 1.2 1156 */ 1157 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af); 1158 /** 1159 * The "High Surrogates" Unicode Block. 
This block represents 1160 * code point values in the high surrogate range 0xD800 to 0xDB7F 1161 */ 1162 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f); 1163 /** 1164 * The "High Private Use Surrogates" Unicode Block. This block 1165 * represents code point values in the high surrogate range 0xDB80 to 1166 * 0xDBFF 1167 */ 1168 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff); 1169 /** 1170 * The "Low Surrogates" Unicode Block. This block represents 1171 * code point values in the low surrogate range 0xDC00 to 0xDFFF 1172 */ 1173 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff); 1174 /** 1175 * The "Private Use Area" Unicode Block. 1176 * 1177 * @since 1.2 1178 */ 1179 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff); 1180 /** 1181 * The "CJK Compatibility Ideographs" Unicode Block. 1182 * 1183 * @since 1.2 1184 */ 1185 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff); 1186 /** 1187 * The "Alphabetic Presentation Forms" Unicode Block. 1188 * 1189 * @since 1.2 1190 */ 1191 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f); 1192 /** 1193 * The "Arabic Presentation Forms-A" Unicode Block. 1194 * 1195 * @since 1.2 1196 */ 1197 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff); 1198 /** 1199 * The "Variation Selectors" Unicode Block. 1200 * 1201 * @since 1.5 1202 */ 1203 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f); 1204 /** 1205 * The "Combining Half Marks" Unicode Block. 
1206 * 1207 * @since 1.2 1208 */ 1209 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f); 1210 /** 1211 * The "CJK Compatibility Forms" Unicode Block. 1212 * 1213 * @since 1.2 1214 */ 1215 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f); 1216 /** 1217 * The "Small Form Variants" Unicode Block. 1218 * 1219 * @since 1.2 1220 */ 1221 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f); 1222 /** 1223 * The "Arabic Presentation Forms-B" Unicode Block. 1224 * 1225 * @since 1.2 1226 */ 1227 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff); 1228 /** 1229 * The "Halfwidth and Fullwidth Forms" Unicode Block. 1230 * 1231 * @since 1.2 1232 */ 1233 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef); 1234 /** 1235 * The "Specials" Unicode Block. 1236 * 1237 * @since 1.2 1238 */ 1239 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff); 1240 /** 1241 * The "Linear B Syllabary" Unicode Block. 1242 * 1243 * @since 1.2 1244 */ 1245 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f); 1246 /** 1247 * The "Linear B Ideograms" Unicode Block. 1248 * 1249 * @since 1.5 1250 */ 1251 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff); 1252 /** 1253 * The "Aegean Numbers" Unicode Block. 1254 * 1255 * @since 1.5 1256 */ 1257 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f); 1258 /** 1259 * The "Old Italic" Unicode Block. 
1260 * 1261 * @since 1.5 1262 */ 1263 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f); 1264 /** 1265 * The "Gothic" Unicode Block. 1266 * 1267 * @since 1.5 1268 */ 1269 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f); 1270 /** 1271 * The "Ugaritic" Unicode Block. 1272 * 1273 * @since 1.5 1274 */ 1275 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f); 1276 /** 1277 * The "Deseret" Unicode Block. 1278 * 1279 * @since 1.5 1280 */ 1281 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f); 1282 /** 1283 * The "Shavian" Unicode Block. 1284 * 1285 * @since 1.5 1286 */ 1287 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f); 1288 /** 1289 * The "Osmanya" Unicode Block. 1290 * 1291 * @since 1.5 1292 */ 1293 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af); 1294 /** 1295 * The "Cypriot Syllabary" Unicode Block. 1296 * 1297 * @since 1.5 1298 */ 1299 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f); 1300 /** 1301 * The "Byzantine Musical Symbols" Unicode Block. 1302 * 1303 * @since 1.5 1304 */ 1305 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff); 1306 /** 1307 * The "Musical Symbols" Unicode Block. 1308 * 1309 * @since 1.5 1310 */ 1311 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff); 1312 /** 1313 * The "Tai Xuan Jing Symbols" Unicode Block. 1314 * 1315 * @since 1.5 1316 */ 1317 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f); 1318 /** 1319 * The "Mathematical Alphanumeric Symbols" Unicode Block. 
1320 * 1321 * @since 1.5 1322 */ 1323 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff); 1324 /** 1325 * The "CJK Unified Ideographs Extension B" Unicode Block. 1326 * 1327 * @since 1.5 1328 */ 1329 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df); 1330 /** 1331 * The "CJK Compatibility Ideographs Supplement" Unicode Block. 1332 * 1333 * @since 1.5 1334 */ 1335 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f); 1336 /** 1337 * The "Tags" Unicode Block. 1338 * 1339 * @since 1.5 1340 */ 1341 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f); 1342 /** 1343 * The "Variation Selectors Supplement" Unicode Block. 1344 * 1345 * @since 1.5 1346 */ 1347 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef); 1348 /** 1349 * The "Supplementary Private Use Area-A" Unicode Block. 1350 * 1351 * @since 1.5 1352 */ 1353 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff); 1354 /** 1355 * The "Supplementary Private Use Area-B" Unicode Block. 1356 * 1357 * @since 1.5 1358 */ 1359 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff); 1360 1361 /* 1362 * All of the UnicodeBlocks with valid ranges in ascending order. 1363 */ 1364 private static UnicodeBlock[] BLOCKS; 1365 1366 // BEGIN android-changed 1367 // /* 1368 // * A SortedMap (String.CASE_INSENSITIVE_ORDER) with keys that represents 1369 // * valid block names and values of the UnicodeBlock constant they map 1370 // * to. 
1371 // */ 1372 // private static final SortedMap<String, UnicodeBlock> BLOCKS_BY_NAME = ...; 1373 // END android-changed 1374 1375 /** 1376 * Retrieves the constant that corresponds to the specified block name. 1377 * The block names are defined by the Unicode 4.0.1 specification in the 1378 * {@code Blocks-4.0.1.txt} file. 1379 * <p> 1380 * Block names may be one of the following: 1381 * <ul> 1382 * <li>Canonical block name, as defined by the Unicode specification; 1383 * case-insensitive.</li> 1384 * <li>Canonical block name without any spaces, as defined by the 1385 * Unicode specification; case-insensitive.</li> 1386 * <li>{@code UnicodeBlock} constant identifier. This is determined by 1387 * uppercasing the canonical name and replacing all spaces and hyphens 1388 * with underscores.</li> 1389 * </ul> 1390 * 1391 * @param blockName 1392 * the name of the block to retrieve. 1393 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1394 * @throws NullPointerException 1395 * if {@code blockName} is {@code null}. 1396 * @throws IllegalArgumentException 1397 * if {@code blockName} is not a valid block name. 1398 * @since 1.5 1399 */ 1400 public static final UnicodeBlock forName(String blockName) { 1401 // BEGIN android-note 1402 // trying to get closer to the RI which defines this as final. 
1403 // END android-note 1404 if (blockName == null) { 1405 throw new NullPointerException(); 1406 } 1407 // BEGIN android-changed 1408 if (BLOCKS == null) { 1409 BLOCKS = UCharacter.getBlockTable(); 1410 } 1411 int block = UCharacter.forName(blockName); 1412 if (block == -1) { 1413 if(blockName.equals("SURROGATES_AREA")) { 1414 return SURROGATES_AREA; 1415 } else if(blockName.equalsIgnoreCase("greek")) { 1416 return GREEK; 1417 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1418 blockName.equals("Combining Marks for Symbols") || 1419 blockName.equals("CombiningMarksforSymbols")) { 1420 return COMBINING_MARKS_FOR_SYMBOLS; 1421 } 1422 throw new IllegalArgumentException(); 1423 } 1424 return BLOCKS[block]; 1425 // END android-changed 1426 } 1427 1428 /** 1429 * Gets the constant for the Unicode block that contains the specified 1430 * character. 1431 * 1432 * @param c 1433 * the character for which to get the {@code UnicodeBlock} 1434 * constant. 1435 * @return the {@code UnicodeBlock} constant for the block that contains 1436 * {@code c}, or {@code null} if {@code c} does not belong to 1437 * any defined block. 1438 */ 1439 public static UnicodeBlock of(char c) { 1440 return of((int) c); 1441 } 1442 1443 /** 1444 * Gets the constant for the Unicode block that contains the specified 1445 * Unicode code point. 1446 * 1447 * @param codePoint 1448 * the Unicode code point for which to get the 1449 * {@code UnicodeBlock} constant. 1450 * @return the {@code UnicodeBlock} constant for the block that contains 1451 * {@code codePoint}, or {@code null} if {@code codePoint} does 1452 * not belong to any defined block. 1453 * @throws IllegalArgumentException 1454 * if {@code codePoint} is not a valid Unicode code point. 
1455 * @since 1.5 1456 */ 1457 public static UnicodeBlock of(int codePoint) { 1458 if (!isValidCodePoint(codePoint)) { 1459 throw new IllegalArgumentException(); 1460 } 1461 // BEGIN android-changed 1462 if (BLOCKS == null) { 1463 BLOCKS = UCharacter.getBlockTable(); 1464 } 1465 int block = UCharacter.of(codePoint); 1466 if(block == -1 || block >= BLOCKS.length) { 1467 return null; 1468 } 1469 return BLOCKS[block]; 1470 // END android-changed 1471 } 1472 1473 // BEGIN android-changed 1474 private UnicodeBlock(String blockName, int start, int end) { 1475 super(blockName); 1476 } 1477 // END android-changed 1478 } 1479 1480 /** 1481 * Constructs a new {@code Character} with the specified primitive char 1482 * value. 1483 * 1484 * @param value 1485 * the primitive char value to store in the new instance. 1486 */ 1487 public Character(char value) { 1488 this.value = value; 1489 } 1490 1491 /** 1492 * Gets the primitive value of this character. 1493 * 1494 * @return this object's primitive value. 1495 */ 1496 public char charValue() { 1497 return value; 1498 } 1499 1500 /** 1501 * Compares this object to the specified character object to determine their 1502 * relative order. 1503 * 1504 * @param c 1505 * the character object to compare this object to. 1506 * @return {@code 0} if the value of this character and the value of 1507 * {@code c} are equal; a positive value if the value of this 1508 * character is greater than the value of {@code c}; a negative 1509 * value if the value of this character is less than the value of 1510 * {@code c}. 1511 * @see java.lang.Comparable 1512 * @since 1.2 1513 */ 1514 public int compareTo(Character c) { 1515 return value - c.value; 1516 } 1517 1518 /** 1519 * Returns a {@code Character} instance for the {@code char} value passed. 
1520 * For ASCII/Latin-1 characters (and generally all characters with a Unicode 1521 * value up to 512), this method should be used instead of the constructor, 1522 * as it maintains a cache of corresponding {@code Character} instances. 1523 * 1524 * @param c 1525 * the char value for which to get a {@code Character} instance. 1526 * @return the {@code Character} instance for {@code c}. 1527 * @since 1.5 1528 */ 1529 public static Character valueOf(char c) { 1530 if (c >= CACHE_LEN ) { 1531 return new Character(c); 1532 } 1533 return valueOfCache.CACHE[c]; 1534 } 1535 1536 private static final int CACHE_LEN = 512; 1537 1538 static class valueOfCache { 1539 /* 1540 * Provides a cache for the 'valueOf' method. A size of 512 should cache the 1541 * first couple pages of Unicode, which includes the ASCII/Latin-1 1542 * characters, which other parts of this class are optimized for. 1543 */ 1544 private static final Character[] CACHE = new Character[CACHE_LEN ]; 1545 1546 static { 1547 for(int i=0; i<CACHE.length; i++){ 1548 CACHE[i] = new Character((char)i); 1549 } 1550 } 1551 } 1552 /** 1553 * Indicates whether {@code codePoint} is a valid Unicode code point. 1554 * 1555 * @param codePoint 1556 * the code point to test. 1557 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1558 * {@code false} otherwise. 1559 * @since 1.5 1560 */ 1561 public static boolean isValidCodePoint(int codePoint) { 1562 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1563 } 1564 1565 /** 1566 * Indicates whether {@code codePoint} is within the supplementary code 1567 * point range. 1568 * 1569 * @param codePoint 1570 * the code point to test. 1571 * @return {@code true} if {@code codePoint} is within the supplementary 1572 * code point range; {@code false} otherwise. 
1573 * @since 1.5 1574 */ 1575 public static boolean isSupplementaryCodePoint(int codePoint) { 1576 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1577 } 1578 1579 /** 1580 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1581 * that is used for representing supplementary characters in UTF-16 1582 * encoding. 1583 * 1584 * @param ch 1585 * the character to test. 1586 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1587 * {@code false} otherwise. 1588 * @see #isLowSurrogate(char) 1589 * @since 1.5 1590 */ 1591 public static boolean isHighSurrogate(char ch) { 1592 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1593 } 1594 1595 /** 1596 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1597 * that is used for representing supplementary characters in UTF-16 1598 * encoding. 1599 * 1600 * @param ch 1601 * the character to test. 1602 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1603 * {@code false} otherwise. 1604 * @see #isHighSurrogate(char) 1605 * @since 1.5 1606 */ 1607 public static boolean isLowSurrogate(char ch) { 1608 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1609 } 1610 1611 /** 1612 * Indicates whether the specified character pair is a valid surrogate pair. 1613 * 1614 * @param high 1615 * the high surrogate unit to test. 1616 * @param low 1617 * the low surrogate unit to test. 1618 * @return {@code true} if {@code high} is a high-surrogate code unit and 1619 * {@code low} is a low-surrogate code unit; {@code false} 1620 * otherwise. 1621 * @see #isHighSurrogate(char) 1622 * @see #isLowSurrogate(char) 1623 * @since 1.5 1624 */ 1625 public static boolean isSurrogatePair(char high, char low) { 1626 return (isHighSurrogate(high) && isLowSurrogate(low)); 1627 } 1628 1629 /** 1630 * Calculates the number of {@code char} values required to represent the 1631 * specified Unicode code point. 
This method checks if the {@code codePoint} 1632 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1633 * returned, otherwise {@code 1}. To test if the code point is valid, use 1634 * the {@link #isValidCodePoint(int)} method. 1635 * 1636 * @param codePoint 1637 * the code point for which to calculate the number of required 1638 * chars. 1639 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 1640 * @see #isValidCodePoint(int) 1641 * @see #isSupplementaryCodePoint(int) 1642 * @since 1.5 1643 */ 1644 public static int charCount(int codePoint) { 1645 return (codePoint >= 0x10000 ? 2 : 1); 1646 } 1647 1648 /** 1649 * Converts a surrogate pair into a Unicode code point. This method assumes 1650 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1651 * surrogates, then the result is indeterminate. The 1652 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1653 * method to validate the pair. 1654 * 1655 * @param high 1656 * the high surrogate unit. 1657 * @param low 1658 * the low surrogate unit. 1659 * @return the Unicode code point corresponding to the surrogate unit pair. 1660 * @see #isSurrogatePair(char, char) 1661 * @since 1.5 1662 */ 1663 public static int toCodePoint(char high, char low) { 1664 // See RFC 2781, Section 2.2 1665 // http://www.faqs.org/rfcs/rfc2781.html 1666 int h = (high & 0x3FF) << 10; 1667 int l = low & 0x3FF; 1668 return (h | l) + 0x10000; 1669 } 1670 1671 /** 1672 * Returns the code point at {@code index} in the specified sequence of 1673 * character units. If the unit at {@code index} is a high-surrogate unit, 1674 * {@code index + 1} is less than the length of the sequence and the unit at 1675 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1676 * point represented by the pair is returned; otherwise the {@code char} 1677 * value at {@code index} is returned. 
1678 * 1679 * @param seq 1680 * the source sequence of {@code char} units. 1681 * @param index 1682 * the position in {@code seq} from which to retrieve the code 1683 * point. 1684 * @return the Unicode code point or {@code char} value at {@code index} in 1685 * {@code seq}. 1686 * @throws NullPointerException 1687 * if {@code seq} is {@code null}. 1688 * @throws IndexOutOfBoundsException 1689 * if the {@code index} is negative or greater than or equal to 1690 * the length of {@code seq}. 1691 * @since 1.5 1692 */ 1693 public static int codePointAt(CharSequence seq, int index) { 1694 if (seq == null) { 1695 throw new NullPointerException(); 1696 } 1697 int len = seq.length(); 1698 if (index < 0 || index >= len) { 1699 throw new IndexOutOfBoundsException(); 1700 } 1701 1702 char high = seq.charAt(index++); 1703 if (index >= len) { 1704 return high; 1705 } 1706 char low = seq.charAt(index); 1707 if (isSurrogatePair(high, low)) { 1708 return toCodePoint(high, low); 1709 } 1710 return high; 1711 } 1712 1713 /** 1714 * Returns the code point at {@code index} in the specified array of 1715 * character units. If the unit at {@code index} is a high-surrogate unit, 1716 * {@code index + 1} is less than the length of the array and the unit at 1717 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1718 * point represented by the pair is returned; otherwise the {@code char} 1719 * value at {@code index} is returned. 1720 * 1721 * @param seq 1722 * the source array of {@code char} units. 1723 * @param index 1724 * the position in {@code seq} from which to retrieve the code 1725 * point. 1726 * @return the Unicode code point or {@code char} value at {@code index} in 1727 * {@code seq}. 1728 * @throws NullPointerException 1729 * if {@code seq} is {@code null}. 1730 * @throws IndexOutOfBoundsException 1731 * if the {@code index} is negative or greater than or equal to 1732 * the length of {@code seq}. 
1733 * @since 1.5 1734 */ 1735 public static int codePointAt(char[] seq, int index) { 1736 if (seq == null) { 1737 throw new NullPointerException(); 1738 } 1739 int len = seq.length; 1740 if (index < 0 || index >= len) { 1741 throw new IndexOutOfBoundsException(); 1742 } 1743 1744 char high = seq[index++]; 1745 if (index >= len) { 1746 return high; 1747 } 1748 char low = seq[index]; 1749 if (isSurrogatePair(high, low)) { 1750 return toCodePoint(high, low); 1751 } 1752 return high; 1753 } 1754 1755 /** 1756 * Returns the code point at {@code index} in the specified array of 1757 * character units, where {@code index} has to be less than {@code limit}. 1758 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1759 * is less than {@code limit} and the unit at {@code index + 1} is a 1760 * low-surrogate unit, then the supplementary code point represented by the 1761 * pair is returned; otherwise the {@code char} value at {@code index} is 1762 * returned. 1763 * 1764 * @param seq 1765 * the source array of {@code char} units. 1766 * @param index 1767 * the position in {@code seq} from which to get the code point. 1768 * @param limit 1769 * the index after the last unit in {@code seq} that can be used. 1770 * @return the Unicode code point or {@code char} value at {@code index} in 1771 * {@code seq}. 1772 * @throws NullPointerException 1773 * if {@code seq} is {@code null}. 1774 * @throws IndexOutOfBoundsException 1775 * if {@code index < 0}, {@code index >= limit}, 1776 * {@code limit < 0} or if {@code limit} is greater than the 1777 * length of {@code seq}. 
1778 * @since 1.5 1779 */ 1780 public static int codePointAt(char[] seq, int index, int limit) { 1781 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1782 throw new IndexOutOfBoundsException(); 1783 } 1784 1785 char high = seq[index++]; 1786 if (index >= limit) { 1787 return high; 1788 } 1789 char low = seq[index]; 1790 if (isSurrogatePair(high, low)) { 1791 return toCodePoint(high, low); 1792 } 1793 return high; 1794 } 1795 1796 /** 1797 * Returns the code point that preceds {@code index} in the specified 1798 * sequence of character units. If the unit at {@code index - 1} is a 1799 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1800 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1801 * point represented by the pair is returned; otherwise the {@code char} 1802 * value at {@code index - 1} is returned. 1803 * 1804 * @param seq 1805 * the source sequence of {@code char} units. 1806 * @param index 1807 * the position in {@code seq} following the code 1808 * point that should be returned. 1809 * @return the Unicode code point or {@code char} value before {@code index} 1810 * in {@code seq}. 1811 * @throws NullPointerException 1812 * if {@code seq} is {@code null}. 1813 * @throws IndexOutOfBoundsException 1814 * if the {@code index} is less than 1 or greater than the 1815 * length of {@code seq}. 
1816 * @since 1.5 1817 */ 1818 public static int codePointBefore(CharSequence seq, int index) { 1819 if (seq == null) { 1820 throw new NullPointerException(); 1821 } 1822 int len = seq.length(); 1823 if (index < 1 || index > len) { 1824 throw new IndexOutOfBoundsException(); 1825 } 1826 1827 char low = seq.charAt(--index); 1828 if (--index < 0) { 1829 return low; 1830 } 1831 char high = seq.charAt(index); 1832 if (isSurrogatePair(high, low)) { 1833 return toCodePoint(high, low); 1834 } 1835 return low; 1836 } 1837 1838 /** 1839 * Returns the code point that preceds {@code index} in the specified 1840 * array of character units. If the unit at {@code index - 1} is a 1841 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1842 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1843 * point represented by the pair is returned; otherwise the {@code char} 1844 * value at {@code index - 1} is returned. 1845 * 1846 * @param seq 1847 * the source array of {@code char} units. 1848 * @param index 1849 * the position in {@code seq} following the code 1850 * point that should be returned. 1851 * @return the Unicode code point or {@code char} value before {@code index} 1852 * in {@code seq}. 1853 * @throws NullPointerException 1854 * if {@code seq} is {@code null}. 1855 * @throws IndexOutOfBoundsException 1856 * if the {@code index} is less than 1 or greater than the 1857 * length of {@code seq}. 
1858 * @since 1.5 1859 */ 1860 public static int codePointBefore(char[] seq, int index) { 1861 if (seq == null) { 1862 throw new NullPointerException(); 1863 } 1864 int len = seq.length; 1865 if (index < 1 || index > len) { 1866 throw new IndexOutOfBoundsException(); 1867 } 1868 1869 char low = seq[--index]; 1870 if (--index < 0) { 1871 return low; 1872 } 1873 char high = seq[index]; 1874 if (isSurrogatePair(high, low)) { 1875 return toCodePoint(high, low); 1876 } 1877 return low; 1878 } 1879 1880 /** 1881 * Returns the code point that preceds the {@code index} in the specified 1882 * array of character units and is not less than {@code start}. If the unit 1883 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1884 * less than {@code start} and the unit at {@code index - 2} is a 1885 * high-surrogate unit, then the supplementary code point represented by the 1886 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1887 * is returned. 1888 * 1889 * @param seq 1890 * the source array of {@code char} units. 1891 * @param index 1892 * the position in {@code seq} following the code point that 1893 * should be returned. 1894 * @param start 1895 * the index of the first element in {@code seq}. 1896 * @return the Unicode code point or {@code char} value before {@code index} 1897 * in {@code seq}. 1898 * @throws NullPointerException 1899 * if {@code seq} is {@code null}. 1900 * @throws IndexOutOfBoundsException 1901 * if the {@code index <= start}, {@code start < 0}, 1902 * {@code index} is greater than the length of {@code seq}, or 1903 * if {@code start} is equal or greater than the length of 1904 * {@code seq}. 
1905 * @since 1.5 1906 */ 1907 public static int codePointBefore(char[] seq, int index, int start) { 1908 if (seq == null) { 1909 throw new NullPointerException(); 1910 } 1911 int len = seq.length; 1912 if (index <= start || index > len || start < 0 || start >= len) { 1913 throw new IndexOutOfBoundsException(); 1914 } 1915 1916 char low = seq[--index]; 1917 if (--index < start) { 1918 return low; 1919 } 1920 char high = seq[index]; 1921 if (isSurrogatePair(high, low)) { 1922 return toCodePoint(high, low); 1923 } 1924 return low; 1925 } 1926 1927 /** 1928 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1929 * and copies the value(s) into the char array {@code dst}, starting at 1930 * index {@code dstIndex}. 1931 * 1932 * @param codePoint 1933 * the Unicode code point to encode. 1934 * @param dst 1935 * the destination array to copy the encoded value into. 1936 * @param dstIndex 1937 * the index in {@code dst} from where to start copying. 1938 * @return the number of {@code char} value units copied into {@code dst}. 1939 * @throws IllegalArgumentException 1940 * if {@code codePoint} is not a valid Unicode code point. 1941 * @throws NullPointerException 1942 * if {@code dst} is {@code null}. 1943 * @throws IndexOutOfBoundsException 1944 * if {@code dstIndex} is negative, greater than or equal to 1945 * {@code dst.length} or equals {@code dst.length - 1} when 1946 * {@code codePoint} is a 1947 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
1948 * @since 1.5 1949 */ 1950 public static int toChars(int codePoint, char[] dst, int dstIndex) { 1951 if (!isValidCodePoint(codePoint)) { 1952 throw new IllegalArgumentException(); 1953 } 1954 if (dst == null) { 1955 throw new NullPointerException(); 1956 } 1957 if (dstIndex < 0 || dstIndex >= dst.length) { 1958 throw new IndexOutOfBoundsException(); 1959 } 1960 1961 if (isSupplementaryCodePoint(codePoint)) { 1962 if (dstIndex == dst.length - 1) { 1963 throw new IndexOutOfBoundsException(); 1964 } 1965 // See RFC 2781, Section 2.1 1966 // http://www.faqs.org/rfcs/rfc2781.html 1967 int cpPrime = codePoint - 0x10000; 1968 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1969 int low = 0xDC00 | (cpPrime & 0x3FF); 1970 dst[dstIndex] = (char) high; 1971 dst[dstIndex + 1] = (char) low; 1972 return 2; 1973 } 1974 1975 dst[dstIndex] = (char) codePoint; 1976 return 1; 1977 } 1978 1979 /** 1980 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1981 * and returns it as a char array. 1982 * 1983 * @param codePoint 1984 * the Unicode code point to encode. 1985 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 1986 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 1987 * then the returned array contains two characters, otherwise it 1988 * contains just one character. 1989 * @throws IllegalArgumentException 1990 * if {@code codePoint} is not a valid Unicode code point. 
1991 * @since 1.5 1992 */ 1993 public static char[] toChars(int codePoint) { 1994 if (!isValidCodePoint(codePoint)) { 1995 throw new IllegalArgumentException(); 1996 } 1997 1998 if (isSupplementaryCodePoint(codePoint)) { 1999 int cpPrime = codePoint - 0x10000; 2000 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2001 int low = 0xDC00 | (cpPrime & 0x3FF); 2002 return new char[] { (char) high, (char) low }; 2003 } 2004 return new char[] { (char) codePoint }; 2005 } 2006 2007 /** 2008 * Counts the number of Unicode code points in the subsequence of the 2009 * specified character sequence, as delineated by {@code beginIndex} and 2010 * {@code endIndex}. Any surrogate values with missing pair values will be 2011 * counted as one code point. 2012 * 2013 * @param seq 2014 * the {@code CharSequence} to look through. 2015 * @param beginIndex 2016 * the inclusive index to begin counting at. 2017 * @param endIndex 2018 * the exclusive index to stop counting at. 2019 * @return the number of Unicode code points. 2020 * @throws NullPointerException 2021 * if {@code seq} is {@code null}. 2022 * @throws IndexOutOfBoundsException 2023 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2024 * if {@code endIndex} is greater than the length of {@code seq}. 
2025 * @since 1.5 2026 */ 2027 public static int codePointCount(CharSequence seq, int beginIndex, 2028 int endIndex) { 2029 if (seq == null) { 2030 throw new NullPointerException(); 2031 } 2032 int len = seq.length(); 2033 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2034 throw new IndexOutOfBoundsException(); 2035 } 2036 2037 int result = 0; 2038 for (int i = beginIndex; i < endIndex; i++) { 2039 char c = seq.charAt(i); 2040 if (isHighSurrogate(c)) { 2041 if (++i < endIndex) { 2042 c = seq.charAt(i); 2043 if (!isLowSurrogate(c)) { 2044 result++; 2045 } 2046 } 2047 } 2048 result++; 2049 } 2050 return result; 2051 } 2052 2053 /** 2054 * Counts the number of Unicode code points in the subsequence of the 2055 * specified char array, as delineated by {@code offset} and {@code count}. 2056 * Any surrogate values with missing pair values will be counted as one code 2057 * point. 2058 * 2059 * @param seq 2060 * the char array to look through 2061 * @param offset 2062 * the inclusive index to begin counting at. 2063 * @param count 2064 * the number of {@code char} values to look through in 2065 * {@code seq}. 2066 * @return the number of Unicode code points. 2067 * @throws NullPointerException 2068 * if {@code seq} is {@code null}. 2069 * @throws IndexOutOfBoundsException 2070 * if {@code offset < 0}, {@code count < 0} or if 2071 * {@code offset + count} is greater than the length of 2072 * {@code seq}. 
2073 * @since 1.5 2074 */ 2075 public static int codePointCount(char[] seq, int offset, int count) { 2076 if (seq == null) { 2077 throw new NullPointerException(); 2078 } 2079 int len = seq.length; 2080 int endIndex = offset + count; 2081 if (offset < 0 || count < 0 || endIndex > len) { 2082 throw new IndexOutOfBoundsException(); 2083 } 2084 2085 int result = 0; 2086 for (int i = offset; i < endIndex; i++) { 2087 char c = seq[i]; 2088 if (isHighSurrogate(c)) { 2089 if (++i < endIndex) { 2090 c = seq[i]; 2091 if (!isLowSurrogate(c)) { 2092 result++; 2093 } 2094 } 2095 } 2096 result++; 2097 } 2098 return result; 2099 } 2100 2101 /** 2102 * Determines the index in the specified character sequence that is offset 2103 * {@code codePointOffset} code points from {@code index}. 2104 * 2105 * @param seq 2106 * the character sequence to find the index in. 2107 * @param index 2108 * the start index in {@code seq}. 2109 * @param codePointOffset 2110 * the number of code points to look backwards or forwards; may 2111 * be a negative or positive value. 2112 * @return the index in {@code seq} that is {@code codePointOffset} code 2113 * points away from {@code index}. 2114 * @throws NullPointerException 2115 * if {@code seq} is {@code null}. 2116 * @throws IndexOutOfBoundsException 2117 * if {@code index < 0}, {@code index} is greater than the 2118 * length of {@code seq}, or if there are not enough values in 2119 * {@code seq} to skip {@code codePointOffset} code points 2120 * forwards or backwards (if {@code codePointOffset} is 2121 * negative) from {@code index}. 
2122 * @since 1.5 2123 */ 2124 public static int offsetByCodePoints(CharSequence seq, int index, 2125 int codePointOffset) { 2126 if (seq == null) { 2127 throw new NullPointerException(); 2128 } 2129 int len = seq.length(); 2130 if (index < 0 || index > len) { 2131 throw new IndexOutOfBoundsException(); 2132 } 2133 2134 if (codePointOffset == 0) { 2135 return index; 2136 } 2137 2138 if (codePointOffset > 0) { 2139 int codePoints = codePointOffset; 2140 int i = index; 2141 while (codePoints > 0) { 2142 codePoints--; 2143 if (i >= len) { 2144 throw new IndexOutOfBoundsException(); 2145 } 2146 if (isHighSurrogate(seq.charAt(i))) { 2147 int next = i + 1; 2148 if (next < len && isLowSurrogate(seq.charAt(next))) { 2149 i++; 2150 } 2151 } 2152 i++; 2153 } 2154 return i; 2155 } 2156 2157 assert codePointOffset < 0; 2158 int codePoints = -codePointOffset; 2159 int i = index; 2160 while (codePoints > 0) { 2161 codePoints--; 2162 i--; 2163 if (i < 0) { 2164 throw new IndexOutOfBoundsException(); 2165 } 2166 if (isLowSurrogate(seq.charAt(i))) { 2167 int prev = i - 1; 2168 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2169 i--; 2170 } 2171 } 2172 } 2173 return i; 2174 } 2175 2176 /** 2177 * Determines the index in a subsequence of the specified character array 2178 * that is offset {@code codePointOffset} code points from {@code index}. 2179 * The subsequence is delineated by {@code start} and {@code count}. 2180 * 2181 * @param seq 2182 * the character array to find the index in. 2183 * @param start 2184 * the inclusive index that marks the beginning of the 2185 * subsequence. 2186 * @param count 2187 * the number of {@code char} values to include within the 2188 * subsequence. 2189 * @param index 2190 * the start index in the subsequence of the char array. 2191 * @param codePointOffset 2192 * the number of code points to look backwards or forwards; may 2193 * be a negative or positive value. 
2194 * @return the index in {@code seq} that is {@code codePointOffset} code 2195 * points away from {@code index}. 2196 * @throws NullPointerException 2197 * if {@code seq} is {@code null}. 2198 * @throws IndexOutOfBoundsException 2199 * if {@code start < 0}, {@code count < 0}, 2200 * {@code index < start}, {@code index > start + count}, 2201 * {@code start + count} is greater than the length of 2202 * {@code seq}, or if there are not enough values in 2203 * {@code seq} to skip {@code codePointOffset} code points 2204 * forward or backward (if {@code codePointOffset} is 2205 * negative) from {@code index}. 2206 * @since 1.5 2207 */ 2208 public static int offsetByCodePoints(char[] seq, int start, int count, 2209 int index, int codePointOffset) { 2210 if (seq == null) { 2211 throw new NullPointerException(); 2212 } 2213 int end = start + count; 2214 if (start < 0 || count < 0 || end > seq.length || index < start 2215 || index > end) { 2216 throw new IndexOutOfBoundsException(); 2217 } 2218 2219 if (codePointOffset == 0) { 2220 return index; 2221 } 2222 2223 if (codePointOffset > 0) { 2224 int codePoints = codePointOffset; 2225 int i = index; 2226 while (codePoints > 0) { 2227 codePoints--; 2228 if (i >= end) { 2229 throw new IndexOutOfBoundsException(); 2230 } 2231 if (isHighSurrogate(seq[i])) { 2232 int next = i + 1; 2233 if (next < end && isLowSurrogate(seq[next])) { 2234 i++; 2235 } 2236 } 2237 i++; 2238 } 2239 return i; 2240 } 2241 2242 assert codePointOffset < 0; 2243 int codePoints = -codePointOffset; 2244 int i = index; 2245 while (codePoints > 0) { 2246 codePoints--; 2247 i--; 2248 if (i < start) { 2249 throw new IndexOutOfBoundsException(); 2250 } 2251 if (isLowSurrogate(seq[i])) { 2252 int prev = i - 1; 2253 if (prev >= start && isHighSurrogate(seq[prev])) { 2254 i--; 2255 } 2256 } 2257 } 2258 return i; 2259 } 2260 2261 /** 2262 * Convenience method to determine the value of the specified character 2263 * {@code c} in the supplied radix. 
* The value of {@code radix} must be between MIN_RADIX and MAX_RADIX.
     *
     * @param c
     *            the character to determine the value of.
     * @param radix
     *            the radix.
     * @return the value of {@code c} in {@code radix} if {@code radix} lies
     *         between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise.
     */
    public static int digit(char c, int radix) {
        // BEGIN android-changed
        // The upstream table-driven implementation is kept below for
        // reference; the live code delegates to UCharacter.
        // if (radix >= MIN_RADIX && radix <= MAX_RADIX) {
        //     if (c < 128) {
        //         // Optimized for ASCII
        //         int result = -1;
        //         if ('0' <= c && c <= '9') {
        //             result = c - '0';
        //         } else if ('a' <= c && c <= 'z') {
        //             result = c - ('a' - 10);
        //         } else if ('A' <= c && c <= 'Z') {
        //             result = c - ('A' - 10);
        //         }
        //         return result < radix ? result : -1;
        //     }
        //     int result = BinarySearch.binarySearchRange(digitKeys, c);
        //     if (result >= 0 && c <= digitValues[result * 2]) {
        //         int value = (char) (c - digitValues[result * 2 + 1]);
        //         if (value >= radix) {
        //             return -1;
        //         }
        //         return value;
        //     }
        // }
        // return -1;
        return UCharacter.digit(c, radix);
        // END android-changed
    }

    /**
     * Convenience method to determine the value of the character
     * {@code codePoint} in the supplied radix. The value of {@code radix} must
     * be between MIN_RADIX and MAX_RADIX.
     *
     * @param codePoint
     *            the character, including supplementary characters.
     * @param radix
     *            the radix.
     * @return if {@code radix} lies between {@link #MIN_RADIX} and
     *         {@link #MAX_RADIX} then the value of the character in the radix;
     *         -1 otherwise.
     */
    public static int digit(int codePoint, int radix) {
        // Delegates directly to UCharacter; no range checks are done here.
        return UCharacter.digit(codePoint, radix);
    }

    /**
     * Compares this object with the specified object and indicates if they are
     * equal.
In order to be equal, {@code object} must be an instance of 2322 * {@code Character} and have the same char value as this object. 2323 * 2324 * @param object 2325 * the object to compare this double with. 2326 * @return {@code true} if the specified object is equal to this 2327 * {@code Character}; {@code false} otherwise. 2328 */ 2329 @Override 2330 public boolean equals(Object object) { 2331 return (object instanceof Character) 2332 && (value == ((Character) object).value); 2333 } 2334 2335 /** 2336 * Returns the character which represents the specified digit in the 2337 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2338 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2339 * smaller than {@code radix}. If any of these conditions does not hold, 0 2340 * is returned. 2341 * 2342 * @param digit 2343 * the integer value. 2344 * @param radix 2345 * the radix. 2346 * @return the character which represents the {@code digit} in the 2347 * {@code radix}. 2348 */ 2349 public static char forDigit(int digit, int radix) { 2350 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2351 if (0 <= digit && digit < radix) { 2352 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2353 } 2354 } 2355 return 0; 2356 } 2357 2358 /** 2359 * Gets the numeric value of the specified Unicode character. 2360 * 2361 * @param c 2362 * the Unicode character to get the numeric value of. 2363 * @return a non-negative numeric integer value if a numeric value for 2364 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2365 * -2 if the numeric value can not be represented with an integer. 
*/
    public static int getNumericValue(char c) {
        // BEGIN android-changed
        // The upstream table-driven implementation is kept below for
        // reference; the live code delegates to UCharacter.
        // if (c < 128) {
        //     // Optimized for ASCII
        //     if (c >= '0' && c <= '9') {
        //         return c - '0';
        //     }
        //     if (c >= 'a' && c <= 'z') {
        //         return c - ('a' - 10);
        //     }
        //     if (c >= 'A' && c <= 'Z') {
        //         return c - ('A' - 10);
        //     }
        //     return -1;
        // }
        // int result = BinarySearch.binarySearchRange(numericKeys, c);
        // if (result >= 0 && c <= numericValues[result * 2]) {
        //     char difference = numericValues[result * 2 + 1];
        //     if (difference == 0) {
        //         return -2;
        //     }
        //     // Value is always positive, must be negative value
        //     if (difference > c) {
        //         return c - (short) difference;
        //     }
        //     return c - difference;
        // }
        // return -1;
        return UCharacter.getNumericValue(c);
        // END android-changed
    }

    /**
     * Gets the numeric value of the specified Unicode code point. For example,
     * the code point '\u216B' stands for the Roman number XII, which has the
     * numeric value 12.
     *
     * @param codePoint
     *            the Unicode code point to get the numeric value of.
     * @return a non-negative numeric integer value if a numeric value for
     *         {@code codePoint} exists, -1 if there is no numeric value for
     *         {@code codePoint}, -2 if the numeric value can not be
     *         represented with an integer.
     */
    public static int getNumericValue(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.getNumericValue(codePoint);
    }

    /**
     * Gets the general Unicode category of the specified character.
     *
     * @param c
     *            the character to get the category of.
     * @return the Unicode category of {@code c}.
*/
    public static int getType(char c) {
        // BEGIN android-changed
        // The upstream table-driven implementation is kept below for
        // reference; the live code forwards to the code point overload.
        // if(c < 1000) {
        //     return typeValuesCache[(int)c];
        // }
        // int result = BinarySearch.binarySearchRange(typeKeys, c);
        // int high = typeValues[result * 2];
        // if (c <= high) {
        //     int code = typeValues[result * 2 + 1];
        //     if (code < 0x100) {
        //         return code;
        //     }
        //     return (c & 1) == 1 ? code >> 8 : code & 0xff;
        // }
        // return UNASSIGNED;
        return getType((int) c);
        // END android-changed
    }

    /**
     * Gets the general Unicode category of the specified code point.
     *
     * @param codePoint
     *            the Unicode code point to get the category of.
     * @return the Unicode category of {@code codePoint}.
     */
    public static int getType(int codePoint) {
        // BEGIN android-changed
        // if (codePoint < 1000 && codePoint > 0) {
        //     return typeValuesCache[codePoint];
        // }
        // END android-changed
        int type = UCharacter.getType(codePoint);

        // The type values returned by UCharacter are not compatible with what
        // the spec says. RI's Character type values skip the value 17, so
        // every value above FORMAT is shifted up by one.
        if (type <= Character.FORMAT) {
            return type;
        }
        return (type + 1);
    }

    /**
     * Gets the Unicode directionality of the specified character.
     *
     * @param c
     *            the character to get the directionality of.
     * @return the Unicode directionality of {@code c}.
     */
    public static byte getDirectionality(char c) {
        // BEGIN android-changed
        // The upstream table-driven implementation is kept below for
        // reference; the live code forwards to the code point overload.
        // int result = BinarySearch.binarySearchRange(bidiKeys, c);
        // int high = bidiValues[result * 2];
        // if (c <= high) {
        //     int code = bidiValues[result * 2 + 1];
        //     if (code < 0x100) {
        //         return (byte) (code - 1);
        //     }
        //     return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1);
        // }
        // return DIRECTIONALITY_UNDEFINED;
        return getDirectionality((int)c);
        // END android-changed
    }

    /**
     * Gets the Unicode directionality of the specified code point.
     *
     * @param codePoint
     *            the Unicode code point to get the directionality of.
     * @return the Unicode directionality of {@code codePoint}.
     */
    public static byte getDirectionality(int codePoint) {
        // Unassigned code points are reported as undefined without asking
        // UCharacter for a directionality.
        if (getType(codePoint) == Character.UNASSIGNED) {
            return Character.DIRECTIONALITY_UNDEFINED;
        }

        byte UCDirectionality = UCharacter.getDirectionality(codePoint);
        // -1 is passed through unchanged; it cannot be used as an index into
        // the DIRECTIONALITY table.
        // NOTE(review): presumably -1 equals DIRECTIONALITY_UNDEFINED - confirm.
        if (UCDirectionality == -1) {
            return -1;
        }
        // Translate the UCharacter value through the DIRECTIONALITY table.
        return DIRECTIONALITY[UCDirectionality];
    }

    /**
     * Indicates whether the specified character is mirrored.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is mirrored; {@code false}
     *         otherwise.
     */
    public static boolean isMirrored(char c) {
        // BEGIN android-changed
        // The upstream bitmap lookup is kept below for reference; the live
        // code forwards to the code point overload.
        // int value = c / 16;
        // if (value >= mirrored.length) {
        //     return false;
        // }
        // int bit = 1 << (c % 16);
        // return (mirrored[value] & bit) != 0;
        return isMirrored((int)c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is mirrored.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is mirrored, {@code false}
     *         otherwise.
     */
    public static boolean isMirrored(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isMirrored(codePoint);
    }

    /**
     * Returns the wrapped char value as the hash code.
     *
     * @return the value of this {@code Character}, widened to {@code int}.
     */
    @Override
    public int hashCode() {
        return value;
    }

    /**
     * Indicates whether the specified character is defined in the Unicode
     * specification.
     *
     * @param c
     *            the character to check.
* @return {@code true} if the general Unicode category of the character is
     *         not {@code UNASSIGNED}; {@code false} otherwise.
     */
    public static boolean isDefined(char c) {
        // BEGIN android-changed
        // return getType(c) != UNASSIGNED;
        return UCharacter.isDefined(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is defined in the Unicode
     * specification.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if the general Unicode category of the code point is
     *         not {@code UNASSIGNED}; {@code false} otherwise.
     */
    public static boolean isDefined(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isDefined(codePoint);
    }

    /**
     * Indicates whether the specified character is a digit.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a digit; {@code false}
     *         otherwise.
     */
    public static boolean isDigit(char c) {
        // Optimized case for ASCII
        if ('0' <= c && c <= '9') {
            return true;
        }
        // Every other char below 1632 (0x0660) is rejected without consulting
        // UCharacter.
        // NOTE(review): presumably 0x0660 is the first non-ASCII decimal
        // digit - confirm against the Unicode tables in use.
        if (c < 1632) {
            return false;
        }
        // BEGIN android-changed
        return UCharacter.isDigit(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a digit.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a digit; {@code false}
     *         otherwise.
     */
    public static boolean isDigit(int codePoint) {
        // Delegates directly to UCharacter (no ASCII fast path here).
        return UCharacter.isDigit(codePoint);
    }

    /**
     * Indicates whether the specified character is ignorable in a Java or
     * Unicode identifier.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is ignorable; {@code false} otherwise.
*/
    public static boolean isIdentifierIgnorable(char c) {
        // BEGIN android-changed
        // The upstream range test is kept below for reference; the live code
        // delegates to UCharacter.
        // return (c >= 0 && c <= 8) || (c >= 0xe && c <= 0x1b)
        //         || (c >= 0x7f && c <= 0x9f) || getType(c) == FORMAT;
        return UCharacter.isIdentifierIgnorable(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is ignorable in a Java or
     * Unicode identifier.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is ignorable; {@code false}
     *         otherwise.
     */
    public static boolean isIdentifierIgnorable(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isIdentifierIgnorable(codePoint);
    }

    /**
     * Indicates whether the specified character is an ISO control character.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is an ISO control character;
     *         {@code false} otherwise.
     */
    public static boolean isISOControl(char c) {
        // Forwards to the code point overload.
        return isISOControl((int)c);
    }

    /**
     * Indicates whether the specified code point is an ISO control character.
     *
     * @param c
     *            the code point to check.
     * @return {@code true} if {@code c} is an ISO control character;
     *         {@code false} otherwise.
     */
    public static boolean isISOControl(int c) {
        // Two closed ranges count as control characters: 0x00-0x1f and
        // 0x7f-0x9f. Negative values fall outside both.
        return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f);
    }

    /**
     * Indicates whether the specified character is a valid part of a Java
     * identifier other than the first character.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is valid as part of a Java identifier;
     *         {@code false} otherwise.
*/
    public static boolean isJavaIdentifierPart(char c) {
        // Optimized case for ASCII: answered from the typeTags table.
        if (c < 128) {
            return (typeTags[c] & ISJAVAPART) != 0;
        }

        // Non-ASCII: accept letters, currency symbols, connector punctuation,
        // digits and letter numbers, combining marks, the 0x80-0x9f range and
        // FORMAT characters.
        int type = getType(c);
        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
                || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
                || (c >= 0x80 && c <= 0x9f) || type == FORMAT;
    }

    /**
     * Indicates whether the specified code point is a valid part of a Java
     * identifier other than the first character.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Java
     *         identifier; {@code false} otherwise.
     */
    public static boolean isJavaIdentifierPart(int codePoint) {
        // Unlike the char overload there is no ASCII fast path, and the
        // ignorable characters are decided by isIdentifierIgnorable instead
        // of an inline range test.
        int type = getType(codePoint);
        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
                || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
                || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK
                || isIdentifierIgnorable(codePoint);
    }

    /**
     * Indicates whether the specified character is a valid first character for
     * a Java identifier.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a valid first character of a Java
     *         identifier; {@code false} otherwise.
*/
    public static boolean isJavaIdentifierStart(char c) {
        // Optimized case for ASCII: answered from the typeTags table.
        if (c < 128) {
            return (typeTags[c] & ISJAVASTART) != 0;
        }

        // Non-ASCII: accept letters, currency symbols, connector punctuation
        // and letter numbers.
        int type = getType(c);
        return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
                || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION
                || type == LETTER_NUMBER;
    }

    /**
     * Indicates whether the specified code point is a valid start for a Java
     * identifier.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a valid start of a Java
     *         identifier; {@code false} otherwise.
     */
    public static boolean isJavaIdentifierStart(int codePoint) {
        // The letter test is delegated to isLetter(int) here, whereas the
        // char overload compares the category range itself.
        int type = getType(codePoint);
        return isLetter(codePoint) || type == CURRENCY_SYMBOL
                || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER;
    }

    /**
     * Indicates whether the specified character is a Java letter.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a Java letter; {@code false}
     *         otherwise.
     * @deprecated Use {@link #isJavaIdentifierStart(char)}
     */
    @Deprecated
    public static boolean isJavaLetter(char c) {
        // Plain alias for the replacement method.
        return isJavaIdentifierStart(c);
    }

    /**
     * Indicates whether the specified character is a Java letter or digit
     * character.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a Java letter or digit;
     *         {@code false} otherwise.
     * @deprecated Use {@link #isJavaIdentifierPart(char)}
     */
    @Deprecated
    public static boolean isJavaLetterOrDigit(char c) {
        // Plain alias for the replacement method.
        return isJavaIdentifierPart(c);
    }

    /**
     * Indicates whether the specified character is a letter.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a letter; {@code false} otherwise.
*/
    public static boolean isLetter(char c) {
        // BEGIN android-changed
        // The upstream implementation is kept below for reference; the live
        // code delegates to UCharacter.
        // if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
        //     return true;
        // }
        // if (c < 128) {
        //     return false;
        // }
        // int type = getType(c);
        // return type >= UPPERCASE_LETTER && type <= OTHER_LETTER;
        return UCharacter.isLetter(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a letter.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a letter; {@code false}
     *         otherwise.
     */
    public static boolean isLetter(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isLetter(codePoint);
    }

    /**
     * Indicates whether the specified character is a letter or a digit.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a letter or a digit; {@code false}
     *         otherwise.
     */
    public static boolean isLetterOrDigit(char c) {
        // BEGIN android-changed
        // int type = getType(c);
        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
        //         || type == DECIMAL_DIGIT_NUMBER;
        return UCharacter.isLetterOrDigit(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a letter or a digit.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a letter or a digit;
     *         {@code false} otherwise.
     */
    public static boolean isLetterOrDigit(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isLetterOrDigit(codePoint);
    }

    /**
     * Indicates whether the specified character is a lower case letter.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a lower case letter; {@code false}
     *         otherwise.
*/
    public static boolean isLowerCase(char c) {
        // BEGIN android-changed
        // The upstream implementation is kept below for reference; the live
        // code delegates to UCharacter.
        // // Optimized case for ASCII
        // if ('a' <= c && c <= 'z') {
        //     return true;
        // }
        // if (c < 128) {
        //     return false;
        // }
        //
        // return getType(c) == LOWERCASE_LETTER;
        return UCharacter.isLowerCase(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a lower case letter.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a lower case letter;
     *         {@code false} otherwise.
     */
    public static boolean isLowerCase(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isLowerCase(codePoint);
    }

    /**
     * Indicates whether the specified character is a Java space.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a Java space; {@code false}
     *         otherwise.
     * @deprecated Use {@link #isWhitespace(char)}
     */
    @Deprecated
    public static boolean isSpace(char c) {
        // Exactly five characters qualify: line feed, tab, form feed,
        // carriage return and the ASCII space.
        return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' ';
    }

    /**
     * Indicates whether the specified character is a Unicode space character.
     * That is, if it is a member of one of the Unicode categories Space
     * Separator, Line Separator, or Paragraph Separator.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a Unicode space character,
     *         {@code false} otherwise.
*/
    public static boolean isSpaceChar(char c) {
        // BEGIN android-changed
        // The upstream implementation is kept below for reference; the live
        // code delegates to UCharacter.
        // if (c == 0x20 || c == 0xa0 || c == 0x1680) {
        //     return true;
        // }
        // if (c < 0x2000) {
        //     return false;
        // }
        // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f
        //         || c == 0x3000;
        return UCharacter.isSpaceChar(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a Unicode space character.
     * That is, if it is a member of one of the Unicode categories Space
     * Separator, Line Separator, or Paragraph Separator.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a Unicode space character,
     *         {@code false} otherwise.
     */
    public static boolean isSpaceChar(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isSpaceChar(codePoint);
    }

    /**
     * Indicates whether the specified character is a titlecase character.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a titlecase character, {@code false}
     *         otherwise.
     */
    public static boolean isTitleCase(char c) {
        // BEGIN android-changed
        // The upstream implementation is kept below for reference; the live
        // code delegates to UCharacter.
        // if (c == '\u01c5' || c == '\u01c8' || c == '\u01cb' || c == '\u01f2') {
        //     return true;
        // }
        // if (c >= '\u1f88' && c <= '\u1ffc') {
        //     // 0x1f88 - 0x1f8f, 0x1f98 - 0x1f9f, 0x1fa8 - 0x1faf
        //     if (c > '\u1faf') {
        //         return c == '\u1fbc' || c == '\u1fcc' || c == '\u1ffc';
        //     }
        //     int last = c & 0xf;
        //     return last >= 8 && last <= 0xf;
        // }
        // return false;
        return UCharacter.isTitleCase(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a titlecase character.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a titlecase character,
     *         {@code false} otherwise.
*/
    public static boolean isTitleCase(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isTitleCase(codePoint);
    }

    /**
     * Indicates whether the specified character is valid as part of a Unicode
     * identifier other than the first character.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is valid as part of a Unicode
     *         identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierPart(char c) {
        // BEGIN android-changed
        // The upstream implementation is kept below for reference; the live
        // code delegates to UCharacter.
        // int type = getType(c);
        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
        //         || type == CONNECTOR_PUNCTUATION
        //         || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER)
        //         || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK
        //         || isIdentifierIgnorable(c);
        return UCharacter.isUnicodeIdentifierPart(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is valid as part of a Unicode
     * identifier other than the first character.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
     *         identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierPart(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isUnicodeIdentifierPart(codePoint);
    }

    /**
     * Indicates whether the specified character is a valid initial character
     * for a Unicode identifier.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a valid first character for a
     *         Unicode identifier; {@code false} otherwise.
*/
    public static boolean isUnicodeIdentifierStart(char c) {
        // BEGIN android-changed
        // The upstream implementation is kept below for reference; the live
        // code delegates to UCharacter.
        // int type = getType(c);
        // return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER)
        //         || type == LETTER_NUMBER;
        return UCharacter.isUnicodeIdentifierStart(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is a valid initial character
     * for a Unicode identifier.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a valid first character for
     *         a Unicode identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        // Delegates directly to UCharacter.
        return UCharacter.isUnicodeIdentifierStart(codePoint);
    }

    /**
     * Indicates whether the specified character is an upper case letter.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is an upper case letter; {@code false}
     *         otherwise.
     */
    public static boolean isUpperCase(char c) {
        // Optimized case for ASCII
        if ('A' <= c && c <= 'Z') {
            return true;
        }
        // Every remaining ASCII char is answered here without calling
        // UCharacter.
        if (c < 128) {
            return false;
        }
        // BEGIN android-changed
        return UCharacter.isUpperCase(c);
        // END android-changed
    }

    /**
     * Indicates whether the specified code point is an upper case letter.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is an upper case letter;
     *         {@code false} otherwise.
     */
    public static boolean isUpperCase(int codePoint) {
        // Delegates directly to UCharacter (no ASCII fast path here).
        return UCharacter.isUpperCase(codePoint);
    }

    /**
     * Indicates whether the specified character is a whitespace character in
     * Java.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if the supplied {@code c} is a whitespace character
     *         in Java; {@code false} otherwise.
3057 */ 3058 public static boolean isWhitespace(char c) { 3059 // BEGIN android-changed 3060 // // Optimized case for ASCII 3061 // if ((c >= 0x1c && c <= 0x20) || (c >= 0x9 && c <= 0xd)) { 3062 // return true; 3063 // } 3064 // if (c == 0x1680) { 3065 // return true; 3066 // } 3067 // if (c < 0x2000 || c == 0x2007) { 3068 // return false; 3069 // } 3070 // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x3000; 3071 return UCharacter.isWhitespace(c); 3072 // END android-changed 3073 } 3074 3075 /** 3076 * Indicates whether the specified code point is a whitespace character in 3077 * Java. 3078 * 3079 * @param codePoint 3080 * the code point to check. 3081 * @return {@code true} if the supplied {@code c} is a whitespace character 3082 * in Java; {@code false} otherwise. 3083 */ 3084 public static boolean isWhitespace(int codePoint) { 3085 //FIXME depends on ICU when the codePoint is '\u2007' 3086 return UCharacter.isWhitespace(codePoint); 3087 3088 } 3089 3090 /** 3091 * Reverses the order of the first and second byte in the specified 3092 * character. 3093 * 3094 * @param c 3095 * the character to reverse. 3096 * @return the character with reordered bytes. 3097 */ 3098 public static char reverseBytes(char c) { 3099 return (char)((c<<8) | (c>>8)); 3100 } 3101 3102 /** 3103 * Returns the lower case equivalent for the specified character if the 3104 * character is an upper case letter. Otherwise, the specified character is 3105 * returned unchanged. 3106 * 3107 * @param c 3108 * the character 3109 * @return if {@code c} is an upper case character then its lower case 3110 * counterpart, otherwise just {@code c}. 
3111 */ 3112 public static char toLowerCase(char c) { 3113 // BEGIN android-changed 3114 // // Optimized case for ASCII 3115 // if ('A' <= c && c <= 'Z') { 3116 // return (char) (c + ('a' - 'A')); 3117 // } 3118 // if (c < 192) {// || c == 215 || (c > 222 && c < 256)) { 3119 // return c; 3120 // } 3121 // if (c<1000) { 3122 // return (char)lowercaseValuesCache[c-192]; 3123 // } 3124 // 3125 // int result = BinarySearch.binarySearchRange(lowercaseKeys, c); 3126 // if (result >= 0) { 3127 // boolean by2 = false; 3128 // char start = lowercaseKeys.charAt(result); 3129 // char end = lowercaseValues[result * 2]; 3130 // if ((start & 0x8000) != (end & 0x8000)) { 3131 // end ^= 0x8000; 3132 // by2 = true; 3133 // } 3134 // if (c <= end) { 3135 // if (by2 && (c & 1) != (start & 1)) { 3136 // return c; 3137 // } 3138 // char mapping = lowercaseValues[result * 2 + 1]; 3139 // return (char) (c + mapping); 3140 // } 3141 // } 3142 // return c; 3143 return (char)UCharacter.toLowerCase(c); 3144 // END android-changed 3145 } 3146 3147 /** 3148 * Returns the lower case equivalent for the specified code point if it is 3149 * an upper case letter. Otherwise, the specified code point is returned 3150 * unchanged. 3151 * 3152 * @param codePoint 3153 * the code point to check. 3154 * @return if {@code codePoint} is an upper case character then its lower 3155 * case counterpart, otherwise just {@code codePoint}. 3156 */ 3157 public static int toLowerCase(int codePoint) { 3158 return UCharacter.toLowerCase(codePoint); 3159 } 3160 3161 @Override 3162 public String toString() { 3163 return String.valueOf(value); 3164 } 3165 3166 /** 3167 * Converts the specified character to its string representation. 3168 * 3169 * @param value 3170 * the character to convert. 3171 * @return the character converted to a string. 
3172 */ 3173 public static String toString(char value) { 3174 return String.valueOf(value); 3175 } 3176 3177 /** 3178 * Returns the title case equivalent for the specified character if it 3179 * exists. Otherwise, the specified character is returned unchanged. 3180 * 3181 * @param c 3182 * the character to convert. 3183 * @return the title case equivalent of {@code c} if it exists, otherwise 3184 * {@code c}. 3185 */ 3186 public static char toTitleCase(char c) { 3187 // BEGIN android-changed 3188 // if (isTitleCase(c)) { 3189 // return c; 3190 // } 3191 // int result = BinarySearch.binarySearch(titlecaseKeys, c); 3192 // if (result >= 0) { 3193 // return titlecaseValues[result]; 3194 // } 3195 // return toUpperCase(c); 3196 return (char)UCharacter.toTitleCase(c); 3197 // ENd android-changed 3198 } 3199 3200 /** 3201 * Returns the title case equivalent for the specified code point if it 3202 * exists. Otherwise, the specified code point is returned unchanged. 3203 * 3204 * @param codePoint 3205 * the code point to convert. 3206 * @return the title case equivalent of {@code codePoint} if it exists, 3207 * otherwise {@code codePoint}. 3208 */ 3209 public static int toTitleCase(int codePoint) { 3210 return UCharacter.toTitleCase(codePoint); 3211 } 3212 3213 /** 3214 * Returns the upper case equivalent for the specified character if the 3215 * character is a lower case letter. Otherwise, the specified character is 3216 * returned unchanged. 3217 * 3218 * @param c 3219 * the character to convert. 3220 * @return if {@code c} is a lower case character then its upper case 3221 * counterpart, otherwise just {@code c}. 
*/
    public static char toUpperCase(char c) {
        // BEGIN android-changed
        // // Optimized case for ASCII
        // if ('a' <= c && c <= 'z') {
        //     return (char) (c - ('a' - 'A'));
        // }
        // if (c < 181) {
        //     return c;
        // }
        // if (c<1000) {
        //     return (char)uppercaseValuesCache[(int)c-181];
        // }
        // int result = BinarySearch.binarySearchRange(uppercaseKeys, c);
        // if (result >= 0) {
        //     boolean by2 = false;
        //     char start = uppercaseKeys.charAt(result);
        //     char end = uppercaseValues[result * 2];
        //     if ((start & 0x8000) != (end & 0x8000)) {
        //         end ^= 0x8000;
        //         by2 = true;
        //     }
        //     if (c <= end) {
        //         if (by2 && (c & 1) != (start & 1)) {
        //             return c;
        //         }
        //         char mapping = uppercaseValues[result * 2 + 1];
        //         return (char) (c + mapping);
        //     }
        // }
        // return c;
        final char raised = (char) UCharacter.toUpperCase(c);
        return raised;
        // END android-changed
    }

    /**
     * Maps the given code point to its upper case equivalent when it is a
     * lower case letter; any other code point is returned unchanged. The
     * mapping is delegated to {@code UCharacter}.
     *
     * @param codePoint
     *            the code point to convert.
     * @return if {@code codePoint} is a lower case character then its upper
     *         case counterpart, otherwise just {@code codePoint}.
     */
    public static int toUpperCase(int codePoint) {
        return UCharacter.toUpperCase(codePoint);
    }

}