Character.java revision fdb2704414a9ed92394ada0d1395e4db86889465
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21// BEGIN android-removed 22// import java.util.SortedMap; 23// import java.util.TreeMap; 24// import org.apache.harmony.luni.util.BinarySearch; 25// END android-removed 26import com.ibm.icu4jni.lang.UCharacter; 27 28/** 29 * <p> 30 * Character is the wrapper for the primitive type <code>char</code>. This 31 * class also provides a number of utility methods for working with 32 * <code>char</code>s. 33 * </p> 34 * 35 * <p> 36 * Character data is based upon the Unicode Standard, 4.0. The Unicode 37 * specification, character tables and other information are available at <a 38 * href="http://www.unicode.org/">http://www.unicode.org/</a>. 39 * </p> 40 * 41 * <p> 42 * Unicode characters are referred to as <i>code points</i>. The range of valid 43 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 44 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 45 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 46 * encoding and <code>char</code> pairs are used to represent code points in 47 * the supplementary range. 
A pair of <code>char</code> values that represent 48 * a supplementary character are made up of a <i>high surrogate</i> with a 49 * value range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value 50 * range of 0xDC00 to 0xDFFF. 51 * </p> 52 * 53 * <p> 54 * On the Java platform a <code>char</code> value represents either a single 55 * BMP code point or a UTF-16 unit that's part of a surrogate pair. The 56 * <code>int</code> type is used to represent all Unicode code points. 57 * </p> 58 * 59 * @since 1.0 60 */ 61public final class Character implements Serializable, Comparable<Character> { 62 private static final long serialVersionUID = 3786198910865385080L; 63 64 private final char value; 65 66 /** 67 * The minimum possible Character value. 68 */ 69 public static final char MIN_VALUE = '\u0000'; 70 71 /** 72 * The maximum possible Character value. 73 */ 74 public static final char MAX_VALUE = '\uffff'; 75 76 /** 77 * The minimum possible radix used for conversions between Characters and 78 * integers. 79 */ 80 public static final int MIN_RADIX = 2; 81 82 /** 83 * The maximum possible radix used for conversions between Characters and 84 * integers. 85 */ 86 public static final int MAX_RADIX = 36; 87 88 /** 89 * The <code>char</code> {@link Class} object. 90 */ 91 @SuppressWarnings("unchecked") 92 public static final Class<Character> TYPE = (Class<Character>) new char[0] 93 .getClass().getComponentType(); 94 95 // Note: This can't be set to "char.class", since *that* is 96 // defined to be "java.lang.Character.TYPE"; 97 98 /** 99 * Unicode category constant Cn. 100 */ 101 public static final byte UNASSIGNED = 0; 102 103 /** 104 * Unicode category constant Lu. 105 */ 106 public static final byte UPPERCASE_LETTER = 1; 107 108 /** 109 * Unicode category constant Ll. 110 */ 111 public static final byte LOWERCASE_LETTER = 2; 112 113 /** 114 * Unicode category constant Lt. 
115 */ 116 public static final byte TITLECASE_LETTER = 3; 117 118 /** 119 * Unicode category constant Lm. 120 */ 121 public static final byte MODIFIER_LETTER = 4; 122 123 /** 124 * Unicode category constant Lo. 125 */ 126 public static final byte OTHER_LETTER = 5; 127 128 /** 129 * Unicode category constant Mn. 130 */ 131 public static final byte NON_SPACING_MARK = 6; 132 133 /** 134 * Unicode category constant Me. 135 */ 136 public static final byte ENCLOSING_MARK = 7; 137 138 /** 139 * Unicode category constant Mc. 140 */ 141 public static final byte COMBINING_SPACING_MARK = 8; 142 143 /** 144 * Unicode category constant Nd. 145 */ 146 public static final byte DECIMAL_DIGIT_NUMBER = 9; 147 148 /** 149 * Unicode category constant Nl. 150 */ 151 public static final byte LETTER_NUMBER = 10; 152 153 /** 154 * Unicode category constant No. 155 */ 156 public static final byte OTHER_NUMBER = 11; 157 158 /** 159 * Unicode category constant Zs. 160 */ 161 public static final byte SPACE_SEPARATOR = 12; 162 163 /** 164 * Unicode category constant Zl. 165 */ 166 public static final byte LINE_SEPARATOR = 13; 167 168 /** 169 * Unicode category constant Zp. 170 */ 171 public static final byte PARAGRAPH_SEPARATOR = 14; 172 173 /** 174 * Unicode category constant Cc. 175 */ 176 public static final byte CONTROL = 15; 177 178 /** 179 * Unicode category constant Cf. 180 */ 181 public static final byte FORMAT = 16; 182 183 /** 184 * Unicode category constant Co. 185 */ 186 public static final byte PRIVATE_USE = 18; 187 188 /** 189 * Unicode category constant Cs. 190 */ 191 public static final byte SURROGATE = 19; 192 193 /** 194 * Unicode category constant Pd. 195 */ 196 public static final byte DASH_PUNCTUATION = 20; 197 198 /** 199 * Unicode category constant Ps. 200 */ 201 public static final byte START_PUNCTUATION = 21; 202 203 /** 204 * Unicode category constant Pe. 205 */ 206 public static final byte END_PUNCTUATION = 22; 207 208 /** 209 * Unicode category constant Pc. 
210 */ 211 public static final byte CONNECTOR_PUNCTUATION = 23; 212 213 /** 214 * Unicode category constant Po. 215 */ 216 public static final byte OTHER_PUNCTUATION = 24; 217 218 /** 219 * Unicode category constant Sm. 220 */ 221 public static final byte MATH_SYMBOL = 25; 222 223 /** 224 * Unicode category constant Sc. 225 */ 226 public static final byte CURRENCY_SYMBOL = 26; 227 228 /** 229 * Unicode category constant Sk. 230 */ 231 public static final byte MODIFIER_SYMBOL = 27; 232 233 /** 234 * Unicode category constant So. 235 */ 236 public static final byte OTHER_SYMBOL = 28; 237 238 /** 239 * Unicode category constant Pi. 240 * @since 1.4 241 */ 242 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 243 244 /** 245 * Unicode category constant Pf. 246 * @since 1.4 247 */ 248 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 249 250 /** 251 * Unicode bidirectional constant. 252 * @since 1.4 253 */ 254 public static final byte DIRECTIONALITY_UNDEFINED = -1; 255 256 /** 257 * Unicode bidirectional constant L. 258 * @since 1.4 259 */ 260 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 261 262 /** 263 * Unicode bidirectional constant R. 264 * @since 1.4 265 */ 266 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 267 268 /** 269 * Unicode bidirectional constant AL. 270 * @since 1.4 271 */ 272 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 273 274 /** 275 * Unicode bidirectional constant EN. 276 * @since 1.4 277 */ 278 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 279 280 /** 281 * Unicode bidirectional constant ES. 282 * @since 1.4 283 */ 284 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 285 286 /** 287 * Unicode bidirectional constant ET. 288 * @since 1.4 289 */ 290 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 291 292 /** 293 * Unicode bidirectional constant AN. 
294 * @since 1.4 295 */ 296 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 297 298 /** 299 * Unicode bidirectional constant CS. 300 * @since 1.4 301 */ 302 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 303 304 /** 305 * Unicode bidirectional constant NSM. 306 * @since 1.4 307 */ 308 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 309 310 /** 311 * Unicode bidirectional constant BN. 312 * @since 1.4 313 */ 314 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 315 316 /** 317 * Unicode bidirectional constant B. 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 321 322 /** 323 * Unicode bidirectional constant S. 324 * @since 1.4 325 */ 326 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 327 328 /** 329 * Unicode bidirectional constant WS. 330 * @since 1.4 331 */ 332 public static final byte DIRECTIONALITY_WHITESPACE = 12; 333 334 /** 335 * Unicode bidirectional constant ON. 336 * @since 1.4 337 */ 338 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 339 340 /** 341 * Unicode bidirectional constant LRE. 342 * @since 1.4 343 */ 344 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 345 346 /** 347 * Unicode bidirectional constant LRO. 348 * @since 1.4 349 */ 350 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 351 352 /** 353 * Unicode bidirectional constant RLE. 354 * @since 1.4 355 */ 356 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 357 358 /** 359 * Unicode bidirectional constant RLO. 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 363 364 /** 365 * Unicode bidirectional constant PDF. 366 * @since 1.4 367 */ 368 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 369 370 /** 371 * <p> 372 * Minimum value of a high surrogate or leading surrogate unit in UTF-16 373 * encoding - <code>'\uD800'</code>. 
374 * </p> 375 * 376 * @since 1.5 377 */ 378 public static final char MIN_HIGH_SURROGATE = '\uD800'; 379 380 /** 381 * <p> 382 * Maximum value of a high surrogate or leading surrogate unit in UTF-16 383 * encoding - <code>'\uDBFF'</code>. 384 * </p> 385 * 386 * @since 1.5 387 */ 388 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 389 390 /** 391 * <p> 392 * Minimum value of a low surrogate or trailing surrogate unit in UTF-16 393 * encoding - <code>'\uDC00'</code>. 394 * </p> 395 * 396 * @since 1.5 397 */ 398 public static final char MIN_LOW_SURROGATE = '\uDC00'; 399 400 /** 401 * <p> Maximum value of a low surrogate or trailing surrogate unit in UTF-16 402 * encoding - <code>'\uDFFF'</code>. 403 * </p> 404 * 405 * @since 1.5 406 */ 407 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 408 409 /** 410 * <p> 411 * Minimum value of a surrogate unit in UTF-16 encoding - <code>'\uD800'</code>. 412 * </p> 413 * 414 * @since 1.5 415 */ 416 public static final char MIN_SURROGATE = '\uD800'; 417 418 /** 419 * <p> 420 * Maximum value of a surrogate unit in UTF-16 encoding - <code>'\uDFFF'</code>. 421 * </p> 422 * 423 * @since 1.5 424 */ 425 public static final char MAX_SURROGATE = '\uDFFF'; 426 427 /** 428 * <p> 429 * Minimum value of a supplementary code point - <code>U+010000</code>. 430 * </p> 431 * 432 * @since 1.5 433 */ 434 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 435 436 /** 437 * <p> 438 * Minimum code point value - <code>U+0000</code>. 439 * </p> 440 * 441 * @since 1.5 442 */ 443 public static final int MIN_CODE_POINT = 0x000000; 444 445 /** 446 * <p> 447 * Maximum code point value - <code>U+10FFFF</code>. 448 * </p> 449 * 450 * @since 1.5 451 */ 452 public static final int MAX_CODE_POINT = 0x10FFFF; 453 454 /** 455 * <p> 456 * Constant for the number of bits to represent a <code>char</code> in 457 * two's complement form. 
458 * </p> 459 * 460 * @since 1.5 461 */ 462 public static final int SIZE = 16; 463 464 // BEGIN android-removed 465 // removed strings containing information about chars that now are read from 466 // icu data. 467 // END android-removed 468 469 private static final char[] typeTags = "\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0002\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0003\u0000\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0003\u0000\u0000\u0000\u0000\u0002" 470 .getValue(); 471 472 private static final byte[] DIRECTIONALITY = new byte[] { 473 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 474 DIRECTIONALITY_EUROPEAN_NUMBER, 475 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 476 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 477 DIRECTIONALITY_ARABIC_NUMBER, 478 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 479 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 480 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 481 DIRECTIONALITY_OTHER_NEUTRALS, 482 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 483 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 484 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 485 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 486 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 487 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 488 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 489 490 private static final int ISJAVASTART = 1; 491 492 private static final int ISJAVAPART = 2; 493 494 // BEGIN 
android-removed 495 // removed strings containing information about chars that now are read from 496 // icu data. 497 // END android-removed 498 499 /** 500 * Subset represents a named subset of characters. Instances compare and hash by identity, and {@link #toString()} returns the name given at construction. 501 */ 502 public static class Subset { 503 /* Name supplied to the constructor; returned unchanged by toString(). */ String name; 504 /** Creates a Subset with the given name. @param string the name of this subset @throws NullPointerException if string is null */ 505 protected Subset(String string) { 506 if (string == null) { 507 throw new NullPointerException(); 508 } 509 name = string; 510 } 511 512 /** 513 * Compares the specified object to this Subset and returns true if they 514 * are equal. The object must be the same instance of Subset. 515 * 516 * @param object 517 * the object to compare 518 * @return true if the specified object is equal to this Subset, false 519 * otherwise 520 * 521 * @see #hashCode 522 */ 523 @Override 524 public final boolean equals(Object object) { 525 return super.equals(object); 526 } 527 528 /** 529 * Returns an integer hash code for the receiver. Objects which are 530 * equal answer the same value for this method. 531 * 532 * @return the receiver's hash 533 * 534 * @see #equals 535 */ 536 @Override 537 public final int hashCode() { 538 return super.hashCode(); 539 } 540 541 /** 542 * Returns the string representation of this Subset. 543 * 544 * @return the string representation of this Subset 545 */ 546 @Override 547 public final String toString() { 548 return name; 549 } 550 } 551 552 // BEGIN android-changed 553 554 /** 555 * Blocks of characters, as defined by the Unicode 4.0.1 specification. 556 * @since 1.2 557 */ 558 public static final class UnicodeBlock extends Subset { 559 /** 560 * The "Surrogates Area" Unicode Block. 561 * @deprecated As of Java 5, this block has been replaced by {@link #HIGH_SURROGATES}, {@link #HIGH_PRIVATE_USE_SURROGATES} and {@link #LOW_SURROGATES}. 562 */ 563 @Deprecated 564 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"); 565 /** 566 * The "Basic Latin" Unicode Block. 
567 * @since 1.2 568 */ 569 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"); 570 /** 571 * The "Latin-1 Supplement" Unicode Block. 572 * @since 1.2 573 */ 574 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"); 575 /** 576 * The "Latin Extended-A" Unicode Block. 577 * @since 1.2 578 */ 579 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"); 580 /** 581 * The "Latin Extended-B" Unicode Block. 582 * @since 1.2 583 */ 584 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"); 585 /** 586 * The "IPA Extensions" Unicode Block. 587 * @since 1.2 588 */ 589 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"); 590 /** 591 * The "Spacing Modifier Letters" Unicode Block. 592 * @since 1.2 593 */ 594 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"); 595 /** 596 * The "Combining Diacritical Marks" Unicode Block. 597 * @since 1.2 598 */ 599 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"); 600 /** 601 * The "Greek and Coptic" Unicode Block. Previously referred to as "Greek". 602 * @since 1.2 603 */ 604 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK"); 605 /** 606 * The "Cyrillic" Unicode Block. 607 * @since 1.2 608 */ 609 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC"); 610 /** 611 * The "Cyrillic Supplement" Unicode Block. Previously referred to as "Cyrillic Supplementary". 612 * @since 1.5 613 */ 614 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY"); 615 /** 616 * The "Armenian" Unicode Block. 617 * @since 1.2 618 */ 619 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN"); 620 /** 621 * The "Hebrew" Unicode Block. 
622 * @since 1.2 623 */ 624 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW"); 625 /** 626 * The "Arabic" Unicode Block. 627 * @since 1.2 628 */ 629 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC"); 630 /** 631 * The "Syriac" Unicode Block. 632 * @since 1.4 633 */ 634 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"); 635 /** 636 * The "Thaana" Unicode Block. 637 * @since 1.4 638 */ 639 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA"); 640 /** 641 * The "Devanagari" Unicode Block. 642 * @since 1.2 643 */ 644 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI"); 645 /** 646 * The "Bengali" Unicode Block. 647 * @since 1.2 648 */ 649 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI"); 650 /** 651 * The "Gurmukhi" Unicode Block. 652 * @since 1.2 653 */ 654 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI"); 655 /** 656 * The "Gujarati" Unicode Block. 657 * @since 1.2 658 */ 659 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI"); 660 /** 661 * The "Oriya" Unicode Block. 662 * @since 1.2 663 */ 664 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA"); 665 /** 666 * The "Tamil" Unicode Block. 667 * @since 1.2 668 */ 669 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL"); 670 /** 671 * The "Telugu" Unicode Block. 672 * @since 1.2 673 */ 674 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU"); 675 /** 676 * The "Kannada" Unicode Block. 677 * @since 1.2 678 */ 679 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA"); 680 /** 681 * The "Malayalam" Unicode Block. 682 * @since 1.2 683 */ 684 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM"); 685 /** 686 * The "Sinhala" Unicode Block. 687 * @since 1.4 688 */ 689 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA"); 690 /** 691 * The "Thai" Unicode Block. 
692 * @since 1.2 693 */ 694 public static final UnicodeBlock THAI = new UnicodeBlock("THAI"); 695 /** 696 * The "Lao" Unicode Block. 697 * @since 1.2 698 */ 699 public static final UnicodeBlock LAO = new UnicodeBlock("LAO"); 700 /** 701 * The "Tibetan" Unicode Block. 702 * @since 1.2 703 */ 704 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN"); 705 /** 706 * The "Myanmar" Unicode Block. 707 * @since 1.4 708 */ 709 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR"); 710 /** 711 * The "Georgian" Unicode Block. 712 * @since 1.2 713 */ 714 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN"); 715 /** 716 * The "Hangul Jamo" Unicode Block. 717 * @since 1.2 718 */ 719 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"); 720 /** 721 * The "Ethiopic" Unicode Block. 722 * @since 1.4 723 */ 724 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC"); 725 /** 726 * The "Cherokee" Unicode Block. 727 * @since 1.4 728 */ 729 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE"); 730 /** 731 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 732 * @since 1.4 733 */ 734 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 735 /** 736 * The "Ogham" Unicode Block. 737 * @since 1.4 738 */ 739 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM"); 740 /** 741 * The "Runic" Unicode Block. 742 * @since 1.4 743 */ 744 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC"); 745 /** 746 * The "Tagalog" Unicode Block. 747 * @since 1.5 748 */ 749 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG"); 750 /** 751 * The "Hanunoo" Unicode Block. 752 * @since 1.5 753 */ 754 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO"); 755 /** 756 * The "Buhid" Unicode Block. 
757 * @since 1.5 758 */ 759 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID"); 760 /** 761 * The "Tagbanwa" Unicode Block. 762 * @since 1.5 763 */ 764 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA"); 765 /** 766 * The "Khmer" Unicode Block. 767 * @since 1.4 768 */ 769 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER"); 770 /** 771 * The "Mongolian" Unicode Block. 772 * @since 1.4 773 */ 774 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN"); 775 /** 776 * The "Limbu" Unicode Block. 777 * @since 1.5 778 */ 779 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU"); 780 /** 781 * The "Tai Le" Unicode Block. 782 * @since 1.5 783 */ 784 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE"); 785 /** 786 * The "Khmer Symbols" Unicode Block. 787 * @since 1.5 788 */ 789 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS"); 790 /** 791 * The "Phonetic Extensions" Unicode Block. 792 * @since 1.5 793 */ 794 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS"); 795 /** 796 * The "Latin Extended Additional" Unicode Block. 797 * @since 1.2 798 */ 799 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"); 800 /** 801 * The "Greek Extended" Unicode Block. 802 * @since 1.2 803 */ 804 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"); 805 /** 806 * The "General Punctuation" Unicode Block. 807 * @since 1.2 808 */ 809 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"); 810 /** 811 * The "Superscripts and Subscripts" Unicode Block. 812 * @since 1.2 813 */ 814 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"); 815 /** 816 * The "Currency Symbols" Unicode Block. 
817 * @since 1.2 818 */ 819 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"); 820 /** 821 * The "Combining Diacritical Marks for Symbols" Unicode Block. Previously referred to as "Combining Marks for Symbols". 822 * @since 1.2 823 */ 824 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"); 825 /** 826 * The "Letterlike Symbols" Unicode Block. 827 * @since 1.2 828 */ 829 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"); 830 /** 831 * The "Number Forms" Unicode Block. 832 * @since 1.2 833 */ 834 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"); 835 /** 836 * The "Arrows" Unicode Block. 837 * @since 1.2 838 */ 839 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS"); 840 /** 841 * The "Mathematical Operators" Unicode Block. 842 * @since 1.2 843 */ 844 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"); 845 /** 846 * The "Miscellaneous Technical" Unicode Block. 847 * @since 1.2 848 */ 849 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"); 850 /** 851 * The "Control Pictures" Unicode Block. 852 * @since 1.2 853 */ 854 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"); 855 /** 856 * The "Optical Character Recognition" Unicode Block. 857 * @since 1.2 858 */ 859 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"); 860 /** 861 * The "Enclosed Alphanumerics" Unicode Block. 862 * @since 1.2 863 */ 864 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"); 865 /** 866 * The "Box Drawing" Unicode Block. 
867 * @since 1.2 868 */ 869 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"); 870 /** 871 * The "Block Elements" Unicode Block. 872 * @since 1.2 873 */ 874 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"); 875 /** 876 * The "Geometric Shapes" Unicode Block. 877 * @since 1.2 878 */ 879 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"); 880 /** 881 * The "Miscellaneous Symbols" Unicode Block. 882 * @since 1.2 883 */ 884 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"); 885 /** 886 * The "Dingbats" Unicode Block. 887 * @since 1.2 888 */ 889 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS"); 890 /** 891 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 892 * @since 1.5 893 */ 894 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A"); 895 /** 896 * The "Supplemental Arrows-A" Unicode Block. 897 * @since 1.5 898 */ 899 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A"); 900 /** 901 * The "Braille Patterns" Unicode Block. 902 * @since 1.4 903 */ 904 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"); 905 /** 906 * The "Supplemental Arrows-B" Unicode Block. 907 * @since 1.5 908 */ 909 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B"); 910 /** 911 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 912 * @since 1.5 913 */ 914 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B"); 915 /** 916 * The "Supplemental Mathematical Operators" Unicode Block. 
917 * @since 1.5 918 */ 919 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS"); 920 /** 921 * The "Miscellaneous Symbols and Arrows" Unicode Block. 922 * @since 1.2 923 */ 924 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS"); 925 /** 926 * The "CJK Radicals Supplement" Unicode Block. 927 * @since 1.4 928 */ 929 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"); 930 /** 931 * The "Kangxi Radicals" Unicode Block. 932 * @since 1.4 933 */ 934 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"); 935 /** 936 * The "Ideographic Description Characters" Unicode Block. 937 * @since 1.4 938 */ 939 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 940 /** 941 * The "CJK Symbols and Punctuation" Unicode Block. 942 * @since 1.2 943 */ 944 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"); 945 /** 946 * The "Hiragana" Unicode Block. 947 * @since 1.2 948 */ 949 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA"); 950 /** 951 * The "Katakana" Unicode Block. 952 * @since 1.2 953 */ 954 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA"); 955 /** 956 * The "Bopomofo" Unicode Block. 957 * @since 1.2 958 */ 959 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO"); 960 /** 961 * The "Hangul Compatibility Jamo" Unicode Block. 962 * @since 1.2 963 */ 964 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"); 965 /** 966 * The "Kanbun" Unicode Block. 967 * @since 1.2 968 */ 969 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN"); 970 /** 971 * The "Bopomofo Extended" Unicode Block. 
972 * @since 1.4 973 */ 974 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"); 975 /** 976 * The "Katakana Phonetic Extensions" Unicode Block. 977 * @since 1.5 978 */ 979 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS"); 980 /** 981 * The "Enclosed CJK Letters and Months" Unicode Block. 982 * @since 1.2 983 */ 984 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"); 985 /** 986 * The "CJK Compatibility" Unicode Block. 987 * @since 1.2 988 */ 989 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"); 990 /** 991 * The "CJK Unified Ideographs Extension A" Unicode Block. 992 * @since 1.4 993 */ 994 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 995 /** 996 * The "Yijing Hexagram Symbols" Unicode Block. 997 * @since 1.5 998 */ 999 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS"); 1000 /** 1001 * The "CJK Unified Ideographs" Unicode Block. 1002 * @since 1.2 1003 */ 1004 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"); 1005 /** 1006 * The "Yi Syllables" Unicode Block. 1007 * @since 1.4 1008 */ 1009 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"); 1010 /** 1011 * The "Yi Radicals" Unicode Block. 1012 * @since 1.4 1013 */ 1014 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS"); 1015 /** 1016 * The "Hangul Syllables" Unicode Block. 1017 * @since 1.2 1018 */ 1019 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"); 1020 /** 1021 * The "High Surrogates" Unicode Block. 
1022 * This block represents code point values in the high surrogate range 0xD800 to 0xDB7F @since 1.5 1023 */ 1024 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES"); 1025 /** 1026 * The "High Private Use Surrogates" Unicode Block. 1027 * This block represents code point values in the high surrogate range 0xDB80 to 0xDBFF @since 1.5 1028 */ 1029 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES"); 1030 /** 1031 * The "Low Surrogates" Unicode Block. 1032 * This block represents code point values in the low surrogate range 0xDC00 to 0xDFFF @since 1.5 1033 */ 1034 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES"); 1035 /** 1036 * The "Private Use Area" Unicode Block. 1037 * @since 1.2 1038 */ 1039 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"); 1040 /** 1041 * The "CJK Compatibility Ideographs" Unicode Block. 1042 * @since 1.2 1043 */ 1044 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"); 1045 /** 1046 * The "Alphabetic Presentation Forms" Unicode Block. 1047 * @since 1.2 1048 */ 1049 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"); 1050 /** 1051 * The "Arabic Presentation Forms-A" Unicode Block. 1052 * @since 1.2 1053 */ 1054 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"); 1055 /** 1056 * The "Variation Selectors" Unicode Block. 1057 * @since 1.5 1058 */ 1059 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS"); 1060 /** 1061 * The "Combining Half Marks" Unicode Block. 1062 * @since 1.2 1063 */ 1064 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"); 1065 /** 1066 * The "CJK Compatibility Forms" Unicode Block. 
1067 * @since 1.2 1068 */ 1069 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"); 1070 /** 1071 * The "Small Form Variants" Unicode Block. 1072 * @since 1.2 1073 */ 1074 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS"); 1075 /** 1076 * The "Arabic Presentation Forms-B" Unicode Block. 1077 * @since 1.2 1078 */ 1079 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"); 1080 /** 1081 * The "Halfwidth and Fullwidth Forms" Unicode Block. 1082 * @since 1.2 1083 */ 1084 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"); 1085 /** 1086 * The "Specials" Unicode Block. 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS"); 1090 /** 1091 * The "Linear B Syllabary" Unicode Block. 1092 * @since 1.2 1093 */ 1094 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY"); 1095 /** 1096 * The "Linear B Ideograms" Unicode Block. 1097 * @since 1.5 1098 */ 1099 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS"); 1100 /** 1101 * The "Aegean Numbers" Unicode Block. 1102 * @since 1.5 1103 */ 1104 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS"); 1105 /** 1106 * The "Old Italic" Unicode Block. 1107 * @since 1.5 1108 */ 1109 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC"); 1110 /** 1111 * The "Gothic" Unicode Block. 1112 * @since 1.5 1113 */ 1114 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC"); 1115 /** 1116 * The "Ugaritic" Unicode Block. 1117 * @since 1.5 1118 */ 1119 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC"); 1120 /** 1121 * The "Deseret" Unicode Block. 
1122 * @since 1.5 1123 */ 1124 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET"); 1125 /** 1126 * The "Shavian" Unicode Block. 1127 * @since 1.5 1128 */ 1129 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN"); 1130 /** 1131 * The "Osmanya" Unicode Block. 1132 * @since 1.5 1133 */ 1134 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA"); 1135 /** 1136 * The "Cypriot Syllabary" Unicode Block. 1137 * @since 1.5 1138 */ 1139 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY"); 1140 /** 1141 * The "Byzantine Musical Symbols" Unicode Block. 1142 * @since 1.5 1143 */ 1144 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS"); 1145 /** 1146 * The "Musical Symbols" Unicode Block. 1147 * @since 1.5 1148 */ 1149 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS"); 1150 /** 1151 * The "Tai Xuan Jing Symbols" Unicode Block. 1152 * @since 1.5 1153 */ 1154 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS"); 1155 /** 1156 * The "Mathematical Alphanumeric Symbols" Unicode Block. 1157 * @since 1.5 1158 */ 1159 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS"); 1160 /** 1161 * The "CJK Unified Ideographs Extension B" Unicode Block. 1162 * @since 1.5 1163 */ 1164 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"); 1165 /** 1166 * The "CJK Compatibility Ideographs Supplement" Unicode Block. 1167 * @since 1.5 1168 */ 1169 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT"); 1170 /** 1171 * The "Tags" Unicode Block. 
1172 * @since 1.5 1173 */ 1174 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS"); 1175 /** 1176 * The "Variation Selectors Supplement" Unicode Block. 1177 * @since 1.5 1178 */ 1179 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT"); 1180 /** 1181 * The "Supplementary Private Use Area-A" Unicode Block. 1182 * @since 1.5 1183 */ 1184 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A"); 1185 /** 1186 * The "Supplementary Private Use Area-B" Unicode Block. 1187 * @since 1.5 1188 */ 1189 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B"); 1190 1191 /* 1192 * All of the UnicodeBlocks with valid ranges in ascending order. 1193 */ 1194 private static UnicodeBlock[] BLOCKS; 1195 1196 // END android-changed 1197 1198 /** 1199 * <p> 1200 * Retrieves the constant that corresponds to the block name given. The 1201 * block names are defined by the Unicode 4.0.1 specification in the 1202 * <code>Blocks-4.0.1.txt</code> file. 1203 * </p> 1204 * <p> 1205 * Block names may be one of the following: 1206 * </p> 1207 * <ul> 1208 * <li>Canonical block name, as defined by the Unicode specification; 1209 * case-insensitive.</li> 1210 * <li>Canonical block name without any spaces, as defined by the 1211 * Unicode specification; case-insensitive.</li> 1212 * <li><code>UnicodeBlock</code> constant identifier. This is 1213 * determined by uppercasing the canonical name and replacing all spaces 1214 * and hyphens with underscores.</li> 1215 * </ul> 1216 * 1217 * @param blockName The name of the block to retrieve. 1218 * @return A UnicodeBlock constant. 1219 * @throws NullPointerException if <code>blockName</code> is 1220 * <code>null</code>. 1221 * @throws IllegalArgumentException if <code>blockName</code> is not a 1222 * valid block name. 
1223 * @since 1.5 1224 */ 1225 public static UnicodeBlock forName(String blockName) { 1226 if (blockName == null) { 1227 throw new NullPointerException(); 1228 } 1229 // BEGIN android-changed 1230 if (BLOCKS == null) { 1231 BLOCKS = UCharacter.getBlockTable(); 1232 } 1233 int block = UCharacter.forName(blockName); 1234 if (block == -1) { 1235 if(blockName.equals("SURROGATES_AREA")) { 1236 return SURROGATES_AREA; 1237 } else if(blockName.equalsIgnoreCase("greek")) { 1238 return GREEK; 1239 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1240 blockName.equals("Combining Marks for Symbols") || 1241 blockName.equals("CombiningMarksforSymbols")) { 1242 return COMBINING_MARKS_FOR_SYMBOLS; 1243 } 1244 throw new IllegalArgumentException(); 1245 } 1246 return BLOCKS[block]; 1247 // END android-changed 1248 } 1249 1250 /** 1251 * <p> 1252 * Retrieves the constant that contains the given <code>char</code> or 1253 * <code>null</code> if there is none. 1254 * </p> 1255 * 1256 * @param c The character to retrieve a UnicodeBlock for. 1257 * @return A UnicodeBlock constant or <code>null</code>. 1258 */ 1259 public static UnicodeBlock of(char c) { 1260 return of((int) c); 1261 } 1262 1263 /** 1264 * <p> 1265 * Retrieves the constant that contains the given Unicode code point or 1266 * <code>null</code> if there is none. 1267 * </p> 1268 * 1269 * @param codePoint The Unicode code point to retrieve a UnicodeBlock 1270 * for. 1271 * @return A UnicodeBlock constant or <code>null</code>. 1272 * @throws IllegalArgumentException if <code>codePoint</code> is not a 1273 * valid Unicode code point. 
1274 * @since 1.5 1275 */ 1276 public static UnicodeBlock of(int codePoint) { 1277 if (!isValidCodePoint(codePoint)) { 1278 throw new IllegalArgumentException(); 1279 } 1280 // BEGIN android-changed 1281 if (BLOCKS == null) { 1282 BLOCKS = UCharacter.getBlockTable(); 1283 } 1284 int block = UCharacter.of(codePoint); 1285 if(block == -1 || block >= BLOCKS.length) { 1286 return null; 1287 } 1288 return BLOCKS[block]; 1289 // END android-changed 1290 } 1291 1292 // BEGIN android-changed 1293 private UnicodeBlock(String blockName) { 1294 super(blockName); 1295 } 1296 // END android-changed 1297 } 1298 1299 /** 1300 * Constructs a new instance of the receiver which represents the char 1301 * valued argument. 1302 * 1303 * @param value 1304 * the char to store in the new instance. 1305 */ 1306 public Character(char value) { 1307 this.value = value; 1308 } 1309 1310 /** 1311 * Returns the char value which the receiver represents. 1312 * 1313 * @return char the value of the receiver 1314 */ 1315 public char charValue() { 1316 return value; 1317 } 1318 1319 /** 1320 * Compares the receiver to the specified Character to determine the 1321 * relative ordering. 1322 * 1323 * @param c 1324 * the Character 1325 * @return an int < 0 if this Character is less than the specified 1326 * Character, 0 if they are equal, and > 0 if this Character is 1327 * greater 1328 * @throws NullPointerException 1329 * if <code>c</code> is <code>null</code>. 1330 * @since 1.2 1331 */ 1332 public int compareTo(Character c) { 1333 return value - c.value; 1334 } 1335 1336 /** 1337 * <p> 1338 * Returns a <code>Character</code> instance for the <code>char</code> 1339 * value passed. This method is preferred over the constructor, as this 1340 * method may maintain a cache of instances. 1341 * </p> 1342 * 1343 * @param c The char value. 1344 * @return A <code>Character</code> instance. 
1345 * @since 1.5 1346 */ 1347 public static Character valueOf(char c) { 1348 if (c >= CACHE_LEN ) { 1349 return new Character(c); 1350 } 1351 return valueOfCache.CACHE[c]; 1352 } 1353 1354 private static final int CACHE_LEN = 512; 1355 1356 static class valueOfCache { 1357 /* 1358 * Provides a cache for the 'valueOf' method. A size of 512 should cache the 1359 * first couple pages of Unicode, which includes the ASCII/Latin-1 1360 * characters, which other parts of this class are optimized for. 1361 */ 1362 private static final Character[] CACHE = new Character[CACHE_LEN ]; 1363 1364 static { 1365 for(int i=0; i<CACHE.length; i++){ 1366 CACHE[i] = new Character((char)i); 1367 } 1368 } 1369 } 1370 /** 1371 * <p> 1372 * A test for determining if the <code>codePoint</code> is a valid Unicode 1373 * code point. 1374 * </p> 1375 * 1376 * @param codePoint The code point to test. 1377 * @return A boolean value. 1378 * @since 1.5 1379 */ 1380 public static boolean isValidCodePoint(int codePoint) { 1381 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1382 } 1383 1384 /** 1385 * <p> 1386 * A test for determining if the <code>codePoint</code> is within the 1387 * supplementary code point range. 1388 * </p> 1389 * 1390 * @param codePoint The code point to test. 1391 * @return A boolean value. 1392 * @since 1.5 1393 */ 1394 public static boolean isSupplementaryCodePoint(int codePoint) { 1395 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1396 } 1397 1398 /** 1399 * <p> 1400 * A test for determining if the <code>char</code> is a high 1401 * surrogate/leading surrogate unit that's used for representing 1402 * supplementary characters in UTF-16 encoding. 1403 * </p> 1404 * 1405 * @param ch The <code>char</code> unit to test. 1406 * @return A boolean value. 
1407 * @since 1.5 1408 * @see #isLowSurrogate(char) 1409 */ 1410 public static boolean isHighSurrogate(char ch) { 1411 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1412 } 1413 1414 /** 1415 * <p> 1416 * A test for determining if the <code>char</code> is a high 1417 * surrogate/leading surrogate unit that's used for representing 1418 * supplementary characters in UTF-16 encoding. 1419 * </p> 1420 * 1421 * @param ch The <code>char</code> unit to test. 1422 * @return A boolean value. 1423 * @since 1.5 1424 * @see #isHighSurrogate(char) 1425 */ 1426 public static boolean isLowSurrogate(char ch) { 1427 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1428 } 1429 1430 /** 1431 * <p> 1432 * A test for determining if the <code>char</code> pair is a valid 1433 * surrogate pair. 1434 * </p> 1435 * 1436 * @param high The high surrogate unit to test. 1437 * @param low The low surrogate unit to test. 1438 * @return A boolean value. 1439 * @since 1.5 1440 * @see #isHighSurrogate(char) 1441 * @see #isLowSurrogate(char) 1442 */ 1443 public static boolean isSurrogatePair(char high, char low) { 1444 return (isHighSurrogate(high) && isLowSurrogate(low)); 1445 } 1446 1447 /** 1448 * <p> 1449 * Calculates the number of <code>char</code> values required to represent 1450 * the Unicode code point. This method only tests if the 1451 * <code>codePoint</code> is greater than or equal to <code>0x10000</code>, 1452 * in which case <code>2</code> is returned, otherwise <code>1</code>. 1453 * To test if the code point is valid, use the 1454 * {@link #isValidCodePoint(int)} method. 1455 * </p> 1456 * 1457 * @param codePoint The code point to test. 1458 * @return An <code>int</code> value of 2 or 1. 1459 * @since 1.5 1460 * @see #isValidCodePoint(int) 1461 * @see #isSupplementaryCodePoint(int) 1462 */ 1463 public static int charCount(int codePoint) { 1464 return (codePoint >= 0x10000 ? 
2 : 1); 1465 } 1466 1467 /** 1468 * <p> 1469 * Converts a surrogate pair into a Unicode code point. This method assume 1470 * that the pair are valid surrogates. If the pair are NOT valid surrogates, 1471 * then the result is indeterminate. The 1472 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1473 * method to validate the pair. 1474 * </p> 1475 * 1476 * @param high The high surrogate unit. 1477 * @param low The low surrogate unit. 1478 * @return The decoded code point. 1479 * @since 1.5 1480 * @see #isSurrogatePair(char, char) 1481 */ 1482 public static int toCodePoint(char high, char low) { 1483 // See RFC 2781, Section 2.2 1484 // http://www.faqs.org/rfcs/rfc2781.html 1485 int h = (high & 0x3FF) << 10; 1486 int l = low & 0x3FF; 1487 return (h | l) + 0x10000; 1488 } 1489 1490 /** 1491 * <p> 1492 * Returns the code point at the index in the <code>CharSequence</code>. 1493 * If <code>char</code> unit at the index is a high-surrogate unit, the 1494 * next index is less than the length of the sequence and the 1495 * <code>char</code> unit at the next index is a low surrogate unit, then 1496 * the code point represented by the pair is returned; otherwise the 1497 * <code>char</code> unit at the index is returned. 1498 * </p> 1499 * 1500 * @param seq The sequence of <code>char</code> units. 1501 * @param index The index into the <code>seq</code> to retrieve and 1502 * convert. 1503 * @return The Unicode code point. 1504 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1505 * @throws IndexOutOfBoundsException if the <code>index</code> is negative 1506 * or greater than or equal to <code>seq.length()</code>. 
1507 * @since 1.5 1508 */ 1509 public static int codePointAt(CharSequence seq, int index) { 1510 if (seq == null) { 1511 throw new NullPointerException(); 1512 } 1513 int len = seq.length(); 1514 if (index < 0 || index >= len) { 1515 throw new IndexOutOfBoundsException(); 1516 } 1517 1518 char high = seq.charAt(index++); 1519 if (index >= len) { 1520 return high; 1521 } 1522 char low = seq.charAt(index); 1523 if (isSurrogatePair(high, low)) { 1524 return toCodePoint(high, low); 1525 } 1526 return high; 1527 } 1528 1529 /** 1530 * <p> 1531 * Returns the code point at the index in the <code>char[]</code>. If 1532 * <code>char</code> unit at the index is a high-surrogate unit, the next 1533 * index is less than the length of the sequence and the <code>char</code> 1534 * unit at the next index is a low surrogate unit, then the code point 1535 * represented by the pair is returned; otherwise the <code>char</code> 1536 * unit at the index is returned. 1537 * </p> 1538 * 1539 * @param seq The sequence of <code>char</code> units. 1540 * @param index The index into the <code>seq</code> to retrieve and 1541 * convert. 1542 * @return The Unicode code point. 1543 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1544 * @throws IndexOutOfBoundsException if the <code>index</code> is negative 1545 * or greater than or equal to <code>seq.length()</code>. 
1546 * @since 1.5 1547 */ 1548 public static int codePointAt(char[] seq, int index) { 1549 if (seq == null) { 1550 throw new NullPointerException(); 1551 } 1552 int len = seq.length; 1553 if (index < 0 || index >= len) { 1554 throw new IndexOutOfBoundsException(); 1555 } 1556 1557 char high = seq[index++]; 1558 if (index >= len) { 1559 return high; 1560 } 1561 char low = seq[index]; 1562 if (isSurrogatePair(high, low)) { 1563 return toCodePoint(high, low); 1564 } 1565 return high; 1566 } 1567 1568 /** 1569 * <p> 1570 * Returns the code point at the index in the <code>char[]</code> that's 1571 * within the limit. If <code>char</code> unit at the index is a 1572 * high-surrogate unit, the next index is less than the <code>limit</code> 1573 * and the <code>char</code> unit at the next index is a low surrogate 1574 * unit, then the code point represented by the pair is returned; otherwise 1575 * the <code>char</code> unit at the index is returned. 1576 * </p> 1577 * 1578 * @param seq The sequence of <code>char</code> units. 1579 * @param index The index into the <code>seq</code> to retrieve and 1580 * convert. 1581 * @param limit The exclusive index into the <code>seq</code> that marks 1582 * the end of the units that can be used. 1583 * @return The Unicode code point. 1584 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1585 * @throws IndexOutOfBoundsException if the <code>index</code> is 1586 * negative, greater than or equal to <code>limit</code>, 1587 * <code>limit</code> is negative or <code>limit</code> is 1588 * greater than the length of <code>seq</code>. 
1589 * @since 1.5 1590 */ 1591 public static int codePointAt(char[] seq, int index, int limit) { 1592 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1593 throw new IndexOutOfBoundsException(); 1594 } 1595 1596 char high = seq[index++]; 1597 if (index >= limit) { 1598 return high; 1599 } 1600 char low = seq[index]; 1601 if (isSurrogatePair(high, low)) { 1602 return toCodePoint(high, low); 1603 } 1604 return high; 1605 } 1606 1607 /** 1608 * <p> 1609 * Returns the Unicode code point that proceeds the <code>index</code> in 1610 * the <code>CharSequence</code>. If the <code>char</code> unit at 1611 * <code>index - 1</code> is within the low surrogate range, the value 1612 * <code>index - 2</code> isn't negative and the <code>char</code> unit 1613 * at <code>index - 2</code> is within the high surrogate range, then the 1614 * supplementary code point made up of the surrogate pair is returned; 1615 * otherwise, the <code>char</code> value at <code>index - 1</code> is 1616 * returned. 1617 * </p> 1618 * 1619 * @param seq The <code>CharSequence</code> to search. 1620 * @param index The index into the <code>seq</code>. 1621 * @return A Unicode code point. 1622 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1623 * @throws IndexOutOfBoundsException if <code>index</code> is less than 1 1624 * or greater than <code>seq.length()</code>. 
1625 * @since 1.5 1626 */ 1627 public static int codePointBefore(CharSequence seq, int index) { 1628 if (seq == null) { 1629 throw new NullPointerException(); 1630 } 1631 int len = seq.length(); 1632 if (index < 1 || index > len) { 1633 throw new IndexOutOfBoundsException(); 1634 } 1635 1636 char low = seq.charAt(--index); 1637 if (--index < 0) { 1638 return low; 1639 } 1640 char high = seq.charAt(index); 1641 if (isSurrogatePair(high, low)) { 1642 return toCodePoint(high, low); 1643 } 1644 return low; 1645 } 1646 1647 /** 1648 * <p> 1649 * Returns the Unicode code point that proceeds the <code>index</code> in 1650 * the <code>char[]</code>. If the <code>char</code> unit at 1651 * <code>index - 1</code> is within the low surrogate range, the value 1652 * <code>index - 2</code> isn't negative and the <code>char</code> unit 1653 * at <code>index - 2</code> is within the high surrogate range, then the 1654 * supplementary code point made up of the surrogate pair is returned; 1655 * otherwise, the <code>char</code> value at <code>index - 1</code> is 1656 * returned. 1657 * </p> 1658 * 1659 * @param seq The <code>char[]</code> to search. 1660 * @param index The index into the <code>seq</code>. 1661 * @return A Unicode code point. 1662 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1663 * @throws IndexOutOfBoundsException if <code>index</code> is less than 1 1664 * or greater than <code>seq.length</code>. 
1665 * @since 1.5 1666 */ 1667 public static int codePointBefore(char[] seq, int index) { 1668 if (seq == null) { 1669 throw new NullPointerException(); 1670 } 1671 int len = seq.length; 1672 if (index < 1 || index > len) { 1673 throw new IndexOutOfBoundsException(); 1674 } 1675 1676 char low = seq[--index]; 1677 if (--index < 0) { 1678 return low; 1679 } 1680 char high = seq[index]; 1681 if (isSurrogatePair(high, low)) { 1682 return toCodePoint(high, low); 1683 } 1684 return low; 1685 } 1686 1687 /** 1688 * <p> 1689 * Returns the Unicode code point that proceeds the <code>index</code> in 1690 * the <code>char[]</code> and isn't less than <code>start</code>. If 1691 * the <code>char</code> unit at <code>index - 1</code> is within the 1692 * low surrogate range, the value <code>index - 2</code> isn't less than 1693 * <code>start</code> and the <code>char</code> unit at 1694 * <code>index - 2</code> is within the high surrogate range, then the 1695 * supplementary code point made up of the surrogate pair is returned; 1696 * otherwise, the <code>char</code> value at <code>index - 1</code> is 1697 * returned. 1698 * </p> 1699 * 1700 * @param seq The <code>char[]</code> to search. 1701 * @param index The index into the <code>seq</code>. 1702 * @return A Unicode code point. 1703 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1704 * @throws IndexOutOfBoundsException if <code>index</code> is less than or 1705 * equal to <code>start</code>, <code>index</code> is greater 1706 * than <code>seq.length</code>, <code>start</code> is not 1707 * negative and <code>start</code> is greater than 1708 * <code>seq.length</code>. 
1709 * @since 1.5 1710 */ 1711 public static int codePointBefore(char[] seq, int index, int start) { 1712 if (seq == null) { 1713 throw new NullPointerException(); 1714 } 1715 int len = seq.length; 1716 if (index <= start || index > len || start < 0 || start >= len) { 1717 throw new IndexOutOfBoundsException(); 1718 } 1719 1720 char low = seq[--index]; 1721 if (--index < start) { 1722 return low; 1723 } 1724 char high = seq[index]; 1725 if (isSurrogatePair(high, low)) { 1726 return toCodePoint(high, low); 1727 } 1728 return low; 1729 } 1730 1731 /** 1732 * <p> 1733 * Converts the Unicode code point, <code>codePoint</code>, into a UTF-16 1734 * encoded sequence and copies the value(s) into the 1735 * <code>char[]</code> <code>dst</code>, starting at the index 1736 * <code>dstIndex</code>. 1737 * </p> 1738 * 1739 * @param codePoint The Unicode code point to encode. 1740 * @param dst The <code>char[]</code> to copy the encoded value into. 1741 * @param dstIndex The index to start copying into <code>dst</code>. 1742 * @return The number of <code>char</code> value units copied into 1743 * <code>dst</code>. 1744 * @throws IllegalArgumentException if <code>codePoint</code> is not a 1745 * valid Unicode code point. 1746 * @throws NullPointerException if <code>dst</code> is <code>null</code>. 1747 * @throws IndexOutOfBoundsException if <code>dstIndex</code> is negative, 1748 * greater than or equal to <code>dst.length</code> or equals 1749 * <code>dst.length - 1</code> when <code>codePoint</code> is a 1750 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
1751 * @since 1.5 1752 */ 1753 public static int toChars(int codePoint, char[] dst, int dstIndex) { 1754 if (!isValidCodePoint(codePoint)) { 1755 throw new IllegalArgumentException(); 1756 } 1757 if (dst == null) { 1758 throw new NullPointerException(); 1759 } 1760 if (dstIndex < 0 || dstIndex >= dst.length) { 1761 throw new IndexOutOfBoundsException(); 1762 } 1763 1764 if (isSupplementaryCodePoint(codePoint)) { 1765 if (dstIndex == dst.length - 1) { 1766 throw new IndexOutOfBoundsException(); 1767 } 1768 // See RFC 2781, Section 2.1 1769 // http://www.faqs.org/rfcs/rfc2781.html 1770 int cpPrime = codePoint - 0x10000; 1771 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1772 int low = 0xDC00 | (cpPrime & 0x3FF); 1773 dst[dstIndex] = (char) high; 1774 dst[dstIndex + 1] = (char) low; 1775 return 2; 1776 } 1777 1778 dst[dstIndex] = (char) codePoint; 1779 return 1; 1780 } 1781 1782 /** 1783 * <p> 1784 * Converts the Unicode code point, <code>codePoint</code>, into a UTF-16 1785 * encoded sequence that is returned as a <code>char[]</code>. 1786 * </p> 1787 * 1788 * @param codePoint The Unicode code point to encode. 1789 * @return The UTF-16 encoded <code>char</code> sequence; if code point is 1790 * a {@link #isSupplementaryCodePoint(int) supplementary code point}, 1791 * then a 2 <code>char</code> array is returned, otherwise a 1 1792 * <code>char</code> array is returned. 1793 * @throws IllegalArgumentException if <code>codePoint</code> is not a 1794 * valid Unicode code point. 
1795 * @since 1.5 1796 */ 1797 public static char[] toChars(int codePoint) { 1798 if (!isValidCodePoint(codePoint)) { 1799 throw new IllegalArgumentException(); 1800 } 1801 1802 if (isSupplementaryCodePoint(codePoint)) { 1803 int cpPrime = codePoint - 0x10000; 1804 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 1805 int low = 0xDC00 | (cpPrime & 0x3FF); 1806 return new char[] { (char) high, (char) low }; 1807 } 1808 return new char[] { (char) codePoint }; 1809 } 1810 1811 /** 1812 * <p> 1813 * Counts the number of Unicode code points in the subsequence of the 1814 * <code>CharSequence</code>, as delineated by the 1815 * <code>beginIndex</code> and <code>endIndex</code>. Any surrogate 1816 * values with missing pair values will be counted as 1 code point. 1817 * </p> 1818 * 1819 * @param seq The <code>CharSequence</code> to look through. 1820 * @param beginIndex The inclusive index to begin counting at. 1821 * @param endIndex The exclusive index to stop counting at. 1822 * @return The number of Unicode code points. 1823 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1824 * @throws IndexOutOfBoundsException if <code>beginIndex</code> is 1825 * negative, greater than <code>seq.length()</code> or greater 1826 * than <code>endIndex</code>. 
1827 * @since 1.5 1828 */ 1829 public static int codePointCount(CharSequence seq, int beginIndex, 1830 int endIndex) { 1831 if (seq == null) { 1832 throw new NullPointerException(); 1833 } 1834 int len = seq.length(); 1835 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 1836 throw new IndexOutOfBoundsException(); 1837 } 1838 1839 int result = 0; 1840 for (int i = beginIndex; i < endIndex; i++) { 1841 char c = seq.charAt(i); 1842 if (isHighSurrogate(c)) { 1843 if (++i < endIndex) { 1844 c = seq.charAt(i); 1845 if (!isLowSurrogate(c)) { 1846 result++; 1847 } 1848 } 1849 } 1850 result++; 1851 } 1852 return result; 1853 } 1854 1855 /** 1856 * <p> 1857 * Counts the number of Unicode code points in the subsequence of the 1858 * <code>char[]</code>, as delineated by the <code>offset</code> and 1859 * <code>count</code>. Any surrogate values with missing pair values will 1860 * be counted as 1 code point. 1861 * </p> 1862 * 1863 * @param seq The <code>char[]</code> to look through. 1864 * @param offset The inclusive index to begin counting at. 1865 * @param count The number of <code>char</code> values to look through in 1866 * <code>seq</code>. 1867 * @return The number of Unicode code points. 1868 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1869 * @throws IndexOutOfBoundsException if <code>offset</code> or 1870 * <code>count</code> is negative or if <code>endIndex</code> is 1871 * greater than <code>seq.length</code>. 
1872 * @since 1.5 1873 */ 1874 public static int codePointCount(char[] seq, int offset, int count) { 1875 if (seq == null) { 1876 throw new NullPointerException(); 1877 } 1878 int len = seq.length; 1879 int endIndex = offset + count; 1880 if (offset < 0 || count < 0 || endIndex > len) { 1881 throw new IndexOutOfBoundsException(); 1882 } 1883 1884 int result = 0; 1885 for (int i = offset; i < endIndex; i++) { 1886 char c = seq[i]; 1887 if (isHighSurrogate(c)) { 1888 if (++i < endIndex) { 1889 c = seq[i]; 1890 if (!isLowSurrogate(c)) { 1891 result++; 1892 } 1893 } 1894 } 1895 result++; 1896 } 1897 return result; 1898 } 1899 1900 /** 1901 * <p> 1902 * Determines the index into the <code>CharSequence</code> that is offset 1903 * (measured in code points and specified by <code>codePointOffset</code>), 1904 * from the <code>index</code> argument. 1905 * </p> 1906 * 1907 * @param seq The <code>CharSequence</code> to find the index within. 1908 * @param index The index to begin from, within the 1909 * <code>CharSequence</code>. 1910 * @param codePointOffset The number of code points to look back or 1911 * forwards; may be a negative or positive value. 1912 * @return The calculated index that is <code>codePointOffset</code> code 1913 * points from <code>index</code>. 1914 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1915 * @throws IndexOutOfBoundsException if <code>index</code> is negative, 1916 * greater than <code>seq.length()</code>, there aren't enough 1917 * values in <code>seq</code> after <code>index</code> or before 1918 * <code>index</code> if <code>codePointOffset</code> is 1919 * negative. 
1920 * @since 1.5 1921 */ 1922 public static int offsetByCodePoints(CharSequence seq, int index, 1923 int codePointOffset) { 1924 if (seq == null) { 1925 throw new NullPointerException(); 1926 } 1927 int len = seq.length(); 1928 if (index < 0 || index > len) { 1929 throw new IndexOutOfBoundsException(); 1930 } 1931 1932 if (codePointOffset == 0) { 1933 return index; 1934 } 1935 1936 if (codePointOffset > 0) { 1937 int codePoints = codePointOffset; 1938 int i = index; 1939 while (codePoints > 0) { 1940 codePoints--; 1941 if (i >= len) { 1942 throw new IndexOutOfBoundsException(); 1943 } 1944 if (isHighSurrogate(seq.charAt(i))) { 1945 int next = i + 1; 1946 if (next < len && isLowSurrogate(seq.charAt(next))) { 1947 i++; 1948 } 1949 } 1950 i++; 1951 } 1952 return i; 1953 } 1954 1955 assert codePointOffset < 0; 1956 int codePoints = -codePointOffset; 1957 int i = index; 1958 while (codePoints > 0) { 1959 codePoints--; 1960 i--; 1961 if (i < 0) { 1962 throw new IndexOutOfBoundsException(); 1963 } 1964 if (isLowSurrogate(seq.charAt(i))) { 1965 int prev = i - 1; 1966 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 1967 i--; 1968 } 1969 } 1970 } 1971 return i; 1972 } 1973 1974 /** 1975 * <p> 1976 * Determines the index into the <code>char[]</code> that is offset 1977 * (measured in code points and specified by <code>codePointOffset</code>), 1978 * from the <code>index</code> argument and is within the subsequence as 1979 * delineated by <code>start</code> and <code>count</code>. 1980 * </p> 1981 * 1982 * @param seq The <code>char[]</code> to find the index within. 1983 * 1984 * @param index The index to begin from, within the <code>char[]</code>. 1985 * @param codePointOffset The number of code points to look back or 1986 * forwards; may be a negative or positive value. 1987 * @param start The inclusive index that marks the beginning of the 1988 * subsequence. 1989 * @param count The number of <code>char</code> values to include within 1990 * the subsequence. 
1991 * @return The calculated index that is <code>codePointOffset</code> code 1992 * points from <code>index</code>. 1993 * @throws NullPointerException if <code>seq</code> is <code>null</code>. 1994 * @throws IndexOutOfBoundsException if <code>start</code> or 1995 * <code>count</code> is negative, <code>start + count</code> 1996 * greater than <code>seq.length</code>, <code>index</code> is 1997 * less than <code>start</code>, <code>index</code> is greater 1998 * than <code>start + count</code> or there aren't enough values 1999 * in <code>seq</code> after <code>index</code> or before 2000 * <code>index</code> if <code>codePointOffset</code> is 2001 * negative. 2002 * @since 1.5 2003 */ 2004 public static int offsetByCodePoints(char[] seq, int start, int count, 2005 int index, int codePointOffset) { 2006 if (seq == null) { 2007 throw new NullPointerException(); 2008 } 2009 int end = start + count; 2010 if (start < 0 || count < 0 || end > seq.length || index < start 2011 || index > end) { 2012 throw new IndexOutOfBoundsException(); 2013 } 2014 2015 if (codePointOffset == 0) { 2016 return index; 2017 } 2018 2019 if (codePointOffset > 0) { 2020 int codePoints = codePointOffset; 2021 int i = index; 2022 while (codePoints > 0) { 2023 codePoints--; 2024 if (i >= end) { 2025 throw new IndexOutOfBoundsException(); 2026 } 2027 if (isHighSurrogate(seq[i])) { 2028 int next = i + 1; 2029 if (next < end && isLowSurrogate(seq[next])) { 2030 i++; 2031 } 2032 } 2033 i++; 2034 } 2035 return i; 2036 } 2037 2038 assert codePointOffset < 0; 2039 int codePoints = -codePointOffset; 2040 int i = index; 2041 while (codePoints > 0) { 2042 codePoints--; 2043 i--; 2044 if (i < start) { 2045 throw new IndexOutOfBoundsException(); 2046 } 2047 if (isLowSurrogate(seq[i])) { 2048 int prev = i - 1; 2049 if (prev >= start && isHighSurrogate(seq[prev])) { 2050 i--; 2051 } 2052 } 2053 } 2054 return i; 2055 } 2056 2057 /** 2058 * Convenient method to determine the value of character <code>c</code> in 
* the supplied radix. The value of <code>radix</code> must be between
     * MIN_RADIX and MAX_RADIX. The lookup is delegated to
     * <code>UCharacter.digit</code>.
     *
     * @param c
     *            the character
     * @param radix
     *            the radix
     * @return if <code>radix</code> lies between {@link #MIN_RADIX} and
     *         {@link #MAX_RADIX} then the value of the character in the radix,
     *         otherwise -1.
     */
    public static int digit(char c, int radix) {
        // BEGIN android-changed
        // Upstream Harmony handled ASCII inline and looked other characters
        // up with BinarySearch in the digitKeys/digitValues tables, returning
        // -1 for an out-of-range radix. Android delegates to ICU instead.
        return UCharacter.digit(c, radix);
        // END android-changed
    }

    /**
     * Convenient method to determine the value of character
     * <code>codePoint</code> in the supplied radix. The value of
     * <code>radix</code> must be between MIN_RADIX and MAX_RADIX. The lookup
     * is delegated to <code>UCharacter.digit</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @param radix
     *            the radix
     * @return if <code>radix</code> lies between {@link #MIN_RADIX} and
     *         {@link #MAX_RADIX} then the value of the character in the radix,
     *         otherwise -1.
2111 */ 2112 public static int digit(int codePoint, int radix) { 2113 return UCharacter.digit(codePoint, radix); 2114 } 2115 2116 /** 2117 * Compares the argument to the receiver, and returns true if they represent 2118 * the <em>same</em> object using a class specific comparison. 2119 * <p> 2120 * In this case, the argument must also be a Character, and the receiver and 2121 * argument must represent the same char value. 2122 * 2123 * @param object 2124 * the object to compare with this object 2125 * @return <code>true</code> if the object is the same as this object 2126 * <code>false</code> if it is different from this object 2127 * 2128 * @see #hashCode 2129 */ 2130 @Override 2131 public boolean equals(Object object) { 2132 return (object instanceof Character) 2133 && (value == ((Character) object).value); 2134 } 2135 2136 /** 2137 * Returns the character which represents the value in the specified radix. 2138 * The radix must be between MIN_RADIX and MAX_RADIX inclusive. 2139 * 2140 * @param digit 2141 * the integer value 2142 * @param radix 2143 * the radix 2144 * @return the character which represents the value in the radix 2145 */ 2146 public static char forDigit(int digit, int radix) { 2147 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2148 if (0 <= digit && digit < radix) { 2149 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2150 } 2151 } 2152 return 0; 2153 } 2154 2155 /** 2156 * Gets the numeric value of the Unicode character. 
*
     * The lookup is delegated to <code>UCharacter.getNumericValue</code>.
     *
     * @param c
     *            the character
     * @return a numeric int value >= 0, -1 if there is no numeric value, -2 if
     *         the numeric value is not an int >= 0
     */
    public static int getNumericValue(char c) {
        // BEGIN android-changed
        // Upstream Harmony mapped ASCII digits and letters inline and used
        // BinarySearch over the numericKeys/numericValues tables for all
        // other characters. Android delegates to ICU instead.
        return UCharacter.getNumericValue(c);
        // END android-changed
    }

    /**
     * Gets the numeric value of the Unicode character. The lookup is
     * delegated to <code>UCharacter.getNumericValue</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return a numeric int value >= 0, -1 if there is no numeric value, -2 if
     *         the numeric value is not an int >= 0
     */
    public static int getNumericValue(int codePoint) {
        return UCharacter.getNumericValue(codePoint);
    }

    /**
     * Gets the general Unicode category of the specified character.
2209 * 2210 * @param c 2211 * the character 2212 * @return the Unicode category 2213 */ 2214 public static int getType(char c) { 2215 // BEGIN android-changed 2216 // int result = BinarySearch.binarySearchRange(typeKeys, c); 2217 // int high = typeValues[result * 2]; 2218 // if (c <= high) { 2219 // int code = typeValues[result * 2 + 1]; 2220 // if (code < 0x100) { 2221 // return code; 2222 // } 2223 // return (c & 1) == 1 ? code >> 8 : code & 0xff; 2224 // } 2225 // return UNASSIGNED; 2226 return getType((int)c); 2227 // END android-changed 2228 } 2229 2230 /** 2231 * Gets the general Unicode category of the specified character. 2232 * 2233 * @param codePoint 2234 * the character, including supplementary characters 2235 * @return the Unicode category 2236 */ 2237 public static int getType(int codePoint) { 2238 int type = UCharacter.getType(codePoint); 2239 2240 // the type values returned by UCharacter are not compatible with what 2241 // the spec says. RI's Character type values skip the value 17. 2242 if (type <= Character.FORMAT) { 2243 return type; 2244 } 2245 return (type + 1); 2246 } 2247 2248 /** 2249 * Gets the Unicode directionality of the specified character. 2250 * 2251 * @param c 2252 * the character 2253 * @return the Unicode directionality 2254 */ 2255 public static byte getDirectionality(char c) { 2256 // BEGIN android-changed 2257 // int result = BinarySearch.binarySearchRange(bidiKeys, c); 2258 // int high = bidiValues[result * 2]; 2259 // if (c <= high) { 2260 // int code = bidiValues[result * 2 + 1]; 2261 // if (code < 0x100) { 2262 // return (byte) (code - 1); 2263 // } 2264 // return (byte) (((c & 1) == 1 ? code >> 8 : code & 0xff) - 1); 2265 // } 2266 // return DIRECTIONALITY_UNDEFINED; 2267 return getDirectionality((int)c); 2268 // END android-changed 2269 } 2270 2271 /** 2272 * Gets the Unicode directionality of the specified character. 
2273 * 2274 * @param codePoint 2275 * the character, including supplementary characters 2276 * @return the Unicode directionality 2277 */ 2278 public static byte getDirectionality(int codePoint) { 2279 if (getType(codePoint) == Character.UNASSIGNED) { 2280 return Character.DIRECTIONALITY_UNDEFINED; 2281 } 2282 2283 byte UCDirectionality = UCharacter.getDirectionality(codePoint); 2284 if (UCDirectionality == -1) { 2285 return -1; 2286 } 2287 return DIRECTIONALITY[UCDirectionality]; 2288 } 2289 2290 /** 2291 * Returns whether the specified character is mirrored 2292 * 2293 * @param c 2294 * the character 2295 * @return true if the character is mirrored, false otherwise 2296 */ 2297 public static boolean isMirrored(char c) { 2298 // BEGIN android-changed 2299 // int value = c / 16; 2300 // if (value >= mirrored.length) { 2301 // return false; 2302 // } 2303 // int bit = 1 << (c % 16); 2304 // return (mirrored[value] & bit) != 0; 2305 return isMirrored((int)c); 2306 // ENd android-changed 2307 } 2308 2309 /** 2310 * Returns whether the specified character is mirrored 2311 * 2312 * @param codePoint 2313 * the character, including supplementary characters 2314 * @return true if the character is mirrored, false otherwise 2315 */ 2316 public static boolean isMirrored(int codePoint) { 2317 return UCharacter.isMirrored(codePoint); 2318 } 2319 2320 /** 2321 * Returns an integer hash code for the receiver. Any two objects which 2322 * answer <code>true</code> when passed to <code>equals</code> must 2323 * answer the same value for this method. 2324 * 2325 * @return the receiver's hash 2326 * 2327 * @see #equals 2328 */ 2329 @Override 2330 public int hashCode() { 2331 return value; 2332 } 2333 2334 /** 2335 * Returns whether the specified character is defined in the Unicode 2336 * specification. 
*
     * The answer comes from <code>UCharacter.isDefined</code>.
     *
     * @param c
     *            the character
     * @return true if the general Unicode category of the character is not
     *         UNASSIGNED, false otherwise
     */
    public static boolean isDefined(char c) {
        // BEGIN android-changed
        // Upstream Harmony returned getType(c) != UNASSIGNED. Android
        // delegates to ICU instead.
        return UCharacter.isDefined(c);
        // END android-changed
    }

    /**
     * Returns whether the specified character is defined in the Unicode
     * specification. The answer comes from <code>UCharacter.isDefined</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true if the general Unicode category of the character is not
     *         UNASSIGNED, false otherwise
     */
    public static boolean isDefined(int codePoint) {
        return UCharacter.isDefined(codePoint);
    }

    /**
     * Returns whether the character is a digit. The answer comes from
     * <code>UCharacter.isDigit</code>.
     *
     * @param c
     *            the character
     * @return true when the character is a digit, false otherwise
     */
    public static boolean isDigit(char c) {
        // BEGIN android-changed
        // Upstream Harmony had an ASCII fast path and otherwise compared
        // getType(c) with DECIMAL_DIGIT_NUMBER. Android delegates to ICU
        // instead.
        return UCharacter.isDigit(c);
        // END android-changed
    }

    /**
     * Returns whether the character is a digit. The answer comes from
     * <code>UCharacter.isDigit</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is a digit, false otherwise
     */
    public static boolean isDigit(int codePoint) {
        return UCharacter.isDigit(codePoint);
    }

    /**
     * Returns whether the specified character is ignorable in a Java or Unicode
     * identifier.
2398 * 2399 * @param c 2400 * the character 2401 * @return true when the character is ignorable, false otherwise 2402 */ 2403 public static boolean isIdentifierIgnorable(char c) { 2404 // BEGIN android-changed 2405 // return (c >= 0 && c <= 8) || (c >= 0xe && c <= 0x1b) 2406 // || (c >= 0x7f && c <= 0x9f) || getType(c) == FORMAT; 2407 return UCharacter.isIdentifierIgnorable(c); 2408 // END android-changed 2409 } 2410 2411 /** 2412 * Returns whether the specified character is ignorable in a Java or Unicode 2413 * identifier. 2414 * 2415 * @param codePoint 2416 * the character, including supplementary characters 2417 * @return true when the character is ignorable, false otherwise 2418 */ 2419 public static boolean isIdentifierIgnorable(int codePoint) { 2420 return UCharacter.isIdentifierIgnorable(codePoint); 2421 } 2422 2423 /** 2424 * Returns whether the character is an ISO control character. 2425 * 2426 * @param c 2427 * the character 2428 * @return <code>true</code> if <code>c</code> is an ISO control 2429 * character, otherwise <code>false</code> 2430 */ 2431 public static boolean isISOControl(char c) { 2432 return isISOControl((int)c); 2433 } 2434 2435 /** 2436 * Returns whether the character is an ISO control character. 2437 * 2438 * @param c 2439 * the character, including supplementary characters 2440 * @return <code>true</code> if <code>c</code> is an ISO control 2441 * character, otherwise <code>false</code> 2442 */ 2443 public static boolean isISOControl(int c) { 2444 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2445 } 2446 2447 /** 2448 * Returns whether the character is a valid part of a Unicode identifier as 2449 * other than the first character. 
2450 * 2451 * @param c 2452 * the character 2453 * @return true when the character is valid as part of a Java identifier, 2454 * false otherwise 2455 */ 2456 public static boolean isJavaIdentifierPart(char c) { 2457 // Optimized case for ASCII 2458 if (c < 128) { 2459 return (typeTags[c] & ISJAVAPART) != 0; 2460 } 2461 2462 int type = getType(c); 2463 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2464 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2465 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2466 || type == NON_SPACING_MARK || type == COMBINING_SPACING_MARK 2467 || (c >= 0x80 && c <= 0x9f) || type == FORMAT; 2468 } 2469 2470 /** 2471 * Returns whether the character is a valid part of a Unicode identifier as 2472 * other than the first character. 2473 * 2474 * @param codePoint 2475 * the character, including supplementary characters 2476 * @return true when the character is valid as part of a Java identifier, 2477 * false otherwise 2478 */ 2479 public static boolean isJavaIdentifierPart(int codePoint) { 2480 int type = getType(codePoint); 2481 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2482 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2483 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2484 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2485 || isIdentifierIgnorable(codePoint); 2486 } 2487 2488 /** 2489 * Returns whether the character is a valid start of a Unicode identifier 2490 * 2491 * @param c 2492 * the character 2493 * @return true when the character is a valid start of a Java identifier, 2494 * false otherwise 2495 */ 2496 public static boolean isJavaIdentifierStart(char c) { 2497 // Optimized case for ASCII 2498 if (c < 128) { 2499 return (typeTags[c] & ISJAVASTART) != 0; 2500 } 2501 2502 int type = getType(c); 2503 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2504 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2505 || type == 
LETTER_NUMBER; 2506 } 2507 2508 /** 2509 * Returns whether the character is a valid start of a Unicode identifier 2510 * 2511 * @param codePoint 2512 * the character, including supplementary characters 2513 * @return true when the character is a valid start of a Java identifier, 2514 * false otherwise 2515 */ 2516 public static boolean isJavaIdentifierStart(int codePoint) { 2517 int type = getType(codePoint); 2518 return isLetter(codePoint) || type == CURRENCY_SYMBOL 2519 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2520 } 2521 2522 /** 2523 * Returns whether the character is a Java letter. 2524 * 2525 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2526 */ 2527 @Deprecated 2528 public static boolean isJavaLetter(char c) { 2529 return isJavaIdentifierStart(c); 2530 } 2531 2532 /** 2533 * Returns whether the character is a Java letter or digit character. 2534 * 2535 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2536 */ 2537 @Deprecated 2538 public static boolean isJavaLetterOrDigit(char c) { 2539 return isJavaIdentifierPart(c); 2540 } 2541 2542 /** 2543 * Returns whether the character is a letter. 2544 * 2545 * @param c 2546 * the character 2547 * @return true when the character is a letter, false otherwise 2548 */ 2549 public static boolean isLetter(char c) { 2550 // BEGIN android-changed 2551 // if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) { 2552 // return true; 2553 // } 2554 // if (c < 128) { 2555 // return false; 2556 // } 2557 // int type = getType(c); 2558 // return type >= UPPERCASE_LETTER && type <= OTHER_LETTER; 2559 return UCharacter.isLetter(c); 2560 // END android-changed 2561 } 2562 2563 /** 2564 * Returns whether the character is a letter. 
*
     * The answer comes from <code>UCharacter.isLetter</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is a letter, false otherwise
     */
    public static boolean isLetter(int codePoint) {
        return UCharacter.isLetter(codePoint);
    }

    /**
     * Returns whether the character is a letter or a digit. The answer comes
     * from <code>UCharacter.isLetterOrDigit</code>.
     *
     * @param c
     *            the character
     * @return true when the character is a letter or a digit, false otherwise
     */
    public static boolean isLetterOrDigit(char c) {
        // BEGIN android-changed
        // Upstream Harmony accepted the letter categories and
        // DECIMAL_DIGIT_NUMBER based on getType(c). Android delegates to ICU
        // instead.
        return UCharacter.isLetterOrDigit(c);
        // END android-changed
    }

    /**
     * Returns whether the character is a letter or a digit. The answer comes
     * from <code>UCharacter.isLetterOrDigit</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is a letter or a digit, false otherwise
     */
    public static boolean isLetterOrDigit(int codePoint) {
        return UCharacter.isLetterOrDigit(codePoint);
    }

    /**
     * Returns whether the character is a lower case letter. The answer comes
     * from <code>UCharacter.isLowerCase</code>.
     *
     * @param c
     *            the character
     * @return true when the character is a lower case letter, false otherwise
     */
    public static boolean isLowerCase(char c) {
        // BEGIN android-changed
        // Upstream Harmony had an ASCII fast path and otherwise compared
        // getType(c) with LOWERCASE_LETTER. Android delegates to ICU instead.
        return UCharacter.isLowerCase(c);
        // END android-changed
    }

    /**
     * Returns whether the character is a lower case letter.
2625 * 2626 * @param codePoint 2627 * the character, including supplementary characters 2628 * @return true when the character is a lower case letter, false otherwise 2629 */ 2630 public static boolean isLowerCase(int codePoint) { 2631 return UCharacter.isLowerCase(codePoint); 2632 } 2633 2634 /** 2635 * Returns whether the character is a Java space. 2636 * 2637 * @deprecated Use {@link #isWhitespace(char)} 2638 */ 2639 @Deprecated 2640 public static boolean isSpace(char c) { 2641 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2642 } 2643 2644 /** 2645 * Returns whether the character is a Unicode space character. A member of 2646 * one of the Unicode categories Space Separator, Line Separator, or 2647 * Paragraph Separator. 2648 * 2649 * @param c 2650 * the character 2651 * @return true when the character is a Unicode space character, false 2652 * otherwise 2653 */ 2654 public static boolean isSpaceChar(char c) { 2655 // BEGIN android-changed 2656 // if (c == 0x20 || c == 0xa0 || c == 0x1680) { 2657 // return true; 2658 // } 2659 // if (c < 0x2000) { 2660 // return false; 2661 // } 2662 // return c <= 0x200b || c == 0x2028 || c == 0x2029 || c == 0x202f 2663 // || c == 0x3000; 2664 return UCharacter.isSpaceChar(c); 2665 // END android-changed 2666 } 2667 2668 /** 2669 * Returns whether the character is a Unicode space character. A member of 2670 * one of the Unicode categories Space Separator, Line Separator, or 2671 * Paragraph Separator. 2672 * 2673 * @param codePoint 2674 * the character, including supplementary characters 2675 * @return true when the character is a Unicode space character, false 2676 * otherwise 2677 */ 2678 public static boolean isSpaceChar(int codePoint) { 2679 return UCharacter.isSpaceChar(codePoint); 2680 } 2681 2682 /** 2683 * Returns whether the character is a titlecase character. 
*
     * The answer comes from <code>UCharacter.isTitleCase</code>.
     *
     * @param c
     *            the character
     * @return true when the character is a titlecase character, false
     *         otherwise
     */
    public static boolean isTitleCase(char c) {
        // BEGIN android-changed
        // Upstream Harmony compared against a hard-coded list of titlecase
        // characters. Android delegates to ICU instead.
        return UCharacter.isTitleCase(c);
        // END android-changed
    }

    /**
     * Returns whether the character is a titlecase character. The answer
     * comes from <code>UCharacter.isTitleCase</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is a titlecase character, false
     *         otherwise
     */
    public static boolean isTitleCase(int codePoint) {
        return UCharacter.isTitleCase(codePoint);
    }

    /**
     * Returns whether the character is valid as part of a Unicode identifier as
     * other than the first character. The answer comes from
     * <code>UCharacter.isUnicodeIdentifierPart</code>.
     *
     * @param c
     *            the character
     * @return true when the character is valid as part of a Unicode identifier,
     *         false otherwise
     */
    public static boolean isUnicodeIdentifierPart(char c) {
        // BEGIN android-changed
        // Upstream Harmony derived the answer from getType(c) and
        // isIdentifierIgnorable(c). Android delegates to ICU instead.
        return UCharacter.isUnicodeIdentifierPart(c);
        // END android-changed
    }

    /**
     * Returns whether the character is valid as part of a Unicode identifier as
     * other than the first character.
*
     * The answer comes from <code>UCharacter.isUnicodeIdentifierPart</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is valid as part of a Unicode identifier,
     *         false otherwise
     */
    public static boolean isUnicodeIdentifierPart(int codePoint) {
        return UCharacter.isUnicodeIdentifierPart(codePoint);
    }

    /**
     * Returns whether the character is a valid initial character for a Unicode
     * identifier. The answer comes from
     * <code>UCharacter.isUnicodeIdentifierStart</code>.
     *
     * @param c
     *            the character
     * @return true when the character is a valid start of a Unicode identifier,
     *         false otherwise
     */
    public static boolean isUnicodeIdentifierStart(char c) {
        // BEGIN android-changed
        // Upstream Harmony accepted the letter categories and LETTER_NUMBER
        // based on getType(c). Android delegates to ICU instead.
        return UCharacter.isUnicodeIdentifierStart(c);
        // END android-changed
    }

    /**
     * Returns whether the character is a valid initial character for a Unicode
     * identifier. The answer comes from
     * <code>UCharacter.isUnicodeIdentifierStart</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is a valid start of a Unicode identifier,
     *         false otherwise
     */
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        return UCharacter.isUnicodeIdentifierStart(codePoint);
    }

    /**
     * Returns whether the character is an upper case letter.
*
     * The answer comes from <code>UCharacter.isUpperCase</code>.
     *
     * @param c
     *            the character
     * @return true when the character is an upper case letter, false otherwise
     */
    public static boolean isUpperCase(char c) {
        // BEGIN android-changed
        // Upstream Harmony had an ASCII fast path and otherwise compared
        // getType(c) with UPPERCASE_LETTER. Android delegates to ICU instead.
        return UCharacter.isUpperCase(c);
        // END android-changed
    }

    /**
     * Returns whether the character is an upper case letter. The answer comes
     * from <code>UCharacter.isUpperCase</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true when the character is an upper case letter, false otherwise
     */
    public static boolean isUpperCase(int codePoint) {
        return UCharacter.isUpperCase(codePoint);
    }

    /**
     * Returns whether the character is a whitespace character in Java. The
     * answer comes from <code>UCharacter.isWhitespace</code>.
     *
     * @param c
     *            the character
     * @return true if the supplied <code>c</code> is a whitespace character
     *         in Java, otherwise false.
     */
    public static boolean isWhitespace(char c) {
        // BEGIN android-changed
        // Upstream Harmony compared against a hard-coded list of whitespace
        // characters, which explicitly rejected 0x2007. Android delegates to
        // ICU instead.
        return UCharacter.isWhitespace(c);
        // END android-changed
    }

    /**
     * Returns whether the character is a whitespace character in Java. The
     * answer comes from <code>UCharacter.isWhitespace</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return true if the supplied <code>codePoint</code> is a whitespace
     *         character in Java, otherwise false.
2850 */ 2851 public static boolean isWhitespace(int codePoint) { 2852 //FIXME depends on ICU when the codePoint is '\u2007' 2853 return UCharacter.isWhitespace(codePoint); 2854 } 2855 2856 /** 2857 * Reverse the order of the first and second bytes in character 2858 * @param c 2859 * the character 2860 * @return the character with reordered bytes. 2861 */ 2862 public static char reverseBytes(char c) { 2863 return (char)((c<<8) | (c>>8)); 2864 } 2865 2866 /** 2867 * Returns the lower case equivalent for the character when the character is 2868 * an upper case letter, otherwise returns the character. 2869 * 2870 * @param c 2871 * the character 2872 * @return if c is not a lower case character then its lower case 2873 * counterpart, otherwise just c 2874 */ 2875 public static char toLowerCase(char c) { 2876 // BEGIN android-changed 2877 // // Optimized case for ASCII 2878 // if ('A' <= c && c <= 'Z') { 2879 // return (char) (c + ('a' - 'A')); 2880 // } 2881 // if (c < 128) { 2882 // return c; 2883 // } 2884 // 2885 // int result = BinarySearch.binarySearchRange(lowercaseKeys, c); 2886 // if (result >= 0) { 2887 // boolean by2 = false; 2888 // char start = lowercaseKeys.charAt(result); 2889 // char end = lowercaseValues[result * 2]; 2890 // if ((start & 0x8000) != (end & 0x8000)) { 2891 // end ^= 0x8000; 2892 // by2 = true; 2893 // } 2894 // if (c <= end) { 2895 // if (by2 && (c & 1) != (start & 1)) { 2896 // return c; 2897 // } 2898 // char mapping = lowercaseValues[result * 2 + 1]; 2899 // return (char) (c + mapping); 2900 // } 2901 // } 2902 // return c; 2903 return (char)UCharacter.toLowerCase(c); 2904 // END android-changed 2905 } 2906 2907 /** 2908 * Returns the lower case equivalent for the character when the character is 2909 * an upper case letter, otherwise returns the character. 
*
     * The mapping is delegated to <code>UCharacter.toLowerCase</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return the lower case counterpart of <code>codePoint</code> if it has
     *         one, otherwise just <code>codePoint</code>
     */
    public static int toLowerCase(int codePoint) {
        return UCharacter.toLowerCase(codePoint);
    }

    /**
     * Returns a string containing a concise, human-readable description of the
     * receiver, produced by <code>String.valueOf</code> from the wrapped char.
     *
     * @return a printable representation for the receiver.
     */
    @Override
    public String toString() {
        return String.valueOf(value);
    }

    /**
     * Converts the specified character to its string representation using
     * <code>String.valueOf</code>.
     *
     * @param value
     *            the character
     * @return the character converted to a string
     */
    public static String toString(char value) {
        return String.valueOf(value);
    }

    /**
     * Returns the title case equivalent for the character, otherwise returns the
     * character. The mapping is delegated to
     * <code>UCharacter.toTitleCase</code> and its result is narrowed to a
     * <code>char</code>.
     *
     * @param c
     *            the character
     * @return the title case equivalent of the character
     */
    public static char toTitleCase(char c) {
        // BEGIN android-changed
        // Upstream Harmony returned c when isTitleCase(c) held, otherwise
        // looked it up in titlecaseKeys/titlecaseValues and fell back to
        // toUpperCase(c). Android delegates to ICU instead.
        return (char)UCharacter.toTitleCase(c);
        // END android-changed
    }

    /**
     * Returns the title case equivalent for the character, otherwise returns the
     * character. The mapping is delegated to
     * <code>UCharacter.toTitleCase</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return the title case equivalent of the character
     */
    public static int toTitleCase(int codePoint) {
        return UCharacter.toTitleCase(codePoint);
    }

    /**
     * Returns the upper case equivalent for the character when the character is
     * a lower case letter, otherwise returns the character.
*
     * The mapping is delegated to <code>UCharacter.toUpperCase</code> and its
     * result is narrowed to a <code>char</code>.
     *
     * @param c
     *            the character
     * @return the upper case counterpart of <code>c</code> if it has one,
     *         otherwise just <code>c</code>
     */
    public static char toUpperCase(char c) {
        // BEGIN android-changed
        // Upstream Harmony had an ASCII fast path and otherwise used
        // BinarySearch over the uppercaseKeys/uppercaseValues tables. Android
        // delegates to ICU instead.
        return (char)UCharacter.toUpperCase(c);
        // END android-changed
    }

    /**
     * Returns the upper case equivalent for the character when the character is
     * a lower case letter, otherwise returns the character. The mapping is
     * delegated to <code>UCharacter.toUpperCase</code>.
     *
     * @param codePoint
     *            the character, including supplementary characters
     * @return the upper case counterpart of <code>codePoint</code> if it has
     *         one, otherwise just <code>codePoint</code>
     */
    public static int toUpperCase(int codePoint) {
        return UCharacter.toUpperCase(codePoint);
    }

}