Character.java revision b46dab348e2007bc08abaf7ecae34d89a2474e50
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21 22/** 23 * The wrapper for the primitive type {@code char}. This class also provides a 24 * number of utility methods for working with characters. 25 * 26 * <p>Character data is kept up to date as Unicode evolves. 27 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 28 * the {@code Locale} documentation for details of the Unicode versions implemented by current 29 * and historical Android releases. 30 * 31 * <p>The Unicode specification, character tables, and other information are available at 32 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 33 * 34 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 35 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 36 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 37 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 38 * encoding and {@code char} pairs are used to represent code points in the 39 * supplementary range. 
A pair of {@code char} values that represents a 40 * supplementary character is made up of a <i>high surrogate</i> with a value 41 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 42 * 0xDC00 to 0xDFFF. 43 * <p> 44 * On the Java platform a {@code char} value represents either a single BMP code 45 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 46 * is used to represent all Unicode code points. 47 * 48 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 49 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 50 * grouped semantically to provide a convenient overview. This table is also useful in 51 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 52 * <span class="datatable"> 53 * <style type="text/css"> 54 * .datatable td { padding-right: 20px; } 55 * </style> 56 * <p><table> 57 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 58 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 59 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 60 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 61 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 62 * <tr> <td><br></td> </tr> 63 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 64 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 65 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 66 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 67 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 68 * <tr> <td><br></td> </tr> 69 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 70 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 71 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 72 * <tr> <td><br></td> </tr> 73 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 74 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 75 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 76 * <tr> <td><br></td> </tr> 77 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 78 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 79 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 84 * <tr> <td><br></td> </tr> 85 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 86 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 87 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 88 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 89 * <tr> <td><br></td> </tr> 90 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 91 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 93 * </table> 94 * </span> 95 * 96 * @since 1.0 97 */ 98public final class Character implements Serializable, Comparable<Character> { 99 
private static final long serialVersionUID = 3786198910865385080L; 100 101 private final char value; 102 103 /** 104 * The minimum {@code Character} value. 105 */ 106 public static final char MIN_VALUE = '\u0000'; 107 108 /** 109 * The maximum {@code Character} value. 110 */ 111 public static final char MAX_VALUE = '\uffff'; 112 113 /** 114 * The minimum radix used for conversions between characters and integers. 115 */ 116 public static final int MIN_RADIX = 2; 117 118 /** 119 * The maximum radix used for conversions between characters and integers. 120 */ 121 public static final int MAX_RADIX = 36; 122 123 /** 124 * The {@link Class} object that represents the primitive type {@code char}. 125 */ 126 @SuppressWarnings("unchecked") 127 public static final Class<Character> TYPE 128 = (Class<Character>) char[].class.getComponentType(); 129 130 // Note: This can't be set to "char.class", since *that* is 131 // defined to be "java.lang.Character.TYPE"; 132 133 /** 134 * Unicode category constant Cn. 135 */ 136 public static final byte UNASSIGNED = 0; 137 138 /** 139 * Unicode category constant Lu. 140 */ 141 public static final byte UPPERCASE_LETTER = 1; 142 143 /** 144 * Unicode category constant Ll. 145 */ 146 public static final byte LOWERCASE_LETTER = 2; 147 148 /** 149 * Unicode category constant Lt. 150 */ 151 public static final byte TITLECASE_LETTER = 3; 152 153 /** 154 * Unicode category constant Lm. 155 */ 156 public static final byte MODIFIER_LETTER = 4; 157 158 /** 159 * Unicode category constant Lo. 160 */ 161 public static final byte OTHER_LETTER = 5; 162 163 /** 164 * Unicode category constant Mn. 165 */ 166 public static final byte NON_SPACING_MARK = 6; 167 168 /** 169 * Unicode category constant Me. 170 */ 171 public static final byte ENCLOSING_MARK = 7; 172 173 /** 174 * Unicode category constant Mc. 175 */ 176 public static final byte COMBINING_SPACING_MARK = 8; 177 178 /** 179 * Unicode category constant Nd. 
180 */ 181 public static final byte DECIMAL_DIGIT_NUMBER = 9; 182 183 /** 184 * Unicode category constant Nl. 185 */ 186 public static final byte LETTER_NUMBER = 10; 187 188 /** 189 * Unicode category constant No. 190 */ 191 public static final byte OTHER_NUMBER = 11; 192 193 /** 194 * Unicode category constant Zs. 195 */ 196 public static final byte SPACE_SEPARATOR = 12; 197 198 /** 199 * Unicode category constant Zl. 200 */ 201 public static final byte LINE_SEPARATOR = 13; 202 203 /** 204 * Unicode category constant Zp. 205 */ 206 public static final byte PARAGRAPH_SEPARATOR = 14; 207 208 /** 209 * Unicode category constant Cc. 210 */ 211 public static final byte CONTROL = 15; 212 213 /** 214 * Unicode category constant Cf. 215 */ 216 public static final byte FORMAT = 16; 217 218 /** 219 * Unicode category constant Co. 220 */ 221 public static final byte PRIVATE_USE = 18; 222 223 /** 224 * Unicode category constant Cs. 225 */ 226 public static final byte SURROGATE = 19; 227 228 /** 229 * Unicode category constant Pd. 230 */ 231 public static final byte DASH_PUNCTUATION = 20; 232 233 /** 234 * Unicode category constant Ps. 235 */ 236 public static final byte START_PUNCTUATION = 21; 237 238 /** 239 * Unicode category constant Pe. 240 */ 241 public static final byte END_PUNCTUATION = 22; 242 243 /** 244 * Unicode category constant Pc. 245 */ 246 public static final byte CONNECTOR_PUNCTUATION = 23; 247 248 /** 249 * Unicode category constant Po. 250 */ 251 public static final byte OTHER_PUNCTUATION = 24; 252 253 /** 254 * Unicode category constant Sm. 255 */ 256 public static final byte MATH_SYMBOL = 25; 257 258 /** 259 * Unicode category constant Sc. 260 */ 261 public static final byte CURRENCY_SYMBOL = 26; 262 263 /** 264 * Unicode category constant Sk. 265 */ 266 public static final byte MODIFIER_SYMBOL = 27; 267 268 /** 269 * Unicode category constant So. 270 */ 271 public static final byte OTHER_SYMBOL = 28; 272 273 /** 274 * Unicode category constant Pi. 
275 * 276 * @since 1.4 277 */ 278 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 279 280 /** 281 * Unicode category constant Pf. 282 * 283 * @since 1.4 284 */ 285 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 286 287 /** 288 * Unicode bidirectional constant. 289 * 290 * @since 1.4 291 */ 292 public static final byte DIRECTIONALITY_UNDEFINED = -1; 293 294 /** 295 * Unicode bidirectional constant L. 296 * 297 * @since 1.4 298 */ 299 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 300 301 /** 302 * Unicode bidirectional constant R. 303 * 304 * @since 1.4 305 */ 306 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 307 308 /** 309 * Unicode bidirectional constant AL. 310 * 311 * @since 1.4 312 */ 313 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 314 315 /** 316 * Unicode bidirectional constant EN. 317 * 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 321 322 /** 323 * Unicode bidirectional constant ES. 324 * 325 * @since 1.4 326 */ 327 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 328 329 /** 330 * Unicode bidirectional constant ET. 331 * 332 * @since 1.4 333 */ 334 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 335 336 /** 337 * Unicode bidirectional constant AN. 338 * 339 * @since 1.4 340 */ 341 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 342 343 /** 344 * Unicode bidirectional constant CS. 345 * 346 * @since 1.4 347 */ 348 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 349 350 /** 351 * Unicode bidirectional constant NSM. 352 * 353 * @since 1.4 354 */ 355 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 356 357 /** 358 * Unicode bidirectional constant BN. 359 * 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 363 364 /** 365 * Unicode bidirectional constant B. 
366 * 367 * @since 1.4 368 */ 369 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 370 371 /** 372 * Unicode bidirectional constant S. 373 * 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 377 378 /** 379 * Unicode bidirectional constant WS. 380 * 381 * @since 1.4 382 */ 383 public static final byte DIRECTIONALITY_WHITESPACE = 12; 384 385 /** 386 * Unicode bidirectional constant ON. 387 * 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 391 392 /** 393 * Unicode bidirectional constant LRE. 394 * 395 * @since 1.4 396 */ 397 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 398 399 /** 400 * Unicode bidirectional constant LRO. 401 * 402 * @since 1.4 403 */ 404 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 405 406 /** 407 * Unicode bidirectional constant RLE. 408 * 409 * @since 1.4 410 */ 411 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 412 413 /** 414 * Unicode bidirectional constant RLO. 415 * 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 419 420 /** 421 * Unicode bidirectional constant PDF. 422 * 423 * @since 1.4 424 */ 425 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 426 427 /** 428 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 429 * encoding, {@code '\uD800'}. 430 * 431 * @since 1.5 432 */ 433 public static final char MIN_HIGH_SURROGATE = '\uD800'; 434 435 /** 436 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 437 * encoding, {@code '\uDBFF'}. 438 * 439 * @since 1.5 440 */ 441 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 442 443 /** 444 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 445 * encoding, {@code '\uDC00'}. 
446 * 447 * @since 1.5 448 */ 449 public static final char MIN_LOW_SURROGATE = '\uDC00'; 450 451 /** 452 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 453 * encoding, {@code '\uDFFF'}. 454 * 455 * @since 1.5 456 */ 457 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 458 459 /** 460 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 461 * 462 * @since 1.5 463 */ 464 public static final char MIN_SURROGATE = '\uD800'; 465 466 /** 467 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 468 * 469 * @since 1.5 470 */ 471 public static final char MAX_SURROGATE = '\uDFFF'; 472 473 /** 474 * The minimum value of a supplementary code point, {@code U+010000}. 475 * 476 * @since 1.5 477 */ 478 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 479 480 /** 481 * The minimum code point value, {@code U+0000}. 482 * 483 * @since 1.5 484 */ 485 public static final int MIN_CODE_POINT = 0x000000; 486 487 /** 488 * The maximum code point value, {@code U+10FFFF}. 489 * 490 * @since 1.5 491 */ 492 public static final int MAX_CODE_POINT = 0x10FFFF; 493 494 /** 495 * The number of bits required to represent a {@code Character} value in 496 * unsigned form. 
497 * 498 * @since 1.5 499 */ 500 public static final int SIZE = 16; 501 502 /* Table of DIRECTIONALITY_* constants. The entry order here differs from the numeric order of the constants themselves. NOTE(review): presumably this maps an externally supplied bidi-class index to the matching Java constant - confirm against the code that indexes this array before reordering or extending it. */ private static final byte[] DIRECTIONALITY = new byte[] { 503 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 504 DIRECTIONALITY_EUROPEAN_NUMBER, 505 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 506 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 507 DIRECTIONALITY_ARABIC_NUMBER, 508 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 509 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 510 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 511 DIRECTIONALITY_OTHER_NEUTRALS, 512 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 513 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 514 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 515 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 516 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 517 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 518 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 519 520 /** 521 * Represents a subset of the Unicode character set. Equality and hashing are identity-based: {@code equals} and {@code hashCode} are final and defer to {@link java.lang.Object}. 522 */ 523 public static class Subset { 524 /* This subset's name; assigned in the constructor and returned by toString(). */ String name; 525 526 /** 527 * Constructs a new {@code Subset}. 528 * 529 * @param string 530 * this subset's name. @throws NullPointerException if {@code string} is {@code null}. 531 */ 532 protected Subset(String string) { 533 if (string == null) { 534 throw new NullPointerException(); 535 } 536 name = string; 537 } 538 539 /** 540 * Compares this character subset with the specified object. Uses 541 * {@link java.lang.Object#equals(Object)} to do the comparison. 542 * 543 * @param object 544 * the object to compare this character subset with. 545 * @return {@code true} if {@code object} is this subset, that is, if 546 * {@code object == this}; {@code false} otherwise. 547 */ 548 @Override 549 public final boolean equals(Object object) { 550 return super.equals(object); 551 } 552 553 /** 554 * Returns the integer hash code for this character subset. 555 * 556 * @return this subset's hash code, which is the hash code computed by 557 * {@link java.lang.Object#hashCode()}. 
558 */ 559 @Override 560 public final int hashCode() { 561 return super.hashCode(); 562 } 563 564 /** 565 * Returns the string representation of this subset. 566 * 567 * @return this subset's name. 568 */ 569 @Override 570 public final String toString() { 571 return name; 572 } 573 } 574 575 /** 576 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 577 * specification. 578 * 579 * @since 1.2 580 */ 581 public static final class UnicodeBlock extends Subset { 582 /** 583 * The "Surrogates Area" Unicode Block. 584 * 585 * @deprecated As of Java 5, this block has been replaced by 586 * {@link #HIGH_SURROGATES}, 587 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 588 * {@link #LOW_SURROGATES}. 589 */ 590 @Deprecated 591 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0); 592 /** 593 * The "Basic Latin" Unicode Block. 594 * 595 * @since 1.2 596 */ 597 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f); 598 /** 599 * The "Latin-1 Supplement" Unicode Block. 600 * 601 * @since 1.2 602 */ 603 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff); 604 /** 605 * The "Latin Extended-A" Unicode Block. 606 * 607 * @since 1.2 608 */ 609 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f); 610 /** 611 * The "Latin Extended-B" Unicode Block. 612 * 613 * @since 1.2 614 */ 615 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 616 /** 617 * The "IPA Extensions" Unicode Block. 618 * 619 * @since 1.2 620 */ 621 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 622 /** 623 * The "Spacing Modifier Letters" Unicode Block. 
624 * 625 * @since 1.2 626 */ 627 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 628 /** 629 * The "Combining Diacritical Marks" Unicode Block. 630 * 631 * @since 1.2 632 */ 633 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 634 /** 635 * The "Greek and Coptic" Unicode Block. Previously referred 636 * to as "Greek". 637 * 638 * @since 1.2 639 */ 640 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 641 /** 642 * The "Cyrillic" Unicode Block. 643 * 644 * @since 1.2 645 */ 646 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 647 /** 648 * The "Cyrillic Supplement" Unicode Block. Previously 649 * referred to as "Cyrillic Supplementary". 650 * 651 * @since 1.5 652 */ 653 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 654 /** 655 * The "Armenian" Unicode Block. 656 * 657 * @since 1.2 658 */ 659 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 660 /** 661 * The "Hebrew" Unicode Block. 662 * 663 * @since 1.2 664 */ 665 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 666 /** 667 * The "Arabic" Unicode Block. 668 * 669 * @since 1.2 670 */ 671 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 672 /** 673 * The "Syriac" Unicode Block. 674 * 675 * @since 1.4 676 */ 677 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 678 /** 679 * The "Thaana" Unicode Block. 680 * 681 * @since 1.4 682 */ 683 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 684 /** 685 * The "Devanagari" Unicode Block. 
686 * 687 * @since 1.2 688 */ 689 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 690 /** 691 * The "Bengali" Unicode Block. 692 * 693 * @since 1.2 694 */ 695 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 696 /** 697 * The "Gurmukhi" Unicode Block. 698 * 699 * @since 1.2 700 */ 701 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 702 /** 703 * The "Gujarati" Unicode Block. 704 * 705 * @since 1.2 706 */ 707 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 708 /** 709 * The "Oriya" Unicode Block. 710 * 711 * @since 1.2 712 */ 713 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 714 /** 715 * The "Tamil" Unicode Block. 716 * 717 * @since 1.2 718 */ 719 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 720 /** 721 * The "Telugu" Unicode Block. 722 * 723 * @since 1.2 724 */ 725 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 726 /** 727 * The "Kannada" Unicode Block. 728 * 729 * @since 1.2 730 */ 731 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 732 /** 733 * The "Malayalam" Unicode Block. 734 * 735 * @since 1.2 736 */ 737 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 738 /** 739 * The "Sinhala" Unicode Block. 740 * 741 * @since 1.4 742 */ 743 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 744 /** 745 * The "Thai" Unicode Block. 746 * 747 * @since 1.2 748 */ 749 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 750 /** 751 * The "Lao" Unicode Block. 752 * 753 * @since 1.2 754 */ 755 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 756 /** 757 * The "Tibetan" Unicode Block. 
758 * 759 * @since 1.2 760 */ 761 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 762 /** 763 * The "Myanmar" Unicode Block. 764 * 765 * @since 1.4 766 */ 767 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 768 /** 769 * The "Georgian" Unicode Block. 770 * 771 * @since 1.2 772 */ 773 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 774 /** 775 * The "Hangul Jamo" Unicode Block. 776 * 777 * @since 1.2 778 */ 779 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 780 /** 781 * The "Ethiopic" Unicode Block. 782 * 783 * @since 1.4 784 */ 785 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 786 /** 787 * The "Cherokee" Unicode Block. 788 * 789 * @since 1.4 790 */ 791 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 792 /** 793 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 794 * 795 * @since 1.4 796 */ 797 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 798 /** 799 * The "Ogham" Unicode Block. 800 * 801 * @since 1.4 802 */ 803 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 804 /** 805 * The "Runic" Unicode Block. 806 * 807 * @since 1.4 808 */ 809 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 810 /** 811 * The "Tagalog" Unicode Block. 812 * 813 * @since 1.5 814 */ 815 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 816 /** 817 * The "Hanunoo" Unicode Block. 818 * 819 * @since 1.5 820 */ 821 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 822 /** 823 * The "Buhid" Unicode Block. 
824 * 825 * @since 1.5 826 */ 827 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 828 /** 829 * The "Tagbanwa" Unicode Block. 830 * 831 * @since 1.5 832 */ 833 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 834 /** 835 * The "Khmer" Unicode Block. 836 * 837 * @since 1.4 838 */ 839 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 840 /** 841 * The "Mongolian" Unicode Block. 842 * 843 * @since 1.4 844 */ 845 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 846 /** 847 * The "Limbu" Unicode Block. 848 * 849 * @since 1.5 850 */ 851 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 852 /** 853 * The "Tai Le" Unicode Block. 854 * 855 * @since 1.5 856 */ 857 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 858 /** 859 * The "Khmer Symbols" Unicode Block. 860 * 861 * @since 1.5 862 */ 863 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 864 /** 865 * The "Phonetic Extensions" Unicode Block. 866 * 867 * @since 1.5 868 */ 869 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 870 /** 871 * The "Latin Extended Additional" Unicode Block. 872 * 873 * @since 1.2 874 */ 875 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 876 /** 877 * The "Greek Extended" Unicode Block. 878 * 879 * @since 1.2 880 */ 881 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 882 /** 883 * The "General Punctuation" Unicode Block. 884 * 885 * @since 1.2 886 */ 887 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 888 /** 889 * The "Superscripts and Subscripts" Unicode Block. 
890 * 891 * @since 1.2 892 */ 893 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 894 /** 895 * The "Currency Symbols" Unicode Block. 896 * 897 * @since 1.2 898 */ 899 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 900 /** 901 * The "Combining Diacritical Marks for Symbols" Unicode 902 * Block. Previously referred to as "Combining Marks for 903 * Symbols". 904 * 905 * @since 1.2 906 */ 907 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 908 /** 909 * The "Letterlike Symbols" Unicode Block. 910 * 911 * @since 1.2 912 */ 913 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 914 /** 915 * The "Number Forms" Unicode Block. 916 * 917 * @since 1.2 918 */ 919 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 920 /** 921 * The "Arrows" Unicode Block. 922 * 923 * @since 1.2 924 */ 925 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 926 /** 927 * The "Mathematical Operators" Unicode Block. 928 * 929 * @since 1.2 930 */ 931 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 932 /** 933 * The "Miscellaneous Technical" Unicode Block. 934 * 935 * @since 1.2 936 */ 937 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 938 /** 939 * The "Control Pictures" Unicode Block. 940 * 941 * @since 1.2 942 */ 943 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 944 /** 945 * The "Optical Character Recognition" Unicode Block. 
946 * 947 * @since 1.2 948 */ 949 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 950 /** 951 * The "Enclosed Alphanumerics" Unicode Block. 952 * 953 * @since 1.2 954 */ 955 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 956 /** 957 * The "Box Drawing" Unicode Block. 958 * 959 * @since 1.2 960 */ 961 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 962 /** 963 * The "Block Elements" Unicode Block. 964 * 965 * @since 1.2 966 */ 967 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 968 /** 969 * The "Geometric Shapes" Unicode Block. 970 * 971 * @since 1.2 972 */ 973 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 974 /** 975 * The "Miscellaneous Symbols" Unicode Block. 976 * 977 * @since 1.2 978 */ 979 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 980 /** 981 * The "Dingbats" Unicode Block. 982 * 983 * @since 1.2 984 */ 985 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 986 /** 987 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 988 * 989 * @since 1.5 990 */ 991 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef); 992 /** 993 * The "Supplemental Arrows-A" Unicode Block. 994 * 995 * @since 1.5 996 */ 997 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff); 998 /** 999 * The "Braille Patterns" Unicode Block. 
1000 * 1001 * @since 1.4 1002 */ 1003 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff); 1004 /** 1005 * The "Supplemental Arrows-B" Unicode Block. 1006 * 1007 * @since 1.5 1008 */ 1009 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f); 1010 /** 1011 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 1012 * 1013 * @since 1.5 1014 */ 1015 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff); 1016 /** 1017 * The "Supplemental Mathematical Operators" Unicode Block. 1018 * 1019 * @since 1.5 1020 */ 1021 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff); 1022 /** 1023 * The "Miscellaneous Symbols and Arrows" Unicode Block. 1024 * 1025 * @since 1.2 1026 */ 1027 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff); 1028 /** 1029 * The "CJK Radicals Supplement" Unicode Block. 1030 * 1031 * @since 1.4 1032 */ 1033 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff); 1034 /** 1035 * The "Kangxi Radicals" Unicode Block. 1036 * 1037 * @since 1.4 1038 */ 1039 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf); 1040 /** 1041 * The "Ideographic Description Characters" Unicode Block. 1042 * 1043 * @since 1.4 1044 */ 1045 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff); 1046 /** 1047 * The "CJK Symbols and Punctuation" Unicode Block. 
1048 * 1049 * @since 1.2 1050 */ 1051 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f); 1052 /** 1053 * The "Hiragana" Unicode Block. 1054 * 1055 * @since 1.2 1056 */ 1057 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f); 1058 /** 1059 * The "Katakana" Unicode Block. 1060 * 1061 * @since 1.2 1062 */ 1063 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff); 1064 /** 1065 * The "Bopomofo" Unicode Block. 1066 * 1067 * @since 1.2 1068 */ 1069 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f); 1070 /** 1071 * The "Hangul Compatibility Jamo" Unicode Block. 1072 * 1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f); 1076 /** 1077 * The "Kanbun" Unicode Block. 1078 * 1079 * @since 1.2 1080 */ 1081 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f); 1082 /** 1083 * The "Bopomofo Extended" Unicode Block. 1084 * 1085 * @since 1.4 1086 */ 1087 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf); 1088 /** 1089 * The "Katakana Phonetic Extensions" Unicode Block. 1090 * 1091 * @since 1.5 1092 */ 1093 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff); 1094 /** 1095 * The "Enclosed CJK Letters and Months" Unicode Block. 1096 * 1097 * @since 1.2 1098 */ 1099 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff); 1100 /** 1101 * The "CJK Compatibility" Unicode Block. 
1102 * 1103 * @since 1.2 1104 */ 1105 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff); 1106 /** 1107 * The "CJK Unified Ideographs Extension A" Unicode Block. 1108 * 1109 * @since 1.4 1110 */ 1111 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf); 1112 /** 1113 * The "Yijing Hexagram Symbols" Unicode Block. 1114 * 1115 * @since 1.5 1116 */ 1117 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff); 1118 /** 1119 * The "CJK Unified Ideographs" Unicode Block. 1120 * 1121 * @since 1.2 1122 */ 1123 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff); 1124 /** 1125 * The "Yi Syllables" Unicode Block. 1126 * 1127 * @since 1.4 1128 */ 1129 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f); 1130 /** 1131 * The "Yi Radicals" Unicode Block. 1132 * 1133 * @since 1.4 1134 */ 1135 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf); 1136 /** 1137 * The "Hangul Syllables" Unicode Block. 1138 * 1139 * @since 1.2 1140 */ 1141 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af); 1142 /** 1143 * The "High Surrogates" Unicode Block. This block represents 1144 * code point values in the high surrogate range 0xD800 to 0xDB7F 1145 */ 1146 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f); 1147 /** 1148 * The "High Private Use Surrogates" Unicode Block. 
This block 1149 * represents code point values in the high surrogate range 0xDB80 to 1150 * 0xDBFF 1151 */ 1152 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff); 1153 /** 1154 * The "Low Surrogates" Unicode Block. This block represents 1155 * code point values in the low surrogate range 0xDC00 to 0xDFFF 1156 */ 1157 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff); 1158 /** 1159 * The "Private Use Area" Unicode Block. 1160 * 1161 * @since 1.2 1162 */ 1163 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff); 1164 /** 1165 * The "CJK Compatibility Ideographs" Unicode Block. 1166 * 1167 * @since 1.2 1168 */ 1169 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff); 1170 /** 1171 * The "Alphabetic Presentation Forms" Unicode Block. 1172 * 1173 * @since 1.2 1174 */ 1175 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f); 1176 /** 1177 * The "Arabic Presentation Forms-A" Unicode Block. 1178 * 1179 * @since 1.2 1180 */ 1181 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff); 1182 /** 1183 * The "Variation Selectors" Unicode Block. 1184 * 1185 * @since 1.5 1186 */ 1187 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f); 1188 /** 1189 * The "Combining Half Marks" Unicode Block. 1190 * 1191 * @since 1.2 1192 */ 1193 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f); 1194 /** 1195 * The "CJK Compatibility Forms" Unicode Block. 
1196 * 1197 * @since 1.2 1198 */ 1199 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f); 1200 /** 1201 * The "Small Form Variants" Unicode Block. 1202 * 1203 * @since 1.2 1204 */ 1205 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f); 1206 /** 1207 * The "Arabic Presentation Forms-B" Unicode Block. 1208 * 1209 * @since 1.2 1210 */ 1211 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff); 1212 /** 1213 * The "Halfwidth and Fullwidth Forms" Unicode Block. 1214 * 1215 * @since 1.2 1216 */ 1217 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef); 1218 /** 1219 * The "Specials" Unicode Block. 1220 * 1221 * @since 1.2 1222 */ 1223 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff); 1224 /** 1225 * The "Linear B Syllabary" Unicode Block. 1226 * 1227 * @since 1.2 1228 */ 1229 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f); 1230 /** 1231 * The "Linear B Ideograms" Unicode Block. 1232 * 1233 * @since 1.5 1234 */ 1235 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff); 1236 /** 1237 * The "Aegean Numbers" Unicode Block. 1238 * 1239 * @since 1.5 1240 */ 1241 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f); 1242 /** 1243 * The "Old Italic" Unicode Block. 1244 * 1245 * @since 1.5 1246 */ 1247 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f); 1248 /** 1249 * The "Gothic" Unicode Block. 
1250 * 1251 * @since 1.5 1252 */ 1253 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f); 1254 /** 1255 * The "Ugaritic" Unicode Block. 1256 * 1257 * @since 1.5 1258 */ 1259 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f); 1260 /** 1261 * The "Deseret" Unicode Block. 1262 * 1263 * @since 1.5 1264 */ 1265 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f); 1266 /** 1267 * The "Shavian" Unicode Block. 1268 * 1269 * @since 1.5 1270 */ 1271 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f); 1272 /** 1273 * The "Osmanya" Unicode Block. 1274 * 1275 * @since 1.5 1276 */ 1277 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af); 1278 /** 1279 * The "Cypriot Syllabary" Unicode Block. 1280 * 1281 * @since 1.5 1282 */ 1283 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f); 1284 /** 1285 * The "Byzantine Musical Symbols" Unicode Block. 1286 * 1287 * @since 1.5 1288 */ 1289 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff); 1290 /** 1291 * The "Musical Symbols" Unicode Block. 1292 * 1293 * @since 1.5 1294 */ 1295 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff); 1296 /** 1297 * The "Tai Xuan Jing Symbols" Unicode Block. 1298 * 1299 * @since 1.5 1300 */ 1301 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f); 1302 /** 1303 * The "Mathematical Alphanumeric Symbols" Unicode Block. 1304 * 1305 * @since 1.5 1306 */ 1307 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff); 1308 /** 1309 * The "CJK Unified Ideographs Extension B" Unicode Block. 
1310 * 1311 * @since 1.5 1312 */ 1313 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df); 1314 /** 1315 * The "CJK Compatibility Ideographs Supplement" Unicode Block. 1316 * 1317 * @since 1.5 1318 */ 1319 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f); 1320 /** 1321 * The "Tags" Unicode Block. 1322 * 1323 * @since 1.5 1324 */ 1325 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f); 1326 /** 1327 * The "Variation Selectors Supplement" Unicode Block. 1328 * 1329 * @since 1.5 1330 */ 1331 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef); 1332 /** 1333 * The "Supplementary Private Use Area-A" Unicode Block. 1334 * 1335 * @since 1.5 1336 */ 1337 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff); 1338 /** 1339 * The "Supplementary Private Use Area-B" Unicode Block. 1340 * 1341 * @since 1.5 1342 */ 1343 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff); 1344 1345 /* 1346 * All of the UnicodeBlocks with valid ranges in ascending order. 
1347 */ 1348 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1349 null, 1350 UnicodeBlock.BASIC_LATIN, 1351 UnicodeBlock.LATIN_1_SUPPLEMENT, 1352 UnicodeBlock.LATIN_EXTENDED_A, 1353 UnicodeBlock.LATIN_EXTENDED_B, 1354 UnicodeBlock.IPA_EXTENSIONS, 1355 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1356 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1357 UnicodeBlock.GREEK, 1358 UnicodeBlock.CYRILLIC, 1359 UnicodeBlock.ARMENIAN, 1360 UnicodeBlock.HEBREW, 1361 UnicodeBlock.ARABIC, 1362 UnicodeBlock.SYRIAC, 1363 UnicodeBlock.THAANA, 1364 UnicodeBlock.DEVANAGARI, 1365 UnicodeBlock.BENGALI, 1366 UnicodeBlock.GURMUKHI, 1367 UnicodeBlock.GUJARATI, 1368 UnicodeBlock.ORIYA, 1369 UnicodeBlock.TAMIL, 1370 UnicodeBlock.TELUGU, 1371 UnicodeBlock.KANNADA, 1372 UnicodeBlock.MALAYALAM, 1373 UnicodeBlock.SINHALA, 1374 UnicodeBlock.THAI, 1375 UnicodeBlock.LAO, 1376 UnicodeBlock.TIBETAN, 1377 UnicodeBlock.MYANMAR, 1378 UnicodeBlock.GEORGIAN, 1379 UnicodeBlock.HANGUL_JAMO, 1380 UnicodeBlock.ETHIOPIC, 1381 UnicodeBlock.CHEROKEE, 1382 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1383 UnicodeBlock.OGHAM, 1384 UnicodeBlock.RUNIC, 1385 UnicodeBlock.KHMER, 1386 UnicodeBlock.MONGOLIAN, 1387 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1388 UnicodeBlock.GREEK_EXTENDED, 1389 UnicodeBlock.GENERAL_PUNCTUATION, 1390 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1391 UnicodeBlock.CURRENCY_SYMBOLS, 1392 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1393 UnicodeBlock.LETTERLIKE_SYMBOLS, 1394 UnicodeBlock.NUMBER_FORMS, 1395 UnicodeBlock.ARROWS, 1396 UnicodeBlock.MATHEMATICAL_OPERATORS, 1397 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1398 UnicodeBlock.CONTROL_PICTURES, 1399 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1400 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1401 UnicodeBlock.BOX_DRAWING, 1402 UnicodeBlock.BLOCK_ELEMENTS, 1403 UnicodeBlock.GEOMETRIC_SHAPES, 1404 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1405 UnicodeBlock.DINGBATS, 1406 UnicodeBlock.BRAILLE_PATTERNS, 1407 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1408 
UnicodeBlock.KANGXI_RADICALS, 1409 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 1410 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1411 UnicodeBlock.HIRAGANA, 1412 UnicodeBlock.KATAKANA, 1413 UnicodeBlock.BOPOMOFO, 1414 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1415 UnicodeBlock.KANBUN, 1416 UnicodeBlock.BOPOMOFO_EXTENDED, 1417 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1418 UnicodeBlock.CJK_COMPATIBILITY, 1419 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1420 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1421 UnicodeBlock.YI_SYLLABLES, 1422 UnicodeBlock.YI_RADICALS, 1423 UnicodeBlock.HANGUL_SYLLABLES, 1424 UnicodeBlock.HIGH_SURROGATES, 1425 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1426 UnicodeBlock.LOW_SURROGATES, 1427 UnicodeBlock.PRIVATE_USE_AREA, 1428 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1429 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1430 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1431 UnicodeBlock.COMBINING_HALF_MARKS, 1432 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1433 UnicodeBlock.SMALL_FORM_VARIANTS, 1434 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1435 UnicodeBlock.SPECIALS, 1436 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1437 UnicodeBlock.OLD_ITALIC, 1438 UnicodeBlock.GOTHIC, 1439 UnicodeBlock.DESERET, 1440 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1441 UnicodeBlock.MUSICAL_SYMBOLS, 1442 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1443 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1444 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1445 UnicodeBlock.TAGS, 1446 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1447 UnicodeBlock.TAGALOG, 1448 UnicodeBlock.HANUNOO, 1449 UnicodeBlock.BUHID, 1450 UnicodeBlock.TAGBANWA, 1451 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1452 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1453 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1454 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1455 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1456 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1457 UnicodeBlock.VARIATION_SELECTORS, 1458 
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1459 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1460 UnicodeBlock.LIMBU, 1461 UnicodeBlock.TAI_LE, 1462 UnicodeBlock.KHMER_SYMBOLS, 1463 UnicodeBlock.PHONETIC_EXTENSIONS, 1464 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1465 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1466 UnicodeBlock.LINEAR_B_SYLLABARY, 1467 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1468 UnicodeBlock.AEGEAN_NUMBERS, 1469 UnicodeBlock.UGARITIC, 1470 UnicodeBlock.SHAVIAN, 1471 UnicodeBlock.OSMANYA, 1472 UnicodeBlock.CYPRIOT_SYLLABARY, 1473 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1474 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT 1475 }; 1476 1477 /** 1478 * Retrieves the constant that corresponds to the specified block name. 1479 * The block names are defined by the Unicode 4.0.1 specification in the 1480 * {@code Blocks-4.0.1.txt} file. 1481 * <p> 1482 * Block names may be one of the following: 1483 * <ul> 1484 * <li>Canonical block name, as defined by the Unicode specification; 1485 * case-insensitive.</li> 1486 * <li>Canonical block name without any spaces, as defined by the 1487 * Unicode specification; case-insensitive.</li> 1488 * <li>{@code UnicodeBlock} constant identifier. This is determined by 1489 * uppercasing the canonical name and replacing all spaces and hyphens 1490 * with underscores.</li> 1491 * </ul> 1492 * 1493 * @param blockName 1494 * the name of the block to retrieve. 1495 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1496 * @throws NullPointerException 1497 * if {@code blockName} is {@code null}. 1498 * @throws IllegalArgumentException 1499 * if {@code blockName} is not a valid block name. 
1500 * @since 1.5 1501 */ 1502 public static UnicodeBlock forName(String blockName) { 1503 if (blockName == null) { 1504 throw new NullPointerException(); 1505 } 1506 int block = forNameImpl(blockName); 1507 if (block == -1) { 1508 if (blockName.equals("SURROGATES_AREA")) { 1509 return SURROGATES_AREA; 1510 } else if(blockName.equalsIgnoreCase("greek")) { 1511 return GREEK; 1512 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1513 blockName.equals("Combining Marks for Symbols") || 1514 blockName.equals("CombiningMarksforSymbols")) { 1515 return COMBINING_MARKS_FOR_SYMBOLS; 1516 } 1517 throw new IllegalArgumentException(); 1518 } 1519 return BLOCKS[block]; 1520 } 1521 1522 /** 1523 * Gets the constant for the Unicode block that contains the specified 1524 * character. 1525 * 1526 * @param c 1527 * the character for which to get the {@code UnicodeBlock} 1528 * constant. 1529 * @return the {@code UnicodeBlock} constant for the block that contains 1530 * {@code c}, or {@code null} if {@code c} does not belong to 1531 * any defined block. 1532 */ 1533 public static UnicodeBlock of(char c) { 1534 return of((int) c); 1535 } 1536 1537 /** 1538 * Gets the constant for the Unicode block that contains the specified 1539 * Unicode code point. 1540 * 1541 * @param codePoint 1542 * the Unicode code point for which to get the 1543 * {@code UnicodeBlock} constant. 1544 * @return the {@code UnicodeBlock} constant for the block that contains 1545 * {@code codePoint}, or {@code null} if {@code codePoint} does 1546 * not belong to any defined block. 1547 * @throws IllegalArgumentException 1548 * if {@code codePoint} is not a valid Unicode code point. 
1549 * @since 1.5 1550 */ 1551 public static UnicodeBlock of(int codePoint) { 1552 if (!isValidCodePoint(codePoint)) { 1553 throw new IllegalArgumentException(); 1554 } 1555 int block = ofImpl(codePoint); 1556 if (block == -1 || block >= BLOCKS.length) { 1557 return null; 1558 } 1559 return BLOCKS[block]; 1560 } 1561 1562 private UnicodeBlock(String blockName, int start, int end) { 1563 super(blockName); 1564 } 1565 } 1566 1567 private static native int forNameImpl(String blockName); 1568 1569 private static native int ofImpl(int codePoint); 1570 1571 /** 1572 * Constructs a new {@code Character} with the specified primitive char 1573 * value. 1574 * 1575 * @param value 1576 * the primitive char value to store in the new instance. 1577 */ 1578 public Character(char value) { 1579 this.value = value; 1580 } 1581 1582 /** 1583 * Gets the primitive value of this character. 1584 * 1585 * @return this object's primitive value. 1586 */ 1587 public char charValue() { 1588 return value; 1589 } 1590 1591 /** 1592 * Compares this object to the specified character object to determine their 1593 * relative order. 1594 * 1595 * @param c 1596 * the character object to compare this object to. 1597 * @return {@code 0} if the value of this character and the value of 1598 * {@code c} are equal; a positive value if the value of this 1599 * character is greater than the value of {@code c}; a negative 1600 * value if the value of this character is less than the value of 1601 * {@code c}. 1602 * @see java.lang.Comparable 1603 * @since 1.2 1604 */ 1605 public int compareTo(Character c) { 1606 return value - c.value; 1607 } 1608 1609 /** 1610 * Returns a {@code Character} instance for the {@code char} value passed. 1611 * <p> 1612 * If it is not necessary to get a new {@code Character} instance, it is 1613 * recommended to use this method instead of the constructor, since it 1614 * maintains a cache of instances which may result in better performance. 
1615 * 1616 * @param c 1617 * the char value for which to get a {@code Character} instance. 1618 * @return the {@code Character} instance for {@code c}. 1619 * @since 1.5 1620 */ 1621 public static Character valueOf(char c) { 1622 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1623 } 1624 1625 /** 1626 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1627 */ 1628 private static final Character[] SMALL_VALUES = new Character[128]; 1629 1630 static { 1631 for(int i = 0; i < 128; i++) { 1632 SMALL_VALUES[i] = new Character((char) i); 1633 } 1634 } 1635 /** 1636 * Indicates whether {@code codePoint} is a valid Unicode code point. 1637 * 1638 * @param codePoint 1639 * the code point to test. 1640 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1641 * {@code false} otherwise. 1642 * @since 1.5 1643 */ 1644 public static boolean isValidCodePoint(int codePoint) { 1645 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1646 } 1647 1648 /** 1649 * Indicates whether {@code codePoint} is within the supplementary code 1650 * point range. 1651 * 1652 * @param codePoint 1653 * the code point to test. 1654 * @return {@code true} if {@code codePoint} is within the supplementary 1655 * code point range; {@code false} otherwise. 1656 * @since 1.5 1657 */ 1658 public static boolean isSupplementaryCodePoint(int codePoint) { 1659 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1660 } 1661 1662 /** 1663 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1664 * that is used for representing supplementary characters in UTF-16 1665 * encoding. 1666 * 1667 * @param ch 1668 * the character to test. 1669 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1670 * {@code false} otherwise. 
1671 * @see #isLowSurrogate(char) 1672 * @since 1.5 1673 */ 1674 public static boolean isHighSurrogate(char ch) { 1675 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1676 } 1677 1678 /** 1679 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1680 * that is used for representing supplementary characters in UTF-16 1681 * encoding. 1682 * 1683 * @param ch 1684 * the character to test. 1685 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1686 * {@code false} otherwise. 1687 * @see #isHighSurrogate(char) 1688 * @since 1.5 1689 */ 1690 public static boolean isLowSurrogate(char ch) { 1691 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1692 } 1693 1694 /** 1695 * Indicates whether the specified character pair is a valid surrogate pair. 1696 * 1697 * @param high 1698 * the high surrogate unit to test. 1699 * @param low 1700 * the low surrogate unit to test. 1701 * @return {@code true} if {@code high} is a high-surrogate code unit and 1702 * {@code low} is a low-surrogate code unit; {@code false} 1703 * otherwise. 1704 * @see #isHighSurrogate(char) 1705 * @see #isLowSurrogate(char) 1706 * @since 1.5 1707 */ 1708 public static boolean isSurrogatePair(char high, char low) { 1709 return (isHighSurrogate(high) && isLowSurrogate(low)); 1710 } 1711 1712 /** 1713 * Calculates the number of {@code char} values required to represent the 1714 * specified Unicode code point. This method checks if the {@code codePoint} 1715 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1716 * returned, otherwise {@code 1}. To test if the code point is valid, use 1717 * the {@link #isValidCodePoint(int)} method. 1718 * 1719 * @param codePoint 1720 * the code point for which to calculate the number of required 1721 * chars. 1722 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 
1723 * @see #isValidCodePoint(int) 1724 * @see #isSupplementaryCodePoint(int) 1725 * @since 1.5 1726 */ 1727 public static int charCount(int codePoint) { 1728 return (codePoint >= 0x10000 ? 2 : 1); 1729 } 1730 1731 /** 1732 * Converts a surrogate pair into a Unicode code point. This method assumes 1733 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1734 * surrogates, then the result is indeterminate. The 1735 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1736 * method to validate the pair. 1737 * 1738 * @param high 1739 * the high surrogate unit. 1740 * @param low 1741 * the low surrogate unit. 1742 * @return the Unicode code point corresponding to the surrogate unit pair. 1743 * @see #isSurrogatePair(char, char) 1744 * @since 1.5 1745 */ 1746 public static int toCodePoint(char high, char low) { 1747 // See RFC 2781, Section 2.2 1748 // http://www.ietf.org/rfc/rfc2781.txt 1749 int h = (high & 0x3FF) << 10; 1750 int l = low & 0x3FF; 1751 return (h | l) + 0x10000; 1752 } 1753 1754 /** 1755 * Returns the code point at {@code index} in the specified sequence of 1756 * character units. If the unit at {@code index} is a high-surrogate unit, 1757 * {@code index + 1} is less than the length of the sequence and the unit at 1758 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1759 * point represented by the pair is returned; otherwise the {@code char} 1760 * value at {@code index} is returned. 1761 * 1762 * @param seq 1763 * the source sequence of {@code char} units. 1764 * @param index 1765 * the position in {@code seq} from which to retrieve the code 1766 * point. 1767 * @return the Unicode code point or {@code char} value at {@code index} in 1768 * {@code seq}. 1769 * @throws NullPointerException 1770 * if {@code seq} is {@code null}. 1771 * @throws IndexOutOfBoundsException 1772 * if the {@code index} is negative or greater than or equal to 1773 * the length of {@code seq}. 
1774 * @since 1.5 1775 */ 1776 public static int codePointAt(CharSequence seq, int index) { 1777 if (seq == null) { 1778 throw new NullPointerException(); 1779 } 1780 int len = seq.length(); 1781 if (index < 0 || index >= len) { 1782 throw new IndexOutOfBoundsException(); 1783 } 1784 1785 char high = seq.charAt(index++); 1786 if (index >= len) { 1787 return high; 1788 } 1789 char low = seq.charAt(index); 1790 if (isSurrogatePair(high, low)) { 1791 return toCodePoint(high, low); 1792 } 1793 return high; 1794 } 1795 1796 /** 1797 * Returns the code point at {@code index} in the specified array of 1798 * character units. If the unit at {@code index} is a high-surrogate unit, 1799 * {@code index + 1} is less than the length of the array and the unit at 1800 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1801 * point represented by the pair is returned; otherwise the {@code char} 1802 * value at {@code index} is returned. 1803 * 1804 * @param seq 1805 * the source array of {@code char} units. 1806 * @param index 1807 * the position in {@code seq} from which to retrieve the code 1808 * point. 1809 * @return the Unicode code point or {@code char} value at {@code index} in 1810 * {@code seq}. 1811 * @throws NullPointerException 1812 * if {@code seq} is {@code null}. 1813 * @throws IndexOutOfBoundsException 1814 * if the {@code index} is negative or greater than or equal to 1815 * the length of {@code seq}. 
1816 * @since 1.5 1817 */ 1818 public static int codePointAt(char[] seq, int index) { 1819 if (seq == null) { 1820 throw new NullPointerException(); 1821 } 1822 int len = seq.length; 1823 if (index < 0 || index >= len) { 1824 throw new IndexOutOfBoundsException(); 1825 } 1826 1827 char high = seq[index++]; 1828 if (index >= len) { 1829 return high; 1830 } 1831 char low = seq[index]; 1832 if (isSurrogatePair(high, low)) { 1833 return toCodePoint(high, low); 1834 } 1835 return high; 1836 } 1837 1838 /** 1839 * Returns the code point at {@code index} in the specified array of 1840 * character units, where {@code index} has to be less than {@code limit}. 1841 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1842 * is less than {@code limit} and the unit at {@code index + 1} is a 1843 * low-surrogate unit, then the supplementary code point represented by the 1844 * pair is returned; otherwise the {@code char} value at {@code index} is 1845 * returned. 1846 * 1847 * @param seq 1848 * the source array of {@code char} units. 1849 * @param index 1850 * the position in {@code seq} from which to get the code point. 1851 * @param limit 1852 * the index after the last unit in {@code seq} that can be used. 1853 * @return the Unicode code point or {@code char} value at {@code index} in 1854 * {@code seq}. 1855 * @throws NullPointerException 1856 * if {@code seq} is {@code null}. 1857 * @throws IndexOutOfBoundsException 1858 * if {@code index < 0}, {@code index >= limit}, 1859 * {@code limit < 0} or if {@code limit} is greater than the 1860 * length of {@code seq}. 
1861 * @since 1.5 1862 */ 1863 public static int codePointAt(char[] seq, int index, int limit) { 1864 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1865 throw new IndexOutOfBoundsException(); 1866 } 1867 1868 char high = seq[index++]; 1869 if (index >= limit) { 1870 return high; 1871 } 1872 char low = seq[index]; 1873 if (isSurrogatePair(high, low)) { 1874 return toCodePoint(high, low); 1875 } 1876 return high; 1877 } 1878 1879 /** 1880 * Returns the code point that precedes {@code index} in the specified 1881 * sequence of character units. If the unit at {@code index - 1} is a 1882 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1883 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1884 * point represented by the pair is returned; otherwise the {@code char} 1885 * value at {@code index - 1} is returned. 1886 * 1887 * @param seq 1888 * the source sequence of {@code char} units. 1889 * @param index 1890 * the position in {@code seq} following the code 1891 * point that should be returned. 1892 * @return the Unicode code point or {@code char} value before {@code index} 1893 * in {@code seq}. 1894 * @throws NullPointerException 1895 * if {@code seq} is {@code null}. 1896 * @throws IndexOutOfBoundsException 1897 * if the {@code index} is less than 1 or greater than the 1898 * length of {@code seq}. 
1899 * @since 1.5 1900 */ 1901 public static int codePointBefore(CharSequence seq, int index) { 1902 if (seq == null) { 1903 throw new NullPointerException(); 1904 } 1905 int len = seq.length(); 1906 if (index < 1 || index > len) { 1907 throw new IndexOutOfBoundsException(); 1908 } 1909 1910 char low = seq.charAt(--index); 1911 if (--index < 0) { 1912 return low; 1913 } 1914 char high = seq.charAt(index); 1915 if (isSurrogatePair(high, low)) { 1916 return toCodePoint(high, low); 1917 } 1918 return low; 1919 } 1920 1921 /** 1922 * Returns the code point that precedes {@code index} in the specified 1923 * array of character units. If the unit at {@code index - 1} is a 1924 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1925 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1926 * point represented by the pair is returned; otherwise the {@code char} 1927 * value at {@code index - 1} is returned. 1928 * 1929 * @param seq 1930 * the source array of {@code char} units. 1931 * @param index 1932 * the position in {@code seq} following the code 1933 * point that should be returned. 1934 * @return the Unicode code point or {@code char} value before {@code index} 1935 * in {@code seq}. 1936 * @throws NullPointerException 1937 * if {@code seq} is {@code null}. 1938 * @throws IndexOutOfBoundsException 1939 * if the {@code index} is less than 1 or greater than the 1940 * length of {@code seq}. 
1941 * @since 1.5 1942 */ 1943 public static int codePointBefore(char[] seq, int index) { 1944 if (seq == null) { 1945 throw new NullPointerException(); 1946 } 1947 int len = seq.length; 1948 if (index < 1 || index > len) { 1949 throw new IndexOutOfBoundsException(); 1950 } 1951 1952 char low = seq[--index]; 1953 if (--index < 0) { 1954 return low; 1955 } 1956 char high = seq[index]; 1957 if (isSurrogatePair(high, low)) { 1958 return toCodePoint(high, low); 1959 } 1960 return low; 1961 } 1962 1963 /** 1964 * Returns the code point that precedes the {@code index} in the specified 1965 * array of character units and is not less than {@code start}. If the unit 1966 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1967 * less than {@code start} and the unit at {@code index - 2} is a 1968 * high-surrogate unit, then the supplementary code point represented by the 1969 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1970 * is returned. 1971 * 1972 * @param seq 1973 * the source array of {@code char} units. 1974 * @param index 1975 * the position in {@code seq} following the code point that 1976 * should be returned. 1977 * @param start 1978 * the index of the first element in {@code seq}. 1979 * @return the Unicode code point or {@code char} value before {@code index} 1980 * in {@code seq}. 1981 * @throws NullPointerException 1982 * if {@code seq} is {@code null}. 1983 * @throws IndexOutOfBoundsException 1984 * if the {@code index <= start}, {@code start < 0}, 1985 * {@code index} is greater than the length of {@code seq}, or 1986 * if {@code start} is equal or greater than the length of 1987 * {@code seq}. 
1988 * @since 1.5 1989 */ 1990 public static int codePointBefore(char[] seq, int index, int start) { 1991 if (seq == null) { 1992 throw new NullPointerException(); 1993 } 1994 int len = seq.length; 1995 if (index <= start || index > len || start < 0 || start >= len) { 1996 throw new IndexOutOfBoundsException(); 1997 } 1998 1999 char low = seq[--index]; 2000 if (--index < start) { 2001 return low; 2002 } 2003 char high = seq[index]; 2004 if (isSurrogatePair(high, low)) { 2005 return toCodePoint(high, low); 2006 } 2007 return low; 2008 } 2009 2010 /** 2011 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2012 * and copies the value(s) into the char array {@code dst}, starting at 2013 * index {@code dstIndex}. 2014 * 2015 * @param codePoint 2016 * the Unicode code point to encode. 2017 * @param dst 2018 * the destination array to copy the encoded value into. 2019 * @param dstIndex 2020 * the index in {@code dst} from where to start copying. 2021 * @return the number of {@code char} value units copied into {@code dst}. 2022 * @throws IllegalArgumentException 2023 * if {@code codePoint} is not a valid Unicode code point. 2024 * @throws NullPointerException 2025 * if {@code dst} is {@code null}. 2026 * @throws IndexOutOfBoundsException 2027 * if {@code dstIndex} is negative, greater than or equal to 2028 * {@code dst.length} or equals {@code dst.length - 1} when 2029 * {@code codePoint} is a 2030 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2031 * @since 1.5 2032 */ 2033 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2034 if (!isValidCodePoint(codePoint)) { 2035 throw new IllegalArgumentException(); 2036 } 2037 if (dst == null) { 2038 throw new NullPointerException(); 2039 } 2040 if (dstIndex < 0 || dstIndex >= dst.length) { 2041 throw new IndexOutOfBoundsException(); 2042 } 2043 2044 if (isSupplementaryCodePoint(codePoint)) { 2045 if (dstIndex == dst.length - 1) { 2046 throw new IndexOutOfBoundsException(); 2047 } 2048 // See RFC 2781, Section 2.1 2049 // http://www.ietf.org/rfc/rfc2781.txt 2050 int cpPrime = codePoint - 0x10000; 2051 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2052 int low = 0xDC00 | (cpPrime & 0x3FF); 2053 dst[dstIndex] = (char) high; 2054 dst[dstIndex + 1] = (char) low; 2055 return 2; 2056 } 2057 2058 dst[dstIndex] = (char) codePoint; 2059 return 1; 2060 } 2061 2062 /** 2063 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2064 * and returns it as a char array. 2065 * 2066 * @param codePoint 2067 * the Unicode code point to encode. 2068 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2069 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2070 * then the returned array contains two characters, otherwise it 2071 * contains just one character. 2072 * @throws IllegalArgumentException 2073 * if {@code codePoint} is not a valid Unicode code point. 
2074 * @since 1.5 2075 */ 2076 public static char[] toChars(int codePoint) { 2077 if (!isValidCodePoint(codePoint)) { 2078 throw new IllegalArgumentException(); 2079 } 2080 2081 if (isSupplementaryCodePoint(codePoint)) { 2082 int cpPrime = codePoint - 0x10000; 2083 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2084 int low = 0xDC00 | (cpPrime & 0x3FF); 2085 return new char[] { (char) high, (char) low }; 2086 } 2087 return new char[] { (char) codePoint }; 2088 } 2089 2090 /** 2091 * Counts the number of Unicode code points in the subsequence of the 2092 * specified character sequence, as delineated by {@code beginIndex} and 2093 * {@code endIndex}. Any surrogate values with missing pair values will be 2094 * counted as one code point. 2095 * 2096 * @param seq 2097 * the {@code CharSequence} to look through. 2098 * @param beginIndex 2099 * the inclusive index to begin counting at. 2100 * @param endIndex 2101 * the exclusive index to stop counting at. 2102 * @return the number of Unicode code points. 2103 * @throws NullPointerException 2104 * if {@code seq} is {@code null}. 2105 * @throws IndexOutOfBoundsException 2106 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2107 * if {@code endIndex} is greater than the length of {@code seq}. 
2108 * @since 1.5 2109 */ 2110 public static int codePointCount(CharSequence seq, int beginIndex, 2111 int endIndex) { 2112 if (seq == null) { 2113 throw new NullPointerException(); 2114 } 2115 int len = seq.length(); 2116 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2117 throw new IndexOutOfBoundsException(); 2118 } 2119 2120 int result = 0; 2121 for (int i = beginIndex; i < endIndex; i++) { 2122 char c = seq.charAt(i); 2123 if (isHighSurrogate(c)) { 2124 if (++i < endIndex) { 2125 c = seq.charAt(i); 2126 if (!isLowSurrogate(c)) { 2127 result++; 2128 } 2129 } 2130 } 2131 result++; 2132 } 2133 return result; 2134 } 2135 2136 /** 2137 * Counts the number of Unicode code points in the subsequence of the 2138 * specified char array, as delineated by {@code offset} and {@code count}. 2139 * Any surrogate values with missing pair values will be counted as one code 2140 * point. 2141 * 2142 * @param seq 2143 * the char array to look through 2144 * @param offset 2145 * the inclusive index to begin counting at. 2146 * @param count 2147 * the number of {@code char} values to look through in 2148 * {@code seq}. 2149 * @return the number of Unicode code points. 2150 * @throws NullPointerException 2151 * if {@code seq} is {@code null}. 2152 * @throws IndexOutOfBoundsException 2153 * if {@code offset < 0}, {@code count < 0} or if 2154 * {@code offset + count} is greater than the length of 2155 * {@code seq}. 
2156 * @since 1.5 2157 */ 2158 public static int codePointCount(char[] seq, int offset, int count) { 2159 if (seq == null) { 2160 throw new NullPointerException(); 2161 } 2162 int len = seq.length; 2163 int endIndex = offset + count; 2164 if (offset < 0 || count < 0 || endIndex > len) { 2165 throw new IndexOutOfBoundsException(); 2166 } 2167 2168 int result = 0; 2169 for (int i = offset; i < endIndex; i++) { 2170 char c = seq[i]; 2171 if (isHighSurrogate(c)) { 2172 if (++i < endIndex) { 2173 c = seq[i]; 2174 if (!isLowSurrogate(c)) { 2175 result++; 2176 } 2177 } 2178 } 2179 result++; 2180 } 2181 return result; 2182 } 2183 2184 /** 2185 * Determines the index in the specified character sequence that is offset 2186 * {@code codePointOffset} code points from {@code index}. 2187 * 2188 * @param seq 2189 * the character sequence to find the index in. 2190 * @param index 2191 * the start index in {@code seq}. 2192 * @param codePointOffset 2193 * the number of code points to look backwards or forwards; may 2194 * be a negative or positive value. 2195 * @return the index in {@code seq} that is {@code codePointOffset} code 2196 * points away from {@code index}. 2197 * @throws NullPointerException 2198 * if {@code seq} is {@code null}. 2199 * @throws IndexOutOfBoundsException 2200 * if {@code index < 0}, {@code index} is greater than the 2201 * length of {@code seq}, or if there are not enough values in 2202 * {@code seq} to skip {@code codePointOffset} code points 2203 * forwards or backwards (if {@code codePointOffset} is 2204 * negative) from {@code index}. 
2205 * @since 1.5 2206 */ 2207 public static int offsetByCodePoints(CharSequence seq, int index, 2208 int codePointOffset) { 2209 if (seq == null) { 2210 throw new NullPointerException(); 2211 } 2212 int len = seq.length(); 2213 if (index < 0 || index > len) { 2214 throw new IndexOutOfBoundsException(); 2215 } 2216 2217 if (codePointOffset == 0) { 2218 return index; 2219 } 2220 2221 if (codePointOffset > 0) { 2222 int codePoints = codePointOffset; 2223 int i = index; 2224 while (codePoints > 0) { 2225 codePoints--; 2226 if (i >= len) { 2227 throw new IndexOutOfBoundsException(); 2228 } 2229 if (isHighSurrogate(seq.charAt(i))) { 2230 int next = i + 1; 2231 if (next < len && isLowSurrogate(seq.charAt(next))) { 2232 i++; 2233 } 2234 } 2235 i++; 2236 } 2237 return i; 2238 } 2239 2240 assert codePointOffset < 0; 2241 int codePoints = -codePointOffset; 2242 int i = index; 2243 while (codePoints > 0) { 2244 codePoints--; 2245 i--; 2246 if (i < 0) { 2247 throw new IndexOutOfBoundsException(); 2248 } 2249 if (isLowSurrogate(seq.charAt(i))) { 2250 int prev = i - 1; 2251 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2252 i--; 2253 } 2254 } 2255 } 2256 return i; 2257 } 2258 2259 /** 2260 * Determines the index in a subsequence of the specified character array 2261 * that is offset {@code codePointOffset} code points from {@code index}. 2262 * The subsequence is delineated by {@code start} and {@code count}. 2263 * 2264 * @param seq 2265 * the character array to find the index in. 2266 * @param start 2267 * the inclusive index that marks the beginning of the 2268 * subsequence. 2269 * @param count 2270 * the number of {@code char} values to include within the 2271 * subsequence. 2272 * @param index 2273 * the start index in the subsequence of the char array. 2274 * @param codePointOffset 2275 * the number of code points to look backwards or forwards; may 2276 * be a negative or positive value. 
2277 * @return the index in {@code seq} that is {@code codePointOffset} code 2278 * points away from {@code index}. 2279 * @throws NullPointerException 2280 * if {@code seq} is {@code null}. 2281 * @throws IndexOutOfBoundsException 2282 * if {@code start < 0}, {@code count < 0}, 2283 * {@code index < start}, {@code index > start + count}, 2284 * {@code start + count} is greater than the length of 2285 * {@code seq}, or if there are not enough values in 2286 * {@code seq} to skip {@code codePointOffset} code points 2287 * forward or backward (if {@code codePointOffset} is 2288 * negative) from {@code index}. 2289 * @since 1.5 2290 */ 2291 public static int offsetByCodePoints(char[] seq, int start, int count, 2292 int index, int codePointOffset) { 2293 if (seq == null) { 2294 throw new NullPointerException(); 2295 } 2296 int end = start + count; 2297 if (start < 0 || count < 0 || end > seq.length || index < start 2298 || index > end) { 2299 throw new IndexOutOfBoundsException(); 2300 } 2301 2302 if (codePointOffset == 0) { 2303 return index; 2304 } 2305 2306 if (codePointOffset > 0) { 2307 int codePoints = codePointOffset; 2308 int i = index; 2309 while (codePoints > 0) { 2310 codePoints--; 2311 if (i >= end) { 2312 throw new IndexOutOfBoundsException(); 2313 } 2314 if (isHighSurrogate(seq[i])) { 2315 int next = i + 1; 2316 if (next < end && isLowSurrogate(seq[next])) { 2317 i++; 2318 } 2319 } 2320 i++; 2321 } 2322 return i; 2323 } 2324 2325 assert codePointOffset < 0; 2326 int codePoints = -codePointOffset; 2327 int i = index; 2328 while (codePoints > 0) { 2329 codePoints--; 2330 i--; 2331 if (i < start) { 2332 throw new IndexOutOfBoundsException(); 2333 } 2334 if (isLowSurrogate(seq[i])) { 2335 int prev = i - 1; 2336 if (prev >= start && isHighSurrogate(seq[prev])) { 2337 i--; 2338 } 2339 } 2340 } 2341 return i; 2342 } 2343 2344 /** 2345 * Convenience method to determine the value of the specified character 2346 * {@code c} in the supplied radix. 
The value of {@code radix} must be 2347 * between MIN_RADIX and MAX_RADIX. 2348 * 2349 * @param c 2350 * the character to determine the value of. 2351 * @param radix 2352 * the radix. 2353 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2354 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2355 */ 2356 public static int digit(char c, int radix) { 2357 return digit((int) c, radix); 2358 } 2359 2360 /** 2361 * Convenience method to determine the value of the character 2362 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2363 * be between MIN_RADIX and MAX_RADIX. 2364 * 2365 * @param codePoint 2366 * the character, including supplementary characters. 2367 * @param radix 2368 * the radix. 2369 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2370 * {@link #MAX_RADIX} then the value of the character in the radix; 2371 * -1 otherwise. 2372 */ 2373 public static int digit(int codePoint, int radix) { 2374 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2375 return -1; 2376 } 2377 if (codePoint < 128) { 2378 // Optimized for ASCII 2379 int result = -1; 2380 if ('0' <= codePoint && codePoint <= '9') { 2381 result = codePoint - '0'; 2382 } else if ('a' <= codePoint && codePoint <= 'z') { 2383 result = 10 + (codePoint - 'a'); 2384 } else if ('A' <= codePoint && codePoint <= 'Z') { 2385 result = 10 + (codePoint - 'A'); 2386 } 2387 return result < radix ? result : -1; 2388 } 2389 return digitImpl(codePoint, radix); 2390 } 2391 2392 private static native int digitImpl(int codePoint, int radix); 2393 2394 /** 2395 * Compares this object with the specified object and indicates if they are 2396 * equal. In order to be equal, {@code object} must be an instance of 2397 * {@code Character} and have the same char value as this object. 2398 * 2399 * @param object 2400 * the object to compare this double with. 
2401 * @return {@code true} if the specified object is equal to this 2402 * {@code Character}; {@code false} otherwise. 2403 */ 2404 @Override 2405 public boolean equals(Object object) { 2406 return (object instanceof Character) && (value == ((Character) object).value); 2407 } 2408 2409 /** 2410 * Returns the character which represents the specified digit in the 2411 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2412 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2413 * smaller than {@code radix}. If any of these conditions does not hold, 0 2414 * is returned. 2415 * 2416 * @param digit 2417 * the integer value. 2418 * @param radix 2419 * the radix. 2420 * @return the character which represents the {@code digit} in the 2421 * {@code radix}. 2422 */ 2423 public static char forDigit(int digit, int radix) { 2424 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2425 if (digit >= 0 && digit < radix) { 2426 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2427 } 2428 } 2429 return 0; 2430 } 2431 2432 /** 2433 * Returns the numeric value of the specified Unicode character. 2434 * See {@link #getNumericValue(int)}. 2435 * 2436 * @param c the character 2437 * @return a non-negative numeric integer value if a numeric value for 2438 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2439 * -2 if the numeric value can not be represented as an integer. 2440 */ 2441 public static int getNumericValue(char c) { 2442 return getNumericValue((int) c); 2443 } 2444 2445 /** 2446 * Gets the numeric value of the specified Unicode code point. For example, 2447 * the code point '\u216B' stands for the Roman number XII, which has the 2448 * numeric value 12. 2449 * 2450 * <p>There are two points of divergence between this method and the Unicode 2451 * specification. This method treats the letters a-z (in both upper and lower 2452 * cases, and their full-width variants) as numbers from 10 to 35. 
The 2453 * Unicode specification also supports the idea of code points with non-integer 2454 * numeric values; this method does not (except to the extent of returning -2 2455 * for such code points). 2456 * 2457 * @param codePoint the code point 2458 * @return a non-negative numeric integer value if a numeric value for 2459 * {@code codePoint} exists, -1 if there is no numeric value for 2460 * {@code codePoint}, -2 if the numeric value can not be 2461 * represented with an integer. 2462 */ 2463 public static int getNumericValue(int codePoint) { 2464 // This is both an optimization and papers over differences between Java and ICU. 2465 if (codePoint < 128) { 2466 if (codePoint >= '0' && codePoint <= '9') { 2467 return codePoint - '0'; 2468 } 2469 if (codePoint >= 'a' && codePoint <= 'z') { 2470 return codePoint - ('a' - 10); 2471 } 2472 if (codePoint >= 'A' && codePoint <= 'Z') { 2473 return codePoint - ('A' - 10); 2474 } 2475 return -1; 2476 } 2477 // Full-width uppercase A-Z. 2478 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2479 return codePoint - 0xff17; 2480 } 2481 // Full-width lowercase a-z. 2482 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2483 return codePoint - 0xff37; 2484 } 2485 return getNumericValueImpl(codePoint); 2486 } 2487 2488 private static native int getNumericValueImpl(int codePoint); 2489 2490 /** 2491 * Gets the general Unicode category of the specified character. 2492 * 2493 * @param c 2494 * the character to get the category of. 2495 * @return the Unicode category of {@code c}. 2496 */ 2497 public static int getType(char c) { 2498 return getType((int) c); 2499 } 2500 2501 /** 2502 * Gets the general Unicode category of the specified code point. 2503 * 2504 * @param codePoint 2505 * the Unicode code point to get the category of. 2506 * @return the Unicode category of {@code codePoint}. 
2507 */ 2508 public static int getType(int codePoint) { 2509 int type = getTypeImpl(codePoint); 2510 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2511 if (type <= Character.FORMAT) { 2512 return type; 2513 } 2514 return (type + 1); 2515 } 2516 2517 private static native int getTypeImpl(int codePoint); 2518 2519 /** 2520 * Gets the Unicode directionality of the specified character. 2521 * 2522 * @param c 2523 * the character to get the directionality of. 2524 * @return the Unicode directionality of {@code c}. 2525 */ 2526 public static byte getDirectionality(char c) { 2527 return getDirectionality((int)c); 2528 } 2529 2530 /** 2531 * Gets the Unicode directionality of the specified character. 2532 * 2533 * @param codePoint 2534 * the Unicode code point to get the directionality of. 2535 * @return the Unicode directionality of {@code codePoint}. 2536 */ 2537 public static byte getDirectionality(int codePoint) { 2538 if (getType(codePoint) == Character.UNASSIGNED) { 2539 return Character.DIRECTIONALITY_UNDEFINED; 2540 } 2541 2542 byte directionality = getDirectionalityImpl(codePoint); 2543 if (directionality == -1) { 2544 return -1; 2545 } 2546 return DIRECTIONALITY[directionality]; 2547 } 2548 2549 private static native byte getDirectionalityImpl(int codePoint); 2550 2551 /** 2552 * Indicates whether the specified character is mirrored. 2553 * 2554 * @param c 2555 * the character to check. 2556 * @return {@code true} if {@code c} is mirrored; {@code false} 2557 * otherwise. 2558 */ 2559 public static boolean isMirrored(char c) { 2560 return isMirrored((int) c); 2561 } 2562 2563 /** 2564 * Indicates whether the specified code point is mirrored. 2565 * 2566 * @param codePoint 2567 * the code point to check. 2568 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2569 * otherwise. 
2570 */ 2571 public static boolean isMirrored(int codePoint) { 2572 return isMirroredImpl(codePoint); 2573 } 2574 2575 private static native boolean isMirroredImpl(int codePoint); 2576 2577 @Override 2578 public int hashCode() { 2579 return value; 2580 } 2581 2582 /** 2583 * Indicates whether the specified character is defined in the Unicode 2584 * specification. 2585 * 2586 * @param c 2587 * the character to check. 2588 * @return {@code true} if the general Unicode category of the character is 2589 * not {@code UNASSIGNED}; {@code false} otherwise. 2590 */ 2591 public static boolean isDefined(char c) { 2592 return isDefinedImpl(c); 2593 } 2594 2595 /** 2596 * Indicates whether the specified code point is defined in the Unicode 2597 * specification. 2598 * 2599 * @param codePoint 2600 * the code point to check. 2601 * @return {@code true} if the general Unicode category of the code point is 2602 * not {@code UNASSIGNED}; {@code false} otherwise. 2603 */ 2604 public static boolean isDefined(int codePoint) { 2605 return isDefinedImpl(codePoint); 2606 } 2607 2608 private static native boolean isDefinedImpl(int codePoint); 2609 2610 /** 2611 * Indicates whether the specified character is a digit. 2612 * 2613 * @param c 2614 * the character to check. 2615 * @return {@code true} if {@code c} is a digit; {@code false} 2616 * otherwise. 2617 */ 2618 public static boolean isDigit(char c) { 2619 return isDigit((int) c); 2620 } 2621 2622 /** 2623 * Indicates whether the specified code point is a digit. 2624 * 2625 * @param codePoint 2626 * the code point to check. 2627 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2628 * otherwise. 
2629 */ 2630 public static boolean isDigit(int codePoint) { 2631 // Optimized case for ASCII 2632 if ('0' <= codePoint && codePoint <= '9') { 2633 return true; 2634 } 2635 if (codePoint < 1632) { 2636 return false; 2637 } 2638 return isDigitImpl(codePoint); 2639 } 2640 2641 private static native boolean isDigitImpl(int codePoint); 2642 2643 /** 2644 * Indicates whether the specified character is ignorable in a Java or 2645 * Unicode identifier. 2646 * 2647 * @param c 2648 * the character to check. 2649 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2650 */ 2651 public static boolean isIdentifierIgnorable(char c) { 2652 return isIdentifierIgnorable((int) c); 2653 } 2654 2655 /** 2656 * Indicates whether the specified code point is ignorable in a Java or 2657 * Unicode identifier. 2658 * 2659 * @param codePoint 2660 * the code point to check. 2661 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2662 * otherwise. 2663 */ 2664 public static boolean isIdentifierIgnorable(int codePoint) { 2665 // This is both an optimization and papers over differences between Java and ICU. 2666 if (codePoint < 0x600) { 2667 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2668 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2669 } 2670 return isIdentifierIgnorableImpl(codePoint); 2671 } 2672 2673 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2674 2675 /** 2676 * Indicates whether the specified character is an ISO control character. 2677 * 2678 * @param c 2679 * the character to check. 2680 * @return {@code true} if {@code c} is an ISO control character; 2681 * {@code false} otherwise. 2682 */ 2683 public static boolean isISOControl(char c) { 2684 return isISOControl((int) c); 2685 } 2686 2687 /** 2688 * Indicates whether the specified code point is an ISO control character. 2689 * 2690 * @param c 2691 * the code point to check. 
2692 * @return {@code true} if {@code c} is an ISO control character; 2693 * {@code false} otherwise. 2694 */ 2695 public static boolean isISOControl(int c) { 2696 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2697 } 2698 2699 /** 2700 * Indicates whether the specified character is a valid part of a Java 2701 * identifier other than the first character. 2702 * 2703 * @param c 2704 * the character to check. 2705 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2706 * {@code false} otherwise. 2707 */ 2708 public static boolean isJavaIdentifierPart(char c) { 2709 // BEGIN android-changed 2710 return isJavaIdentifierPart((int) c); 2711 // END android-changed 2712 } 2713 2714 /** 2715 * Indicates whether the specified code point is a valid part of a Java 2716 * identifier other than the first character. 2717 * 2718 * @param codePoint 2719 * the code point to check. 2720 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2721 * {@code false} otherwise. 2722 */ 2723 public static boolean isJavaIdentifierPart(int codePoint) { 2724 // BEGIN android-changed: use precomputed bitmasks for the ASCII range. 
2725 // Optimized case for ASCII 2726 if (codePoint < 64) { 2727 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2728 } else if (codePoint < 128) { 2729 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2730 } 2731 int type = getType(codePoint); 2732 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2733 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2734 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2735 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2736 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2737 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2738 // END android-changed 2739 } 2740 2741 /** 2742 * Indicates whether the specified character is a valid first character for 2743 * a Java identifier. 2744 * 2745 * @param c 2746 * the character to check. 2747 * @return {@code true} if {@code c} is a valid first character of a Java 2748 * identifier; {@code false} otherwise. 2749 */ 2750 public static boolean isJavaIdentifierStart(char c) { 2751 // BEGIN android-changed 2752 return isJavaIdentifierStart((int) c); 2753 // END android-changed 2754 } 2755 2756 /** 2757 * Indicates whether the specified code point is a valid first character for 2758 * a Java identifier. 2759 * 2760 * @param codePoint 2761 * the code point to check. 2762 * @return {@code true} if {@code codePoint} is a valid start of a Java 2763 * identifier; {@code false} otherwise. 2764 */ 2765 public static boolean isJavaIdentifierStart(int codePoint) { 2766 // BEGIN android-changed: use precomputed bitmasks for the ASCII range. 2767 // Optimized case for ASCII 2768 if (codePoint < 64) { 2769 return (codePoint == '$'); // There's only one character in this range. 
2770 } else if (codePoint < 128) { 2771 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2772 } 2773 int type = getType(codePoint); 2774 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2775 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2776 // END android-changed 2777 } 2778 2779 /** 2780 * Indicates whether the specified character is a Java letter. 2781 * 2782 * @param c 2783 * the character to check. 2784 * @return {@code true} if {@code c} is a Java letter; {@code false} 2785 * otherwise. 2786 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2787 */ 2788 @Deprecated 2789 public static boolean isJavaLetter(char c) { 2790 return isJavaIdentifierStart(c); 2791 } 2792 2793 /** 2794 * Indicates whether the specified character is a Java letter or digit 2795 * character. 2796 * 2797 * @param c 2798 * the character to check. 2799 * @return {@code true} if {@code c} is a Java letter or digit; 2800 * {@code false} otherwise. 2801 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2802 */ 2803 @Deprecated 2804 public static boolean isJavaLetterOrDigit(char c) { 2805 return isJavaIdentifierPart(c); 2806 } 2807 2808 /** 2809 * Indicates whether the specified character is a letter. 2810 * 2811 * @param c 2812 * the character to check. 2813 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2814 */ 2815 public static boolean isLetter(char c) { 2816 return isLetter((int) c); 2817 } 2818 2819 /** 2820 * Indicates whether the specified code point is a letter. 2821 * 2822 * @param codePoint 2823 * the code point to check. 2824 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2825 * otherwise. 
2826 */ 2827 public static boolean isLetter(int codePoint) { 2828 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2829 return true; 2830 } 2831 if (codePoint < 128) { 2832 return false; 2833 } 2834 return isLetterImpl(codePoint); 2835 } 2836 2837 private static native boolean isLetterImpl(int codePoint); 2838 2839 /** 2840 * Indicates whether the specified character is a letter or a digit. 2841 * 2842 * @param c 2843 * the character to check. 2844 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2845 * otherwise. 2846 */ 2847 public static boolean isLetterOrDigit(char c) { 2848 return isLetterOrDigit((int) c); 2849 } 2850 2851 /** 2852 * Indicates whether the specified code point is a letter or a digit. 2853 * 2854 * @param codePoint 2855 * the code point to check. 2856 * @return {@code true} if {@code codePoint} is a letter or a digit; 2857 * {@code false} otherwise. 2858 */ 2859 public static boolean isLetterOrDigit(int codePoint) { 2860 // Optimized case for ASCII 2861 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2862 return true; 2863 } 2864 if ('0' <= codePoint && codePoint <= '9') { 2865 return true; 2866 } 2867 if (codePoint < 128) { 2868 return false; 2869 } 2870 return isLetterOrDigitImpl(codePoint); 2871 } 2872 2873 private static native boolean isLetterOrDigitImpl(int codePoint); 2874 2875 /** 2876 * Indicates whether the specified character is a lower case letter. 2877 * 2878 * @param c 2879 * the character to check. 2880 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2881 * otherwise. 2882 */ 2883 public static boolean isLowerCase(char c) { 2884 return isLowerCase((int) c); 2885 } 2886 2887 /** 2888 * Indicates whether the specified code point is a lower case letter. 2889 * 2890 * @param codePoint 2891 * the code point to check. 
2892 * @return {@code true} if {@code codePoint} is a lower case letter; 2893 * {@code false} otherwise. 2894 */ 2895 public static boolean isLowerCase(int codePoint) { 2896 // Optimized case for ASCII 2897 if ('a' <= codePoint && codePoint <= 'z') { 2898 return true; 2899 } 2900 if (codePoint < 128) { 2901 return false; 2902 } 2903 return isLowerCaseImpl(codePoint); 2904 } 2905 2906 private static native boolean isLowerCaseImpl(int codePoint); 2907 2908 /** 2909 * Indicates whether the specified character is a Java space. 2910 * 2911 * @param c 2912 * the character to check. 2913 * @return {@code true} if {@code c} is a Java space; {@code false} 2914 * otherwise. 2915 * @deprecated Use {@link #isWhitespace(char)} 2916 */ 2917 @Deprecated 2918 public static boolean isSpace(char c) { 2919 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2920 } 2921 2922 /** 2923 * Indicates whether the specified character is a Unicode space character. 2924 * That is, if it is a member of one of the Unicode categories Space 2925 * Separator, Line Separator, or Paragraph Separator. 2926 * 2927 * @param c 2928 * the character to check. 2929 * @return {@code true} if {@code c} is a Unicode space character, 2930 * {@code false} otherwise. 2931 */ 2932 public static boolean isSpaceChar(char c) { 2933 return isSpaceChar((int) c); 2934 } 2935 2936 /** 2937 * Indicates whether the specified code point is a Unicode space character. 2938 * That is, if it is a member of one of the Unicode categories Space 2939 * Separator, Line Separator, or Paragraph Separator. 2940 * 2941 * @param codePoint 2942 * the code point to check. 2943 * @return {@code true} if {@code codePoint} is a Unicode space character, 2944 * {@code false} otherwise. 
2945 */ 2946 public static boolean isSpaceChar(int codePoint) { 2947 if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) { 2948 return true; 2949 } 2950 if (codePoint < 0x2000) { 2951 return false; 2952 } 2953 if (codePoint <= 0xffff) { 2954 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 2955 codePoint == 0x202f || codePoint == 0x3000; 2956 } 2957 return isSpaceCharImpl(codePoint); 2958 } 2959 2960 private static native boolean isSpaceCharImpl(int codePoint); 2961 2962 /** 2963 * Indicates whether the specified character is a titlecase character. 2964 * 2965 * @param c 2966 * the character to check. 2967 * @return {@code true} if {@code c} is a titlecase character, {@code false} 2968 * otherwise. 2969 */ 2970 public static boolean isTitleCase(char c) { 2971 return isTitleCaseImpl(c); 2972 } 2973 2974 /** 2975 * Indicates whether the specified code point is a titlecase character. 2976 * 2977 * @param codePoint 2978 * the code point to check. 2979 * @return {@code true} if {@code codePoint} is a titlecase character, 2980 * {@code false} otherwise. 2981 */ 2982 public static boolean isTitleCase(int codePoint) { 2983 return isTitleCaseImpl(codePoint); 2984 } 2985 2986 private static native boolean isTitleCaseImpl(int codePoint); 2987 2988 /** 2989 * Indicates whether the specified character is valid as part of a Unicode 2990 * identifier other than the first character. 2991 * 2992 * @param c 2993 * the character to check. 2994 * @return {@code true} if {@code c} is valid as part of a Unicode 2995 * identifier; {@code false} otherwise. 2996 */ 2997 public static boolean isUnicodeIdentifierPart(char c) { 2998 return isUnicodeIdentifierPartImpl(c); 2999 } 3000 3001 /** 3002 * Indicates whether the specified code point is valid as part of a Unicode 3003 * identifier other than the first character. 3004 * 3005 * @param codePoint 3006 * the code point to check. 
3007 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3008 * identifier; {@code false} otherwise. 3009 */ 3010 public static boolean isUnicodeIdentifierPart(int codePoint) { 3011 return isUnicodeIdentifierPartImpl(codePoint); 3012 } 3013 3014 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3015 3016 /** 3017 * Indicates whether the specified character is a valid initial character 3018 * for a Unicode identifier. 3019 * 3020 * @param c 3021 * the character to check. 3022 * @return {@code true} if {@code c} is a valid first character for a 3023 * Unicode identifier; {@code false} otherwise. 3024 */ 3025 public static boolean isUnicodeIdentifierStart(char c) { 3026 return isUnicodeIdentifierStartImpl(c); 3027 } 3028 3029 /** 3030 * Indicates whether the specified code point is a valid initial character 3031 * for a Unicode identifier. 3032 * 3033 * @param codePoint 3034 * the code point to check. 3035 * @return {@code true} if {@code codePoint} is a valid first character for 3036 * a Unicode identifier; {@code false} otherwise. 3037 */ 3038 public static boolean isUnicodeIdentifierStart(int codePoint) { 3039 return isUnicodeIdentifierStartImpl(codePoint); 3040 } 3041 3042 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3043 3044 /** 3045 * Indicates whether the specified character is an upper case letter. 3046 * 3047 * @param c 3048 * the character to check. 3049 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3050 * otherwise. 3051 */ 3052 public static boolean isUpperCase(char c) { 3053 return isUpperCase((int) c); 3054 } 3055 3056 /** 3057 * Indicates whether the specified code point is an upper case letter. 3058 * 3059 * @param codePoint 3060 * the code point to check. 3061 * @return {@code true} if {@code codePoint} is a upper case letter; 3062 * {@code false} otherwise. 
3063 */ 3064 public static boolean isUpperCase(int codePoint) { 3065 // Optimized case for ASCII 3066 if ('A' <= codePoint && codePoint <= 'Z') { 3067 return true; 3068 } 3069 if (codePoint < 128) { 3070 return false; 3071 } 3072 return isUpperCaseImpl(codePoint); 3073 } 3074 3075 private static native boolean isUpperCaseImpl(int codePoint); 3076 3077 /** 3078 * Indicates whether the specified character is a whitespace character in 3079 * Java. 3080 * 3081 * @param c 3082 * the character to check. 3083 * @return {@code true} if the supplied {@code c} is a whitespace character 3084 * in Java; {@code false} otherwise. 3085 */ 3086 public static boolean isWhitespace(char c) { 3087 return isWhitespace((int) c); 3088 } 3089 3090 /** 3091 * Indicates whether the specified code point is a whitespace character in 3092 * Java. 3093 * 3094 * @param codePoint 3095 * the code point to check. 3096 * @return {@code true} if the supplied {@code c} is a whitespace character 3097 * in Java; {@code false} otherwise. 3098 */ 3099 public static boolean isWhitespace(int codePoint) { 3100 // This is both an optimization and papers over differences between Java and ICU. 3101 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) { 3102 return true; 3103 } 3104 if (codePoint == 0x1680) { 3105 return true; 3106 } 3107 if (codePoint < 0x2000 || codePoint == 0x2007) { 3108 return false; 3109 } 3110 if (codePoint <= 0xffff) { 3111 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3112 codePoint == 0x3000; 3113 } 3114 return isWhitespaceImpl(codePoint); 3115 } 3116 3117 private static native boolean isWhitespaceImpl(int codePoint); 3118 3119 /** 3120 * Reverses the order of the first and second byte in the specified 3121 * character. 3122 * 3123 * @param c 3124 * the character to reverse. 3125 * @return the character with reordered bytes. 
3126 */ 3127 public static char reverseBytes(char c) { 3128 return (char)((c<<8) | (c>>8)); 3129 } 3130 3131 /** 3132 * Returns the lower case equivalent for the specified character if the 3133 * character is an upper case letter. Otherwise, the specified character is 3134 * returned unchanged. 3135 * 3136 * @param c 3137 * the character 3138 * @return if {@code c} is an upper case character then its lower case 3139 * counterpart, otherwise just {@code c}. 3140 */ 3141 public static char toLowerCase(char c) { 3142 return (char) toLowerCase((int) c); 3143 } 3144 3145 /** 3146 * Returns the lower case equivalent for the specified code point if it is 3147 * an upper case letter. Otherwise, the specified code point is returned 3148 * unchanged. 3149 * 3150 * @param codePoint 3151 * the code point to check. 3152 * @return if {@code codePoint} is an upper case character then its lower 3153 * case counterpart, otherwise just {@code codePoint}. 3154 */ 3155 public static int toLowerCase(int codePoint) { 3156 // Optimized case for ASCII 3157 if ('A' <= codePoint && codePoint <= 'Z') { 3158 return (char) (codePoint + ('a' - 'A')); 3159 } 3160 if (codePoint < 192) { 3161 return codePoint; 3162 } 3163 return toLowerCaseImpl(codePoint); 3164 } 3165 3166 private static native int toLowerCaseImpl(int codePoint); 3167 3168 @Override 3169 public String toString() { 3170 return String.valueOf(value); 3171 } 3172 3173 /** 3174 * Converts the specified character to its string representation. 3175 * 3176 * @param value 3177 * the character to convert. 3178 * @return the character converted to a string. 3179 */ 3180 public static String toString(char value) { 3181 return String.valueOf(value); 3182 } 3183 3184 /** 3185 * Returns the title case equivalent for the specified character if it 3186 * exists. Otherwise, the specified character is returned unchanged. 3187 * 3188 * @param c 3189 * the character to convert. 
3190 * @return the title case equivalent of {@code c} if it exists, otherwise 3191 * {@code c}. 3192 */ 3193 public static char toTitleCase(char c) { 3194 return (char) toTitleCaseImpl(c); 3195 } 3196 3197 /** 3198 * Returns the title case equivalent for the specified code point if it 3199 * exists. Otherwise, the specified code point is returned unchanged. 3200 * 3201 * @param codePoint 3202 * the code point to convert. 3203 * @return the title case equivalent of {@code codePoint} if it exists, 3204 * otherwise {@code codePoint}. 3205 */ 3206 public static int toTitleCase(int codePoint) { 3207 return toTitleCaseImpl(codePoint); 3208 } 3209 3210 private static native int toTitleCaseImpl(int codePoint); 3211 3212 /** 3213 * Returns the upper case equivalent for the specified character if the 3214 * character is a lower case letter. Otherwise, the specified character is 3215 * returned unchanged. 3216 * 3217 * @param c 3218 * the character to convert. 3219 * @return if {@code c} is a lower case character then its upper case 3220 * counterpart, otherwise just {@code c}. 3221 */ 3222 public static char toUpperCase(char c) { 3223 return (char) toUpperCase((int) c); 3224 } 3225 3226 /** 3227 * Returns the upper case equivalent for the specified code point if the 3228 * code point is a lower case letter. Otherwise, the specified code point is 3229 * returned unchanged. 3230 * 3231 * @param codePoint 3232 * the code point to convert. 3233 * @return if {@code codePoint} is a lower case character then its upper 3234 * case counterpart, otherwise just {@code codePoint}. 3235 */ 3236 public static int toUpperCase(int codePoint) { 3237 // Optimized case for ASCII 3238 if ('a' <= codePoint && codePoint <= 'z') { 3239 return (char) (codePoint - ('a' - 'A')); 3240 } 3241 if (codePoint < 181) { 3242 return codePoint; 3243 } 3244 return toUpperCaseImpl(codePoint); 3245 } 3246 3247 private static native int toUpperCaseImpl(int codePoint); 3248} 3249