Character.java revision 276c5cd70991e814f085bf417cb647dce9bb55e4
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21import java.util.Arrays; 22 23/** 24 * The wrapper for the primitive type {@code char}. This class also provides a 25 * number of utility methods for working with characters. 26 * 27 * <p>Character data is kept up to date as Unicode evolves. 28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 29 * the {@code Locale} documentation for details of the Unicode versions implemented by current 30 * and historical Android releases. 31 * 32 * <p>The Unicode specification, character tables, and other information are available at 33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 34 * 35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 39 * encoding and {@code char} pairs are used to represent code points in the 40 * supplementary range. 
A pair of {@code char} values that represent a 41 * supplementary character are made up of a <i>high surrogate</i> with a value 42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 43 * 0xDC00 to 0xDFFF. 44 * <p> 45 * On the Java platform a {@code char} value represents either a single BMP code 46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 47 * is used to represent all Unicode code points. 48 * 49 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 51 * grouped semantically to provide a convenient overview. This table is also useful in 52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 53 * <span class="datatable"> 54 * <style type="text/css"> 55 * .datatable td { padding-right: 20px; } 56 * </style> 57 * <p><table> 58 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 59 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 60 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 62 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 63 * <tr> <td><br></td> </tr> 64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 67 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 68 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 69 * <tr> <td><br></td> </tr> 70 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 71 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 73 * <tr> <td><br></td> </tr> 74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 75 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 76 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 77 * <tr> <td><br></td> </tr> 78 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 79 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 84 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 85 * <tr> <td><br></td> </tr> 86 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 89 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 90 * <tr> <td><br></td> </tr> 91 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 94 * </table> 95 * </span> 96 * 97 * @since 1.0 98 */ 99public final class Character implements Serializable, Comparable<Character> { 100 
private static final long serialVersionUID = 3786198910865385080L; 101 102 private final char value; 103 104 /** 105 * The minimum {@code Character} value. 106 */ 107 public static final char MIN_VALUE = '\u0000'; 108 109 /** 110 * The maximum {@code Character} value. 111 */ 112 public static final char MAX_VALUE = '\uffff'; 113 114 /** 115 * The minimum radix used for conversions between characters and integers. 116 */ 117 public static final int MIN_RADIX = 2; 118 119 /** 120 * The maximum radix used for conversions between characters and integers. 121 */ 122 public static final int MAX_RADIX = 36; 123 124 /** 125 * The {@link Class} object that represents the primitive type {@code char}. 126 */ 127 @SuppressWarnings("unchecked") 128 public static final Class<Character> TYPE 129 = (Class<Character>) char[].class.getComponentType(); 130 // Note: Character.TYPE can't be set to "char.class", since *that* is 131 // defined to be "java.lang.Character.TYPE"; 132 133 /** 134 * Unicode category constant Cn. 135 */ 136 public static final byte UNASSIGNED = 0; 137 138 /** 139 * Unicode category constant Lu. 140 */ 141 public static final byte UPPERCASE_LETTER = 1; 142 143 /** 144 * Unicode category constant Ll. 145 */ 146 public static final byte LOWERCASE_LETTER = 2; 147 148 /** 149 * Unicode category constant Lt. 150 */ 151 public static final byte TITLECASE_LETTER = 3; 152 153 /** 154 * Unicode category constant Lm. 155 */ 156 public static final byte MODIFIER_LETTER = 4; 157 158 /** 159 * Unicode category constant Lo. 160 */ 161 public static final byte OTHER_LETTER = 5; 162 163 /** 164 * Unicode category constant Mn. 165 */ 166 public static final byte NON_SPACING_MARK = 6; 167 168 /** 169 * Unicode category constant Me. 170 */ 171 public static final byte ENCLOSING_MARK = 7; 172 173 /** 174 * Unicode category constant Mc. 175 */ 176 public static final byte COMBINING_SPACING_MARK = 8; 177 178 /** 179 * Unicode category constant Nd. 
180 */ 181 public static final byte DECIMAL_DIGIT_NUMBER = 9; 182 183 /** 184 * Unicode category constant Nl. 185 */ 186 public static final byte LETTER_NUMBER = 10; 187 188 /** 189 * Unicode category constant No. 190 */ 191 public static final byte OTHER_NUMBER = 11; 192 193 /** 194 * Unicode category constant Zs. 195 */ 196 public static final byte SPACE_SEPARATOR = 12; 197 198 /** 199 * Unicode category constant Zl. 200 */ 201 public static final byte LINE_SEPARATOR = 13; 202 203 /** 204 * Unicode category constant Zp. 205 */ 206 public static final byte PARAGRAPH_SEPARATOR = 14; 207 208 /** 209 * Unicode category constant Cc. 210 */ 211 public static final byte CONTROL = 15; 212 213 /** 214 * Unicode category constant Cf. 215 */ 216 public static final byte FORMAT = 16; 217 218 /** 219 * Unicode category constant Co. 220 */ 221 public static final byte PRIVATE_USE = 18; 222 223 /** 224 * Unicode category constant Cs. 225 */ 226 public static final byte SURROGATE = 19; 227 228 /** 229 * Unicode category constant Pd. 230 */ 231 public static final byte DASH_PUNCTUATION = 20; 232 233 /** 234 * Unicode category constant Ps. 235 */ 236 public static final byte START_PUNCTUATION = 21; 237 238 /** 239 * Unicode category constant Pe. 240 */ 241 public static final byte END_PUNCTUATION = 22; 242 243 /** 244 * Unicode category constant Pc. 245 */ 246 public static final byte CONNECTOR_PUNCTUATION = 23; 247 248 /** 249 * Unicode category constant Po. 250 */ 251 public static final byte OTHER_PUNCTUATION = 24; 252 253 /** 254 * Unicode category constant Sm. 255 */ 256 public static final byte MATH_SYMBOL = 25; 257 258 /** 259 * Unicode category constant Sc. 260 */ 261 public static final byte CURRENCY_SYMBOL = 26; 262 263 /** 264 * Unicode category constant Sk. 265 */ 266 public static final byte MODIFIER_SYMBOL = 27; 267 268 /** 269 * Unicode category constant So. 270 */ 271 public static final byte OTHER_SYMBOL = 28; 272 273 /** 274 * Unicode category constant Pi. 
275 * 276 * @since 1.4 277 */ 278 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 279 280 /** 281 * Unicode category constant Pf. 282 * 283 * @since 1.4 284 */ 285 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 286 287 /** 288 * Unicode bidirectional constant. 289 * 290 * @since 1.4 291 */ 292 public static final byte DIRECTIONALITY_UNDEFINED = -1; 293 294 /** 295 * Unicode bidirectional constant L. 296 * 297 * @since 1.4 298 */ 299 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 300 301 /** 302 * Unicode bidirectional constant R. 303 * 304 * @since 1.4 305 */ 306 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 307 308 /** 309 * Unicode bidirectional constant AL. 310 * 311 * @since 1.4 312 */ 313 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 314 315 /** 316 * Unicode bidirectional constant EN. 317 * 318 * @since 1.4 319 */ 320 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 321 322 /** 323 * Unicode bidirectional constant ES. 324 * 325 * @since 1.4 326 */ 327 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 328 329 /** 330 * Unicode bidirectional constant ET. 331 * 332 * @since 1.4 333 */ 334 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 335 336 /** 337 * Unicode bidirectional constant AN. 338 * 339 * @since 1.4 340 */ 341 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 342 343 /** 344 * Unicode bidirectional constant CS. 345 * 346 * @since 1.4 347 */ 348 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 349 350 /** 351 * Unicode bidirectional constant NSM. 352 * 353 * @since 1.4 354 */ 355 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 356 357 /** 358 * Unicode bidirectional constant BN. 359 * 360 * @since 1.4 361 */ 362 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 363 364 /** 365 * Unicode bidirectional constant B. 
366 * 367 * @since 1.4 368 */ 369 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 370 371 /** 372 * Unicode bidirectional constant S. 373 * 374 * @since 1.4 375 */ 376 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 377 378 /** 379 * Unicode bidirectional constant WS. 380 * 381 * @since 1.4 382 */ 383 public static final byte DIRECTIONALITY_WHITESPACE = 12; 384 385 /** 386 * Unicode bidirectional constant ON. 387 * 388 * @since 1.4 389 */ 390 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 391 392 /** 393 * Unicode bidirectional constant LRE. 394 * 395 * @since 1.4 396 */ 397 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 398 399 /** 400 * Unicode bidirectional constant LRO. 401 * 402 * @since 1.4 403 */ 404 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 405 406 /** 407 * Unicode bidirectional constant RLE. 408 * 409 * @since 1.4 410 */ 411 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 412 413 /** 414 * Unicode bidirectional constant RLO. 415 * 416 * @since 1.4 417 */ 418 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 419 420 /** 421 * Unicode bidirectional constant PDF. 422 * 423 * @since 1.4 424 */ 425 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 426 427 /** 428 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 429 * encoding, {@code '\uD800'}. 430 * 431 * @since 1.5 432 */ 433 public static final char MIN_HIGH_SURROGATE = '\uD800'; 434 435 /** 436 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 437 * encoding, {@code '\uDBFF'}. 438 * 439 * @since 1.5 440 */ 441 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 442 443 /** 444 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 445 * encoding, {@code '\uDC00'}. 
446 * 447 * @since 1.5 448 */ 449 public static final char MIN_LOW_SURROGATE = '\uDC00'; 450 451 /** 452 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 453 * encoding, {@code '\uDFFF'}. 454 * 455 * @since 1.5 456 */ 457 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 458 459 /** 460 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 461 * 462 * @since 1.5 463 */ 464 public static final char MIN_SURROGATE = '\uD800'; 465 466 /** 467 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 468 * 469 * @since 1.5 470 */ 471 public static final char MAX_SURROGATE = '\uDFFF'; 472 473 /** 474 * The minimum value of a supplementary code point, {@code U+010000}. 475 * 476 * @since 1.5 477 */ 478 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 479 480 /** 481 * The minimum code point value, {@code U+0000}. 482 * 483 * @since 1.5 484 */ 485 public static final int MIN_CODE_POINT = 0x000000; 486 487 /** 488 * The maximum code point value, {@code U+10FFFF}. 489 * 490 * @since 1.5 491 */ 492 public static final int MAX_CODE_POINT = 0x10FFFF; 493 494 /** 495 * The number of bits required to represent a {@code Character} value in 496 * unsigned form. 
497 * 498 * @since 1.5 499 */ 500 public static final int SIZE = 16; 501 /* Lookup table that holds each DIRECTIONALITY_* constant with a value from 0 through 18 exactly once (DIRECTIONALITY_UNDEFINED is not present); the entry order differs from the constants' numeric order. NOTE(review): presumably indexed by a direction code produced by code outside this view - confirm against the lookup site. */ 502 private static final byte[] DIRECTIONALITY = new byte[] { 503 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 504 DIRECTIONALITY_EUROPEAN_NUMBER, 505 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 506 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 507 DIRECTIONALITY_ARABIC_NUMBER, 508 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 509 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 510 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 511 DIRECTIONALITY_OTHER_NEUTRALS, 512 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 513 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 514 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 515 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 516 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 517 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 518 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 519 520 /** 521 * Represents a subset of the Unicode character set. Subsets are compared by identity: {@link #equals(Object)} and {@link #hashCode()} are final and defer to {@link Object}. 522 */ 523 public static class Subset { 524 /** This subset's name, as returned by {@link #toString()}. */ String name; 525 526 /** 527 * Constructs a new {@code Subset}. 528 * 529 * @param string 530 * this subset's name. * @throws NullPointerException if {@code string} is {@code null}. 531 */ 532 protected Subset(String string) { 533 if (string == null) { 534 throw new NullPointerException(); 535 } 536 name = string; 537 } 538 539 /** 540 * Compares this character subset with the specified object. Uses 541 * {@link java.lang.Object#equals(Object)} to do the comparison. 542 * 543 * @param object 544 * the object to compare this character subset with. 545 * @return {@code true} if {@code object} is this subset, that is, if 546 * {@code object == this}; {@code false} otherwise. 547 */ 548 @Override 549 public final boolean equals(Object object) { 550 return super.equals(object); 551 } 552 553 /** 554 * Returns the integer hash code for this character subset. 555 * 556 * @return this subset's hash code, which is the hash code computed by 557 * {@link java.lang.Object#hashCode()}. 
558 */ 559 @Override 560 public final int hashCode() { 561 return super.hashCode(); 562 } 563 564 /** 565 * Returns the string representation of this subset. 566 * 567 * @return this subset's name. 568 */ 569 @Override 570 public final String toString() { 571 return name; 572 } 573 } 574 575 /** 576 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 577 * specification. 578 * 579 * @since 1.2 580 */ 581 public static final class UnicodeBlock extends Subset { 582 /** 583 * The "Surrogates Area" Unicode Block. 584 * 585 * @deprecated As of Java 5, this block has been replaced by 586 * {@link #HIGH_SURROGATES}, 587 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 588 * {@link #LOW_SURROGATES}. 589 */ 590 @Deprecated 591 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA", 0x0, 0x0); 592 /** 593 * The "Basic Latin" Unicode Block. 594 * 595 * @since 1.2 596 */ 597 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN", 0x0, 0x7f); 598 /** 599 * The "Latin-1 Supplement" Unicode Block. 600 * 601 * @since 1.2 602 */ 603 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT", 0x80, 0xff); 604 /** 605 * The "Latin Extended-A" Unicode Block. 606 * 607 * @since 1.2 608 */ 609 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A", 0x100, 0x17f); 610 /** 611 * The "Latin Extended-B" Unicode Block. 612 * 613 * @since 1.2 614 */ 615 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B", 0x180, 0x24f); 616 /** 617 * The "IPA Extensions" Unicode Block. 618 * 619 * @since 1.2 620 */ 621 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS", 0x250, 0x2af); 622 /** 623 * The "Spacing Modifier Letters" Unicode Block. 
624 * 625 * @since 1.2 626 */ 627 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS", 0x2b0, 0x2ff); 628 /** 629 * The "Combining Diacritical Marks" Unicode Block. 630 * 631 * @since 1.2 632 */ 633 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 0x300, 0x36f); 634 /** 635 * The "Greek and Coptic" Unicode Block. Previously referred 636 * to as "Greek". 637 * 638 * @since 1.2 639 */ 640 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK", 0x370, 0x3ff); 641 /** 642 * The "Cyrillic" Unicode Block. 643 * 644 * @since 1.2 645 */ 646 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC", 0x400, 0x4ff); 647 /** 648 * The "Cyrillic Supplement" Unicode Block. Previously 649 * referred to as "Cyrillic Supplementary". 650 * 651 * @since 1.5 652 */ 653 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 0x500, 0x52f); 654 /** 655 * The "Armenian" Unicode Block. 656 * 657 * @since 1.2 658 */ 659 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN", 0x530, 0x58f); 660 /** 661 * The "Hebrew" Unicode Block. 662 * 663 * @since 1.2 664 */ 665 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW", 0x590, 0x5ff); 666 /** 667 * The "Arabic" Unicode Block. 668 * 669 * @since 1.2 670 */ 671 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC", 0x600, 0x6ff); 672 /** 673 * The "Syriac" Unicode Block. 674 * 675 * @since 1.4 676 */ 677 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC", 0x700, 0x74f); 678 /** 679 * The "Thaana" Unicode Block. 680 * 681 * @since 1.4 682 */ 683 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA", 0x780, 0x7bf); 684 /** 685 * The "Devanagari" Unicode Block. 
686 * 687 * @since 1.2 688 */ 689 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI", 0x900, 0x97f); 690 /** 691 * The "Bengali" Unicode Block. 692 * 693 * @since 1.2 694 */ 695 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI", 0x980, 0x9ff); 696 /** 697 * The "Gurmukhi" Unicode Block. 698 * 699 * @since 1.2 700 */ 701 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI", 0xa00, 0xa7f); 702 /** 703 * The "Gujarati" Unicode Block. 704 * 705 * @since 1.2 706 */ 707 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI", 0xa80, 0xaff); 708 /** 709 * The "Oriya" Unicode Block. 710 * 711 * @since 1.2 712 */ 713 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA", 0xb00, 0xb7f); 714 /** 715 * The "Tamil" Unicode Block. 716 * 717 * @since 1.2 718 */ 719 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL", 0xb80, 0xbff); 720 /** 721 * The "Telugu" Unicode Block. 722 * 723 * @since 1.2 724 */ 725 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU", 0xc00, 0xc7f); 726 /** 727 * The "Kannada" Unicode Block. 728 * 729 * @since 1.2 730 */ 731 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA", 0xc80, 0xcff); 732 /** 733 * The "Malayalam" Unicode Block. 734 * 735 * @since 1.2 736 */ 737 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM", 0xd00, 0xd7f); 738 /** 739 * The "Sinhala" Unicode Block. 740 * 741 * @since 1.4 742 */ 743 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA", 0xd80, 0xdff); 744 /** 745 * The "Thai" Unicode Block. 746 * 747 * @since 1.2 748 */ 749 public static final UnicodeBlock THAI = new UnicodeBlock("THAI", 0xe00, 0xe7f); 750 /** 751 * The "Lao" Unicode Block. 752 * 753 * @since 1.2 754 */ 755 public static final UnicodeBlock LAO = new UnicodeBlock("LAO", 0xe80, 0xeff); 756 /** 757 * The "Tibetan" Unicode Block. 
758 * 759 * @since 1.2 760 */ 761 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN", 0xf00, 0xfff); 762 /** 763 * The "Myanmar" Unicode Block. 764 * 765 * @since 1.4 766 */ 767 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR", 0x1000, 0x109f); 768 /** 769 * The "Georgian" Unicode Block. 770 * 771 * @since 1.2 772 */ 773 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN", 0x10a0, 0x10ff); 774 /** 775 * The "Hangul Jamo" Unicode Block. 776 * 777 * @since 1.2 778 */ 779 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO", 0x1100, 0x11ff); 780 /** 781 * The "Ethiopic" Unicode Block. 782 * 783 * @since 1.4 784 */ 785 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC", 0x1200, 0x137f); 786 /** 787 * The "Cherokee" Unicode Block. 788 * 789 * @since 1.4 790 */ 791 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE", 0x13a0, 0x13ff); 792 /** 793 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 794 * 795 * @since 1.4 796 */ 797 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 0x1400, 0x167f); 798 /** 799 * The "Ogham" Unicode Block. 800 * 801 * @since 1.4 802 */ 803 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM", 0x1680, 0x169f); 804 /** 805 * The "Runic" Unicode Block. 806 * 807 * @since 1.4 808 */ 809 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC", 0x16a0, 0x16ff); 810 /** 811 * The "Tagalog" Unicode Block. 812 * 813 * @since 1.5 814 */ 815 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG", 0x1700, 0x171f); 816 /** 817 * The "Hanunoo" Unicode Block. 818 * 819 * @since 1.5 820 */ 821 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO", 0x1720, 0x173f); 822 /** 823 * The "Buhid" Unicode Block. 
824 * 825 * @since 1.5 826 */ 827 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID", 0x1740, 0x175f); 828 /** 829 * The "Tagbanwa" Unicode Block. 830 * 831 * @since 1.5 832 */ 833 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA", 0x1760, 0x177f); 834 /** 835 * The "Khmer" Unicode Block. 836 * 837 * @since 1.4 838 */ 839 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER", 0x1780, 0x17ff); 840 /** 841 * The "Mongolian" Unicode Block. 842 * 843 * @since 1.4 844 */ 845 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN", 0x1800, 0x18af); 846 /** 847 * The "Limbu" Unicode Block. 848 * 849 * @since 1.5 850 */ 851 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU", 0x1900, 0x194f); 852 /** 853 * The "Tai Le" Unicode Block. 854 * 855 * @since 1.5 856 */ 857 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE", 0x1950, 0x197f); 858 /** 859 * The "Khmer Symbols" Unicode Block. 860 * 861 * @since 1.5 862 */ 863 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS", 0x19e0, 0x19ff); 864 /** 865 * The "Phonetic Extensions" Unicode Block. 866 * 867 * @since 1.5 868 */ 869 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS", 0x1d00, 0x1d7f); 870 /** 871 * The "Latin Extended Additional" Unicode Block. 872 * 873 * @since 1.2 874 */ 875 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 0x1e00, 0x1eff); 876 /** 877 * The "Greek Extended" Unicode Block. 878 * 879 * @since 1.2 880 */ 881 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED", 0x1f00, 0x1fff); 882 /** 883 * The "General Punctuation" Unicode Block. 884 * 885 * @since 1.2 886 */ 887 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION", 0x2000, 0x206f); 888 /** 889 * The "Superscripts and Subscripts" Unicode Block. 
890 * 891 * @since 1.2 892 */ 893 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 0x2070, 0x209f); 894 /** 895 * The "Currency Symbols" Unicode Block. 896 * 897 * @since 1.2 898 */ 899 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS", 0x20a0, 0x20cf); 900 /** 901 * The "Combining Diacritical Marks for Symbols" Unicode 902 * Block. Previously referred to as "Combining Marks for 903 * Symbols". 904 * 905 * @since 1.2 906 */ 907 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 0x20d0, 0x20ff); 908 /** 909 * The "Letterlike Symbols" Unicode Block. 910 * 911 * @since 1.2 912 */ 913 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS", 0x2100, 0x214f); 914 /** 915 * The "Number Forms" Unicode Block. 916 * 917 * @since 1.2 918 */ 919 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS", 0x2150, 0x218f); 920 /** 921 * The "Arrows" Unicode Block. 922 * 923 * @since 1.2 924 */ 925 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS", 0x2190, 0x21ff); 926 /** 927 * The "Mathematical Operators" Unicode Block. 928 * 929 * @since 1.2 930 */ 931 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS", 0x2200, 0x22ff); 932 /** 933 * The "Miscellaneous Technical" Unicode Block. 934 * 935 * @since 1.2 936 */ 937 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 0x2300, 0x23ff); 938 /** 939 * The "Control Pictures" Unicode Block. 940 * 941 * @since 1.2 942 */ 943 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES", 0x2400, 0x243f); 944 /** 945 * The "Optical Character Recognition" Unicode Block. 
946 * 947 * @since 1.2 948 */ 949 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 0x2440, 0x245f); 950 /** 951 * The "Enclosed Alphanumerics" Unicode Block. 952 * 953 * @since 1.2 954 */ 955 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 0x2460, 0x24ff); 956 /** 957 * The "Box Drawing" Unicode Block. 958 * 959 * @since 1.2 960 */ 961 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING", 0x2500, 0x257f); 962 /** 963 * The "Block Elements" Unicode Block. 964 * 965 * @since 1.2 966 */ 967 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS", 0x2580, 0x259f); 968 /** 969 * The "Geometric Shapes" Unicode Block. 970 * 971 * @since 1.2 972 */ 973 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES", 0x25a0, 0x25ff); 974 /** 975 * The "Miscellaneous Symbols" Unicode Block. 976 * 977 * @since 1.2 978 */ 979 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 0x2600, 0x26ff); 980 /** 981 * The "Dingbats" Unicode Block. 982 * 983 * @since 1.2 984 */ 985 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS", 0x2700, 0x27bf); 986 /** 987 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 988 * 989 * @since 1.5 990 */ 991 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 0x27c0, 0x27ef); 992 /** 993 * The "Supplemental Arrows-A" Unicode Block. 994 * 995 * @since 1.5 996 */ 997 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 0x27f0, 0x27ff); 998 /** 999 * The "Braille Patterns" Unicode Block. 
1000 * 1001 * @since 1.4 1002 */ 1003 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS", 0x2800, 0x28ff); 1004 /** 1005 * The "Supplemental Arrows-B" Unicode Block. 1006 * 1007 * @since 1.5 1008 */ 1009 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 0x2900, 0x297f); 1010 /** 1011 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 1012 * 1013 * @since 1.5 1014 */ 1015 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 0x2980, 0x29ff); 1016 /** 1017 * The "Supplemental Mathematical Operators" Unicode Block. 1018 * 1019 * @since 1.5 1020 */ 1021 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 0x2a00, 0x2aff); 1022 /** 1023 * The "Miscellaneous Symbols and Arrows" Unicode Block. 1024 * 1025 * @since 1.2 1026 */ 1027 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 0x2b00, 0x2bff); 1028 /** 1029 * The "CJK Radicals Supplement" Unicode Block. 1030 * 1031 * @since 1.4 1032 */ 1033 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 0x2e80, 0x2eff); 1034 /** 1035 * The "Kangxi Radicals" Unicode Block. 1036 * 1037 * @since 1.4 1038 */ 1039 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS", 0x2f00, 0x2fdf); 1040 /** 1041 * The "Ideographic Description Characters" Unicode Block. 1042 * 1043 * @since 1.4 1044 */ 1045 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 0x2ff0, 0x2fff); 1046 /** 1047 * The "CJK Symbols and Punctuation" Unicode Block. 
* Declared range: U+3000 to U+303F.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 0x3000, 0x303f);
/**
 * The "Hiragana" Unicode Block.
 * Declared range: U+3040 to U+309F.
 *
 * @since 1.2
 */
public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA", 0x3040, 0x309f);
/**
 * The "Katakana" Unicode Block.
 * Declared range: U+30A0 to U+30FF.
 *
 * @since 1.2
 */
public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA", 0x30a0, 0x30ff);
/**
 * The "Bopomofo" Unicode Block.
 * Declared range: U+3100 to U+312F.
 *
 * @since 1.2
 */
public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO", 0x3100, 0x312f);
/**
 * The "Hangul Compatibility Jamo" Unicode Block.
 * Declared range: U+3130 to U+318F.
 *
 * @since 1.2
 */
public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 0x3130, 0x318f);
/**
 * The "Kanbun" Unicode Block.
 * Declared range: U+3190 to U+319F.
 *
 * @since 1.2
 */
public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN", 0x3190, 0x319f);
/**
 * The "Bopomofo Extended" Unicode Block.
 * Declared range: U+31A0 to U+31BF.
 *
 * @since 1.4
 */
public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED", 0x31a0, 0x31bf);
/**
 * The "Katakana Phonetic Extensions" Unicode Block.
 * Declared range: U+31F0 to U+31FF.
 *
 * @since 1.5
 */
public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 0x31f0, 0x31ff);
/**
 * The "Enclosed CJK Letters and Months" Unicode Block.
 * Declared range: U+3200 to U+32FF.
 *
 * @since 1.2
 */
public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 0x3200, 0x32ff);
/**
 * The "CJK Compatibility" Unicode Block.
* Declared range: U+3300 to U+33FF.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY", 0x3300, 0x33ff);
/**
 * The "CJK Unified Ideographs Extension A" Unicode Block.
 * Declared range: U+3400 to U+4DBF.
 *
 * @since 1.4
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 0x3400, 0x4dbf);
/**
 * The "Yijing Hexagram Symbols" Unicode Block.
 * Declared range: U+4DC0 to U+4DFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 0x4dc0, 0x4dff);
/**
 * The "CJK Unified Ideographs" Unicode Block.
 * Declared range: U+4E00 to U+9FFF.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 0x4e00, 0x9fff);
/**
 * The "Yi Syllables" Unicode Block.
 * Declared range: U+A000 to U+A48F.
 *
 * @since 1.4
 */
public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES", 0xa000, 0xa48f);
/**
 * The "Yi Radicals" Unicode Block.
 * Declared range: U+A490 to U+A4CF.
 *
 * @since 1.4
 */
public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS", 0xa490, 0xa4cf);
/**
 * The "Hangul Syllables" Unicode Block.
 * Declared range: U+AC00 to U+D7AF.
 *
 * @since 1.2
 */
public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES", 0xac00, 0xd7af);
/**
 * The "High Surrogates" Unicode Block. This block represents
 * code point values in the high surrogate range 0xD800 to 0xDB7F.
 *
 * @since 1.5
 */
public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES", 0xd800, 0xdb7f);
/**
 * The "High Private Use Surrogates" Unicode Block.
* This block represents code point values in the high surrogate range
 * 0xDB80 to 0xDBFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 0xdb80, 0xdbff);
/**
 * The "Low Surrogates" Unicode Block. This block represents
 * code point values in the low surrogate range 0xDC00 to 0xDFFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES", 0xdc00, 0xdfff);
/**
 * The "Private Use Area" Unicode Block.
 * Declared range: U+E000 to U+F8FF.
 *
 * @since 1.2
 */
public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA", 0xe000, 0xf8ff);
/**
 * The "CJK Compatibility Ideographs" Unicode Block.
 * Declared range: U+F900 to U+FAFF.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 0xf900, 0xfaff);
/**
 * The "Alphabetic Presentation Forms" Unicode Block.
 * Declared range: U+FB00 to U+FB4F.
 *
 * @since 1.2
 */
public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 0xfb00, 0xfb4f);
/**
 * The "Arabic Presentation Forms-A" Unicode Block.
 * Declared range: U+FB50 to U+FDFF.
 *
 * @since 1.2
 */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 0xfb50, 0xfdff);
/**
 * The "Variation Selectors" Unicode Block.
 * Declared range: U+FE00 to U+FE0F.
 *
 * @since 1.5
 */
public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS", 0xfe00, 0xfe0f);
/**
 * The "Combining Half Marks" Unicode Block.
 * Declared range: U+FE20 to U+FE2F.
 *
 * @since 1.2
 */
public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS", 0xfe20, 0xfe2f);
/**
 * The "CJK Compatibility Forms" Unicode Block.
* Declared range: U+FE30 to U+FE4F.
 *
 * @since 1.2
 */
public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 0xfe30, 0xfe4f);
/**
 * The "Small Form Variants" Unicode Block.
 * Declared range: U+FE50 to U+FE6F.
 *
 * @since 1.2
 */
public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS", 0xfe50, 0xfe6f);
/**
 * The "Arabic Presentation Forms-B" Unicode Block.
 * Declared range: U+FE70 to U+FEFF.
 *
 * @since 1.2
 */
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 0xfe70, 0xfeff);
/**
 * The "Halfwidth and Fullwidth Forms" Unicode Block.
 * Declared range: U+FF00 to U+FFEF.
 *
 * @since 1.2
 */
public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 0xff00, 0xffef);
/**
 * The "Specials" Unicode Block.
 * Declared range: U+FFF0 to U+FFFF.
 *
 * @since 1.2
 */
public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS", 0xfff0, 0xffff);
/**
 * The "Linear B Syllabary" Unicode Block.
 * Declared range: U+10000 to U+1007F.
 *
 * @since 1.5
 */
public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY", 0x10000, 0x1007f);
/**
 * The "Linear B Ideograms" Unicode Block.
 * Declared range: U+10080 to U+100FF.
 *
 * @since 1.5
 */
public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS", 0x10080, 0x100ff);
/**
 * The "Aegean Numbers" Unicode Block.
 * Declared range: U+10100 to U+1013F.
 *
 * @since 1.5
 */
public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS", 0x10100, 0x1013f);
/**
 * The "Old Italic" Unicode Block.
 * Declared range: U+10300 to U+1032F.
 *
 * @since 1.5
 */
public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC", 0x10300, 0x1032f);
/**
 * The "Gothic" Unicode Block.
* Declared range: U+10330 to U+1034F.
 *
 * @since 1.5
 */
public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC", 0x10330, 0x1034f);
/**
 * The "Ugaritic" Unicode Block.
 * Declared range: U+10380 to U+1039F.
 *
 * @since 1.5
 */
public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC", 0x10380, 0x1039f);
/**
 * The "Deseret" Unicode Block.
 * Declared range: U+10400 to U+1044F.
 *
 * @since 1.5
 */
public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET", 0x10400, 0x1044f);
/**
 * The "Shavian" Unicode Block.
 * Declared range: U+10450 to U+1047F.
 *
 * @since 1.5
 */
public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN", 0x10450, 0x1047f);
/**
 * The "Osmanya" Unicode Block.
 * Declared range: U+10480 to U+104AF.
 *
 * @since 1.5
 */
public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA", 0x10480, 0x104af);
/**
 * The "Cypriot Syllabary" Unicode Block.
 * Declared range: U+10800 to U+1083F.
 *
 * @since 1.5
 */
public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY", 0x10800, 0x1083f);
/**
 * The "Byzantine Musical Symbols" Unicode Block.
 * Declared range: U+1D000 to U+1D0FF.
 *
 * @since 1.5
 */
public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 0x1d000, 0x1d0ff);
/**
 * The "Musical Symbols" Unicode Block.
 * Declared range: U+1D100 to U+1D1FF.
 *
 * @since 1.5
 */
public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS", 0x1d100, 0x1d1ff);
/**
 * The "Tai Xuan Jing Symbols" Unicode Block.
 * Declared range: U+1D300 to U+1D35F.
 *
 * @since 1.5
 */
public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 0x1d300, 0x1d35f);
/**
 * The "Mathematical Alphanumeric Symbols" Unicode Block.
 * Declared range: U+1D400 to U+1D7FF.
 *
 * @since 1.5
 */
public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 0x1d400, 0x1d7ff);
/**
 * The "CJK Unified Ideographs Extension B" Unicode Block.
* Declared range: U+20000 to U+2A6DF.
 *
 * @since 1.5
 */
public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 0x20000, 0x2a6df);
/**
 * The "CJK Compatibility Ideographs Supplement" Unicode Block.
 * Declared range: U+2F800 to U+2FA1F.
 *
 * @since 1.5
 */
public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 0x2f800, 0x2fa1f);
/**
 * The "Tags" Unicode Block.
 * Declared range: U+E0000 to U+E007F.
 *
 * @since 1.5
 */
public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS", 0xe0000, 0xe007f);
/**
 * The "Variation Selectors Supplement" Unicode Block.
 * Declared range: U+E0100 to U+E01EF.
 *
 * @since 1.5
 */
public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 0xe0100, 0xe01ef);
/**
 * The "Supplementary Private Use Area-A" Unicode Block.
 * Declared range: U+F0000 to U+FFFFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 0xf0000, 0xfffff);
/**
 * The "Supplementary Private Use Area-B" Unicode Block.
 * Declared range: U+100000 to U+10FFFF.
 *
 * @since 1.5
 */
public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 0x100000, 0x10ffff);

/*
 * All of the UnicodeBlocks with valid ranges in ascending order.
1347 */ 1348 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1349 null, 1350 UnicodeBlock.BASIC_LATIN, 1351 UnicodeBlock.LATIN_1_SUPPLEMENT, 1352 UnicodeBlock.LATIN_EXTENDED_A, 1353 UnicodeBlock.LATIN_EXTENDED_B, 1354 UnicodeBlock.IPA_EXTENSIONS, 1355 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1356 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1357 UnicodeBlock.GREEK, 1358 UnicodeBlock.CYRILLIC, 1359 UnicodeBlock.ARMENIAN, 1360 UnicodeBlock.HEBREW, 1361 UnicodeBlock.ARABIC, 1362 UnicodeBlock.SYRIAC, 1363 UnicodeBlock.THAANA, 1364 UnicodeBlock.DEVANAGARI, 1365 UnicodeBlock.BENGALI, 1366 UnicodeBlock.GURMUKHI, 1367 UnicodeBlock.GUJARATI, 1368 UnicodeBlock.ORIYA, 1369 UnicodeBlock.TAMIL, 1370 UnicodeBlock.TELUGU, 1371 UnicodeBlock.KANNADA, 1372 UnicodeBlock.MALAYALAM, 1373 UnicodeBlock.SINHALA, 1374 UnicodeBlock.THAI, 1375 UnicodeBlock.LAO, 1376 UnicodeBlock.TIBETAN, 1377 UnicodeBlock.MYANMAR, 1378 UnicodeBlock.GEORGIAN, 1379 UnicodeBlock.HANGUL_JAMO, 1380 UnicodeBlock.ETHIOPIC, 1381 UnicodeBlock.CHEROKEE, 1382 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1383 UnicodeBlock.OGHAM, 1384 UnicodeBlock.RUNIC, 1385 UnicodeBlock.KHMER, 1386 UnicodeBlock.MONGOLIAN, 1387 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1388 UnicodeBlock.GREEK_EXTENDED, 1389 UnicodeBlock.GENERAL_PUNCTUATION, 1390 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1391 UnicodeBlock.CURRENCY_SYMBOLS, 1392 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1393 UnicodeBlock.LETTERLIKE_SYMBOLS, 1394 UnicodeBlock.NUMBER_FORMS, 1395 UnicodeBlock.ARROWS, 1396 UnicodeBlock.MATHEMATICAL_OPERATORS, 1397 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1398 UnicodeBlock.CONTROL_PICTURES, 1399 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1400 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1401 UnicodeBlock.BOX_DRAWING, 1402 UnicodeBlock.BLOCK_ELEMENTS, 1403 UnicodeBlock.GEOMETRIC_SHAPES, 1404 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1405 UnicodeBlock.DINGBATS, 1406 UnicodeBlock.BRAILLE_PATTERNS, 1407 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1408 
UnicodeBlock.KANGXI_RADICALS, 1409 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 1410 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1411 UnicodeBlock.HIRAGANA, 1412 UnicodeBlock.KATAKANA, 1413 UnicodeBlock.BOPOMOFO, 1414 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1415 UnicodeBlock.KANBUN, 1416 UnicodeBlock.BOPOMOFO_EXTENDED, 1417 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1418 UnicodeBlock.CJK_COMPATIBILITY, 1419 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1420 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1421 UnicodeBlock.YI_SYLLABLES, 1422 UnicodeBlock.YI_RADICALS, 1423 UnicodeBlock.HANGUL_SYLLABLES, 1424 UnicodeBlock.HIGH_SURROGATES, 1425 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1426 UnicodeBlock.LOW_SURROGATES, 1427 UnicodeBlock.PRIVATE_USE_AREA, 1428 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1429 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1430 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1431 UnicodeBlock.COMBINING_HALF_MARKS, 1432 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1433 UnicodeBlock.SMALL_FORM_VARIANTS, 1434 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1435 UnicodeBlock.SPECIALS, 1436 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1437 UnicodeBlock.OLD_ITALIC, 1438 UnicodeBlock.GOTHIC, 1439 UnicodeBlock.DESERET, 1440 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1441 UnicodeBlock.MUSICAL_SYMBOLS, 1442 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1443 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1444 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1445 UnicodeBlock.TAGS, 1446 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1447 UnicodeBlock.TAGALOG, 1448 UnicodeBlock.HANUNOO, 1449 UnicodeBlock.BUHID, 1450 UnicodeBlock.TAGBANWA, 1451 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1452 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1453 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1454 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1455 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1456 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1457 UnicodeBlock.VARIATION_SELECTORS, 1458 
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1459 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1460 UnicodeBlock.LIMBU, 1461 UnicodeBlock.TAI_LE, 1462 UnicodeBlock.KHMER_SYMBOLS, 1463 UnicodeBlock.PHONETIC_EXTENSIONS, 1464 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1465 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1466 UnicodeBlock.LINEAR_B_SYLLABARY, 1467 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1468 UnicodeBlock.AEGEAN_NUMBERS, 1469 UnicodeBlock.UGARITIC, 1470 UnicodeBlock.SHAVIAN, 1471 UnicodeBlock.OSMANYA, 1472 UnicodeBlock.CYPRIOT_SYLLABARY, 1473 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1474 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT 1475 }; 1476 1477 /** 1478 * Retrieves the constant that corresponds to the specified block name. 1479 * The block names are defined by the Unicode 4.0.1 specification in the 1480 * {@code Blocks-4.0.1.txt} file. 1481 * <p> 1482 * Block names may be one of the following: 1483 * <ul> 1484 * <li>Canonical block name, as defined by the Unicode specification; 1485 * case-insensitive.</li> 1486 * <li>Canonical block name without any spaces, as defined by the 1487 * Unicode specification; case-insensitive.</li> 1488 * <li>{@code UnicodeBlock} constant identifier. This is determined by 1489 * uppercasing the canonical name and replacing all spaces and hyphens 1490 * with underscores.</li> 1491 * </ul> 1492 * 1493 * @param blockName 1494 * the name of the block to retrieve. 1495 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1496 * @throws NullPointerException 1497 * if {@code blockName} is {@code null}. 1498 * @throws IllegalArgumentException 1499 * if {@code blockName} is not a valid block name. 
1500 * @since 1.5 1501 */ 1502 public static UnicodeBlock forName(String blockName) { 1503 if (blockName == null) { 1504 throw new NullPointerException(); 1505 } 1506 int block = forNameImpl(blockName); 1507 if (block == -1) { 1508 if (blockName.equals("SURROGATES_AREA")) { 1509 return SURROGATES_AREA; 1510 } else if(blockName.equalsIgnoreCase("greek")) { 1511 return GREEK; 1512 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1513 blockName.equals("Combining Marks for Symbols") || 1514 blockName.equals("CombiningMarksforSymbols")) { 1515 return COMBINING_MARKS_FOR_SYMBOLS; 1516 } 1517 throw new IllegalArgumentException("Bad block name: " + blockName); 1518 } 1519 return BLOCKS[block]; 1520 } 1521 1522 /** 1523 * Gets the constant for the Unicode block that contains the specified 1524 * character. 1525 * 1526 * @param c 1527 * the character for which to get the {@code UnicodeBlock} 1528 * constant. 1529 * @return the {@code UnicodeBlock} constant for the block that contains 1530 * {@code c}, or {@code null} if {@code c} does not belong to 1531 * any defined block. 1532 */ 1533 public static UnicodeBlock of(char c) { 1534 return of((int) c); 1535 } 1536 1537 /** 1538 * Gets the constant for the Unicode block that contains the specified 1539 * Unicode code point. 1540 * 1541 * @param codePoint 1542 * the Unicode code point for which to get the 1543 * {@code UnicodeBlock} constant. 1544 * @return the {@code UnicodeBlock} constant for the block that contains 1545 * {@code codePoint}, or {@code null} if {@code codePoint} does 1546 * not belong to any defined block. 1547 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
1548 * @since 1.5 1549 */ 1550 public static UnicodeBlock of(int codePoint) { 1551 checkValidCodePoint(codePoint); 1552 int block = ofImpl(codePoint); 1553 if (block == -1 || block >= BLOCKS.length) { 1554 return null; 1555 } 1556 return BLOCKS[block]; 1557 } 1558 1559 private UnicodeBlock(String blockName, int start, int end) { 1560 super(blockName); 1561 } 1562 } 1563 1564 private static native int forNameImpl(String blockName); 1565 1566 private static native int ofImpl(int codePoint); 1567 1568 /** 1569 * Constructs a new {@code Character} with the specified primitive char 1570 * value. 1571 * 1572 * @param value 1573 * the primitive char value to store in the new instance. 1574 */ 1575 public Character(char value) { 1576 this.value = value; 1577 } 1578 1579 /** 1580 * Gets the primitive value of this character. 1581 * 1582 * @return this object's primitive value. 1583 */ 1584 public char charValue() { 1585 return value; 1586 } 1587 1588 private static void checkValidCodePoint(int codePoint) { 1589 if (!isValidCodePoint(codePoint)) { 1590 throw new IllegalArgumentException("Invalid code point: " + codePoint); 1591 } 1592 } 1593 1594 /** 1595 * Compares this object to the specified character object to determine their 1596 * relative order. 1597 * 1598 * @param c 1599 * the character object to compare this object to. 1600 * @return {@code 0} if the value of this character and the value of 1601 * {@code c} are equal; a positive value if the value of this 1602 * character is greater than the value of {@code c}; a negative 1603 * value if the value of this character is less than the value of 1604 * {@code c}. 1605 * @see java.lang.Comparable 1606 * @since 1.2 1607 */ 1608 public int compareTo(Character c) { 1609 return compare(value, c.value); 1610 } 1611 1612 /** 1613 * Compares two {@code char} values. 1614 * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. 
1615 * @since 1.7 1616 * @hide 1.7 1617 */ 1618 public static int compare(char lhs, char rhs) { 1619 return lhs - rhs; 1620 } 1621 1622 /** 1623 * Returns a {@code Character} instance for the {@code char} value passed. 1624 * <p> 1625 * If it is not necessary to get a new {@code Character} instance, it is 1626 * recommended to use this method instead of the constructor, since it 1627 * maintains a cache of instances which may result in better performance. 1628 * 1629 * @param c 1630 * the char value for which to get a {@code Character} instance. 1631 * @return the {@code Character} instance for {@code c}. 1632 * @since 1.5 1633 */ 1634 public static Character valueOf(char c) { 1635 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1636 } 1637 1638 /** 1639 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1640 */ 1641 private static final Character[] SMALL_VALUES = new Character[128]; 1642 1643 static { 1644 for(int i = 0; i < 128; i++) { 1645 SMALL_VALUES[i] = new Character((char) i); 1646 } 1647 } 1648 /** 1649 * Indicates whether {@code codePoint} is a valid Unicode code point. 1650 * 1651 * @param codePoint 1652 * the code point to test. 1653 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1654 * {@code false} otherwise. 1655 * @since 1.5 1656 */ 1657 public static boolean isValidCodePoint(int codePoint) { 1658 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1659 } 1660 1661 /** 1662 * Indicates whether {@code codePoint} is within the supplementary code 1663 * point range. 1664 * 1665 * @param codePoint 1666 * the code point to test. 1667 * @return {@code true} if {@code codePoint} is within the supplementary 1668 * code point range; {@code false} otherwise. 
1669 * @since 1.5 1670 */ 1671 public static boolean isSupplementaryCodePoint(int codePoint) { 1672 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1673 } 1674 1675 /** 1676 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1677 * that is used for representing supplementary characters in UTF-16 1678 * encoding. 1679 * 1680 * @param ch 1681 * the character to test. 1682 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1683 * {@code false} otherwise. 1684 * @see #isLowSurrogate(char) 1685 * @since 1.5 1686 */ 1687 public static boolean isHighSurrogate(char ch) { 1688 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1689 } 1690 1691 /** 1692 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1693 * that is used for representing supplementary characters in UTF-16 1694 * encoding. 1695 * 1696 * @param ch 1697 * the character to test. 1698 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1699 * {@code false} otherwise. 1700 * @see #isHighSurrogate(char) 1701 * @since 1.5 1702 */ 1703 public static boolean isLowSurrogate(char ch) { 1704 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1705 } 1706 1707 /** 1708 * Tests whether the given character is a high or low surrogate. 1709 * @since 1.7 1710 * @hide 1.7 1711 */ 1712 public static boolean isSurrogate(char ch) { 1713 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 1714 } 1715 1716 /** 1717 * Indicates whether the specified character pair is a valid surrogate pair. 1718 * 1719 * @param high 1720 * the high surrogate unit to test. 1721 * @param low 1722 * the low surrogate unit to test. 1723 * @return {@code true} if {@code high} is a high-surrogate code unit and 1724 * {@code low} is a low-surrogate code unit; {@code false} 1725 * otherwise. 
1726 * @see #isHighSurrogate(char) 1727 * @see #isLowSurrogate(char) 1728 * @since 1.5 1729 */ 1730 public static boolean isSurrogatePair(char high, char low) { 1731 return (isHighSurrogate(high) && isLowSurrogate(low)); 1732 } 1733 1734 /** 1735 * Calculates the number of {@code char} values required to represent the 1736 * specified Unicode code point. This method checks if the {@code codePoint} 1737 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1738 * returned, otherwise {@code 1}. To test if the code point is valid, use 1739 * the {@link #isValidCodePoint(int)} method. 1740 * 1741 * @param codePoint 1742 * the code point for which to calculate the number of required 1743 * chars. 1744 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 1745 * @see #isValidCodePoint(int) 1746 * @see #isSupplementaryCodePoint(int) 1747 * @since 1.5 1748 */ 1749 public static int charCount(int codePoint) { 1750 return (codePoint >= 0x10000 ? 2 : 1); 1751 } 1752 1753 /** 1754 * Converts a surrogate pair into a Unicode code point. This method assumes 1755 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1756 * surrogates, then the result is indeterminate. The 1757 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1758 * method to validate the pair. 1759 * 1760 * @param high 1761 * the high surrogate unit. 1762 * @param low 1763 * the low surrogate unit. 1764 * @return the Unicode code point corresponding to the surrogate unit pair. 1765 * @see #isSurrogatePair(char, char) 1766 * @since 1.5 1767 */ 1768 public static int toCodePoint(char high, char low) { 1769 // See RFC 2781, Section 2.2 1770 // http://www.ietf.org/rfc/rfc2781.txt 1771 int h = (high & 0x3FF) << 10; 1772 int l = low & 0x3FF; 1773 return (h | l) + 0x10000; 1774 } 1775 1776 /** 1777 * Returns the code point at {@code index} in the specified sequence of 1778 * character units. 
If the unit at {@code index} is a high-surrogate unit, 1779 * {@code index + 1} is less than the length of the sequence and the unit at 1780 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1781 * point represented by the pair is returned; otherwise the {@code char} 1782 * value at {@code index} is returned. 1783 * 1784 * @param seq 1785 * the source sequence of {@code char} units. 1786 * @param index 1787 * the position in {@code seq} from which to retrieve the code 1788 * point. 1789 * @return the Unicode code point or {@code char} value at {@code index} in 1790 * {@code seq}. 1791 * @throws NullPointerException 1792 * if {@code seq} is {@code null}. 1793 * @throws IndexOutOfBoundsException 1794 * if the {@code index} is negative or greater than or equal to 1795 * the length of {@code seq}. 1796 * @since 1.5 1797 */ 1798 public static int codePointAt(CharSequence seq, int index) { 1799 if (seq == null) { 1800 throw new NullPointerException(); 1801 } 1802 int len = seq.length(); 1803 if (index < 0 || index >= len) { 1804 throw new IndexOutOfBoundsException(); 1805 } 1806 1807 char high = seq.charAt(index++); 1808 if (index >= len) { 1809 return high; 1810 } 1811 char low = seq.charAt(index); 1812 if (isSurrogatePair(high, low)) { 1813 return toCodePoint(high, low); 1814 } 1815 return high; 1816 } 1817 1818 /** 1819 * Returns the code point at {@code index} in the specified array of 1820 * character units. If the unit at {@code index} is a high-surrogate unit, 1821 * {@code index + 1} is less than the length of the array and the unit at 1822 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1823 * point represented by the pair is returned; otherwise the {@code char} 1824 * value at {@code index} is returned. 1825 * 1826 * @param seq 1827 * the source array of {@code char} units. 1828 * @param index 1829 * the position in {@code seq} from which to retrieve the code 1830 * point. 
1831 * @return the Unicode code point or {@code char} value at {@code index} in 1832 * {@code seq}. 1833 * @throws NullPointerException 1834 * if {@code seq} is {@code null}. 1835 * @throws IndexOutOfBoundsException 1836 * if the {@code index} is negative or greater than or equal to 1837 * the length of {@code seq}. 1838 * @since 1.5 1839 */ 1840 public static int codePointAt(char[] seq, int index) { 1841 if (seq == null) { 1842 throw new NullPointerException(); 1843 } 1844 int len = seq.length; 1845 if (index < 0 || index >= len) { 1846 throw new IndexOutOfBoundsException(); 1847 } 1848 1849 char high = seq[index++]; 1850 if (index >= len) { 1851 return high; 1852 } 1853 char low = seq[index]; 1854 if (isSurrogatePair(high, low)) { 1855 return toCodePoint(high, low); 1856 } 1857 return high; 1858 } 1859 1860 /** 1861 * Returns the code point at {@code index} in the specified array of 1862 * character units, where {@code index} has to be less than {@code limit}. 1863 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1864 * is less than {@code limit} and the unit at {@code index + 1} is a 1865 * low-surrogate unit, then the supplementary code point represented by the 1866 * pair is returned; otherwise the {@code char} value at {@code index} is 1867 * returned. 1868 * 1869 * @param seq 1870 * the source array of {@code char} units. 1871 * @param index 1872 * the position in {@code seq} from which to get the code point. 1873 * @param limit 1874 * the index after the last unit in {@code seq} that can be used. 1875 * @return the Unicode code point or {@code char} value at {@code index} in 1876 * {@code seq}. 1877 * @throws NullPointerException 1878 * if {@code seq} is {@code null}. 1879 * @throws IndexOutOfBoundsException 1880 * if {@code index < 0}, {@code index >= limit}, 1881 * {@code limit < 0} or if {@code limit} is greater than the 1882 * length of {@code seq}. 
1883 * @since 1.5 1884 */ 1885 public static int codePointAt(char[] seq, int index, int limit) { 1886 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1887 throw new IndexOutOfBoundsException(); 1888 } 1889 1890 char high = seq[index++]; 1891 if (index >= limit) { 1892 return high; 1893 } 1894 char low = seq[index]; 1895 if (isSurrogatePair(high, low)) { 1896 return toCodePoint(high, low); 1897 } 1898 return high; 1899 } 1900 1901 /** 1902 * Returns the code point that precedes {@code index} in the specified 1903 * sequence of character units. If the unit at {@code index - 1} is a 1904 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1905 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1906 * point represented by the pair is returned; otherwise the {@code char} 1907 * value at {@code index - 1} is returned. 1908 * 1909 * @param seq 1910 * the source sequence of {@code char} units. 1911 * @param index 1912 * the position in {@code seq} following the code 1913 * point that should be returned. 1914 * @return the Unicode code point or {@code char} value before {@code index} 1915 * in {@code seq}. 1916 * @throws NullPointerException 1917 * if {@code seq} is {@code null}. 1918 * @throws IndexOutOfBoundsException 1919 * if the {@code index} is less than 1 or greater than the 1920 * length of {@code seq}. 
1921 * @since 1.5 1922 */ 1923 public static int codePointBefore(CharSequence seq, int index) { 1924 if (seq == null) { 1925 throw new NullPointerException(); 1926 } 1927 int len = seq.length(); 1928 if (index < 1 || index > len) { 1929 throw new IndexOutOfBoundsException(); 1930 } 1931 1932 char low = seq.charAt(--index); 1933 if (--index < 0) { 1934 return low; 1935 } 1936 char high = seq.charAt(index); 1937 if (isSurrogatePair(high, low)) { 1938 return toCodePoint(high, low); 1939 } 1940 return low; 1941 } 1942 1943 /** 1944 * Returns the code point that precedes {@code index} in the specified 1945 * array of character units. If the unit at {@code index - 1} is a 1946 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1947 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1948 * point represented by the pair is returned; otherwise the {@code char} 1949 * value at {@code index - 1} is returned. 1950 * 1951 * @param seq 1952 * the source array of {@code char} units. 1953 * @param index 1954 * the position in {@code seq} following the code 1955 * point that should be returned. 1956 * @return the Unicode code point or {@code char} value before {@code index} 1957 * in {@code seq}. 1958 * @throws NullPointerException 1959 * if {@code seq} is {@code null}. 1960 * @throws IndexOutOfBoundsException 1961 * if the {@code index} is less than 1 or greater than the 1962 * length of {@code seq}. 
1963 * @since 1.5 1964 */ 1965 public static int codePointBefore(char[] seq, int index) { 1966 if (seq == null) { 1967 throw new NullPointerException(); 1968 } 1969 int len = seq.length; 1970 if (index < 1 || index > len) { 1971 throw new IndexOutOfBoundsException(); 1972 } 1973 1974 char low = seq[--index]; 1975 if (--index < 0) { 1976 return low; 1977 } 1978 char high = seq[index]; 1979 if (isSurrogatePair(high, low)) { 1980 return toCodePoint(high, low); 1981 } 1982 return low; 1983 } 1984 1985 /** 1986 * Returns the code point that precedes the {@code index} in the specified 1987 * array of character units and is not less than {@code start}. If the unit 1988 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1989 * less than {@code start} and the unit at {@code index - 2} is a 1990 * high-surrogate unit, then the supplementary code point represented by the 1991 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1992 * is returned. 1993 * 1994 * @param seq 1995 * the source array of {@code char} units. 1996 * @param index 1997 * the position in {@code seq} following the code point that 1998 * should be returned. 1999 * @param start 2000 * the index of the first element in {@code seq}. 2001 * @return the Unicode code point or {@code char} value before {@code index} 2002 * in {@code seq}. 2003 * @throws NullPointerException 2004 * if {@code seq} is {@code null}. 2005 * @throws IndexOutOfBoundsException 2006 * if the {@code index <= start}, {@code start < 0}, 2007 * {@code index} is greater than the length of {@code seq}, or 2008 * if {@code start} is equal or greater than the length of 2009 * {@code seq}. 
2010 * @since 1.5 2011 */ 2012 public static int codePointBefore(char[] seq, int index, int start) { 2013 if (seq == null) { 2014 throw new NullPointerException(); 2015 } 2016 int len = seq.length; 2017 if (index <= start || index > len || start < 0 || start >= len) { 2018 throw new IndexOutOfBoundsException(); 2019 } 2020 2021 char low = seq[--index]; 2022 if (--index < start) { 2023 return low; 2024 } 2025 char high = seq[index]; 2026 if (isSurrogatePair(high, low)) { 2027 return toCodePoint(high, low); 2028 } 2029 return low; 2030 } 2031 2032 /** 2033 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2034 * and copies the value(s) into the char array {@code dst}, starting at 2035 * index {@code dstIndex}. 2036 * 2037 * @param codePoint 2038 * the Unicode code point to encode. 2039 * @param dst 2040 * the destination array to copy the encoded value into. 2041 * @param dstIndex 2042 * the index in {@code dst} from where to start copying. 2043 * @return the number of {@code char} value units copied into {@code dst}. 2044 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2045 * @throws NullPointerException 2046 * if {@code dst} is {@code null}. 2047 * @throws IndexOutOfBoundsException 2048 * if {@code dstIndex} is negative, greater than or equal to 2049 * {@code dst.length} or equals {@code dst.length - 1} when 2050 * {@code codePoint} is a 2051 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2052 * @since 1.5 2053 */ 2054 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2055 checkValidCodePoint(codePoint); 2056 if (dst == null) { 2057 throw new NullPointerException(); 2058 } 2059 if (dstIndex < 0 || dstIndex >= dst.length) { 2060 throw new IndexOutOfBoundsException(); 2061 } 2062 2063 if (isSupplementaryCodePoint(codePoint)) { 2064 if (dstIndex == dst.length - 1) { 2065 throw new IndexOutOfBoundsException(); 2066 } 2067 // See RFC 2781, Section 2.1 2068 // http://www.ietf.org/rfc/rfc2781.txt 2069 int cpPrime = codePoint - 0x10000; 2070 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2071 int low = 0xDC00 | (cpPrime & 0x3FF); 2072 dst[dstIndex] = (char) high; 2073 dst[dstIndex + 1] = (char) low; 2074 return 2; 2075 } 2076 2077 dst[dstIndex] = (char) codePoint; 2078 return 1; 2079 } 2080 2081 /** 2082 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2083 * and returns it as a char array. 2084 * 2085 * @param codePoint 2086 * the Unicode code point to encode. 2087 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2088 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2089 * then the returned array contains two characters, otherwise it 2090 * contains just one character. 2091 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2092 * @since 1.5 2093 */ 2094 public static char[] toChars(int codePoint) { 2095 checkValidCodePoint(codePoint); 2096 if (isSupplementaryCodePoint(codePoint)) { 2097 int cpPrime = codePoint - 0x10000; 2098 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2099 int low = 0xDC00 | (cpPrime & 0x3FF); 2100 return new char[] { (char) high, (char) low }; 2101 } 2102 return new char[] { (char) codePoint }; 2103 } 2104 2105 /** 2106 * Counts the number of Unicode code points in the subsequence of the 2107 * specified character sequence, as delineated by {@code beginIndex} and 2108 * {@code endIndex}. 
Any surrogate values with missing pair values will be 2109 * counted as one code point. 2110 * 2111 * @param seq 2112 * the {@code CharSequence} to look through. 2113 * @param beginIndex 2114 * the inclusive index to begin counting at. 2115 * @param endIndex 2116 * the exclusive index to stop counting at. 2117 * @return the number of Unicode code points. 2118 * @throws NullPointerException 2119 * if {@code seq} is {@code null}. 2120 * @throws IndexOutOfBoundsException 2121 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2122 * if {@code endIndex} is greater than the length of {@code seq}. 2123 * @since 1.5 2124 */ 2125 public static int codePointCount(CharSequence seq, int beginIndex, 2126 int endIndex) { 2127 if (seq == null) { 2128 throw new NullPointerException(); 2129 } 2130 int len = seq.length(); 2131 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2132 throw new IndexOutOfBoundsException(); 2133 } 2134 2135 int result = 0; 2136 for (int i = beginIndex; i < endIndex; i++) { 2137 char c = seq.charAt(i); 2138 if (isHighSurrogate(c)) { 2139 if (++i < endIndex) { 2140 c = seq.charAt(i); 2141 if (!isLowSurrogate(c)) { 2142 result++; 2143 } 2144 } 2145 } 2146 result++; 2147 } 2148 return result; 2149 } 2150 2151 /** 2152 * Counts the number of Unicode code points in the subsequence of the 2153 * specified char array, as delineated by {@code offset} and {@code count}. 2154 * Any surrogate values with missing pair values will be counted as one code 2155 * point. 2156 * 2157 * @param seq 2158 * the char array to look through 2159 * @param offset 2160 * the inclusive index to begin counting at. 2161 * @param count 2162 * the number of {@code char} values to look through in 2163 * {@code seq}. 2164 * @return the number of Unicode code points. 2165 * @throws NullPointerException 2166 * if {@code seq} is {@code null}. 
2167 * @throws IndexOutOfBoundsException 2168 * if {@code offset < 0}, {@code count < 0} or if 2169 * {@code offset + count} is greater than the length of 2170 * {@code seq}. 2171 * @since 1.5 2172 */ 2173 public static int codePointCount(char[] seq, int offset, int count) { 2174 Arrays.checkOffsetAndCount(seq.length, offset, count); 2175 int endIndex = offset + count; 2176 int result = 0; 2177 for (int i = offset; i < endIndex; i++) { 2178 char c = seq[i]; 2179 if (isHighSurrogate(c)) { 2180 if (++i < endIndex) { 2181 c = seq[i]; 2182 if (!isLowSurrogate(c)) { 2183 result++; 2184 } 2185 } 2186 } 2187 result++; 2188 } 2189 return result; 2190 } 2191 2192 /** 2193 * Determines the index in the specified character sequence that is offset 2194 * {@code codePointOffset} code points from {@code index}. 2195 * 2196 * @param seq 2197 * the character sequence to find the index in. 2198 * @param index 2199 * the start index in {@code seq}. 2200 * @param codePointOffset 2201 * the number of code points to look backwards or forwards; may 2202 * be a negative or positive value. 2203 * @return the index in {@code seq} that is {@code codePointOffset} code 2204 * points away from {@code index}. 2205 * @throws NullPointerException 2206 * if {@code seq} is {@code null}. 2207 * @throws IndexOutOfBoundsException 2208 * if {@code index < 0}, {@code index} is greater than the 2209 * length of {@code seq}, or if there are not enough values in 2210 * {@code seq} to skip {@code codePointOffset} code points 2211 * forwards or backwards (if {@code codePointOffset} is 2212 * negative) from {@code index}. 
2213 * @since 1.5 2214 */ 2215 public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { 2216 if (seq == null) { 2217 throw new NullPointerException(); 2218 } 2219 int len = seq.length(); 2220 if (index < 0 || index > len) { 2221 throw new IndexOutOfBoundsException(); 2222 } 2223 2224 if (codePointOffset == 0) { 2225 return index; 2226 } 2227 2228 if (codePointOffset > 0) { 2229 int codePoints = codePointOffset; 2230 int i = index; 2231 while (codePoints > 0) { 2232 codePoints--; 2233 if (i >= len) { 2234 throw new IndexOutOfBoundsException(); 2235 } 2236 if (isHighSurrogate(seq.charAt(i))) { 2237 int next = i + 1; 2238 if (next < len && isLowSurrogate(seq.charAt(next))) { 2239 i++; 2240 } 2241 } 2242 i++; 2243 } 2244 return i; 2245 } 2246 2247 int codePoints = -codePointOffset; 2248 int i = index; 2249 while (codePoints > 0) { 2250 codePoints--; 2251 i--; 2252 if (i < 0) { 2253 throw new IndexOutOfBoundsException(); 2254 } 2255 if (isLowSurrogate(seq.charAt(i))) { 2256 int prev = i - 1; 2257 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2258 i--; 2259 } 2260 } 2261 } 2262 return i; 2263 } 2264 2265 /** 2266 * Determines the index in a subsequence of the specified character array 2267 * that is offset {@code codePointOffset} code points from {@code index}. 2268 * The subsequence is delineated by {@code start} and {@code count}. 2269 * 2270 * @param seq 2271 * the character array to find the index in. 2272 * @param start 2273 * the inclusive index that marks the beginning of the 2274 * subsequence. 2275 * @param count 2276 * the number of {@code char} values to include within the 2277 * subsequence. 2278 * @param index 2279 * the start index in the subsequence of the char array. 2280 * @param codePointOffset 2281 * the number of code points to look backwards or forwards; may 2282 * be a negative or positive value. 2283 * @return the index in {@code seq} that is {@code codePointOffset} code 2284 * points away from {@code index}. 
2285 * @throws NullPointerException 2286 * if {@code seq} is {@code null}. 2287 * @throws IndexOutOfBoundsException 2288 * if {@code start < 0}, {@code count < 0}, 2289 * {@code index < start}, {@code index > start + count}, 2290 * {@code start + count} is greater than the length of 2291 * {@code seq}, or if there are not enough values in 2292 * {@code seq} to skip {@code codePointOffset} code points 2293 * forward or backward (if {@code codePointOffset} is 2294 * negative) from {@code index}. 2295 * @since 1.5 2296 */ 2297 public static int offsetByCodePoints(char[] seq, int start, int count, 2298 int index, int codePointOffset) { 2299 Arrays.checkOffsetAndCount(seq.length, start, count); 2300 int end = start + count; 2301 if (index < start || index > end) { 2302 throw new IndexOutOfBoundsException(); 2303 } 2304 2305 if (codePointOffset == 0) { 2306 return index; 2307 } 2308 2309 if (codePointOffset > 0) { 2310 int codePoints = codePointOffset; 2311 int i = index; 2312 while (codePoints > 0) { 2313 codePoints--; 2314 if (i >= end) { 2315 throw new IndexOutOfBoundsException(); 2316 } 2317 if (isHighSurrogate(seq[i])) { 2318 int next = i + 1; 2319 if (next < end && isLowSurrogate(seq[next])) { 2320 i++; 2321 } 2322 } 2323 i++; 2324 } 2325 return i; 2326 } 2327 2328 int codePoints = -codePointOffset; 2329 int i = index; 2330 while (codePoints > 0) { 2331 codePoints--; 2332 i--; 2333 if (i < start) { 2334 throw new IndexOutOfBoundsException(); 2335 } 2336 if (isLowSurrogate(seq[i])) { 2337 int prev = i - 1; 2338 if (prev >= start && isHighSurrogate(seq[prev])) { 2339 i--; 2340 } 2341 } 2342 } 2343 return i; 2344 } 2345 2346 /** 2347 * Convenience method to determine the value of the specified character 2348 * {@code c} in the supplied radix. The value of {@code radix} must be 2349 * between MIN_RADIX and MAX_RADIX. 2350 * 2351 * @param c 2352 * the character to determine the value of. 2353 * @param radix 2354 * the radix. 
2355 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2356 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2357 */ 2358 public static int digit(char c, int radix) { 2359 return digit((int) c, radix); 2360 } 2361 2362 /** 2363 * Convenience method to determine the value of the character 2364 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2365 * be between MIN_RADIX and MAX_RADIX. 2366 * 2367 * @param codePoint 2368 * the character, including supplementary characters. 2369 * @param radix 2370 * the radix. 2371 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2372 * {@link #MAX_RADIX} then the value of the character in the radix; 2373 * -1 otherwise. 2374 */ 2375 public static int digit(int codePoint, int radix) { 2376 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2377 return -1; 2378 } 2379 if (codePoint < 128) { 2380 // Optimized for ASCII 2381 int result = -1; 2382 if ('0' <= codePoint && codePoint <= '9') { 2383 result = codePoint - '0'; 2384 } else if ('a' <= codePoint && codePoint <= 'z') { 2385 result = 10 + (codePoint - 'a'); 2386 } else if ('A' <= codePoint && codePoint <= 'Z') { 2387 result = 10 + (codePoint - 'A'); 2388 } 2389 return result < radix ? result : -1; 2390 } 2391 return digitImpl(codePoint, radix); 2392 } 2393 2394 private static native int digitImpl(int codePoint, int radix); 2395 2396 /** 2397 * Compares this object with the specified object and indicates if they are 2398 * equal. In order to be equal, {@code object} must be an instance of 2399 * {@code Character} and have the same char value as this object. 2400 * 2401 * @param object 2402 * the object to compare this double with. 2403 * @return {@code true} if the specified object is equal to this 2404 * {@code Character}; {@code false} otherwise. 
2405 */ 2406 @Override 2407 public boolean equals(Object object) { 2408 return (object instanceof Character) && (value == ((Character) object).value); 2409 } 2410 2411 /** 2412 * Returns the character which represents the specified digit in the 2413 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2414 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2415 * smaller than {@code radix}. If any of these conditions does not hold, 0 2416 * is returned. 2417 * 2418 * @param digit 2419 * the integer value. 2420 * @param radix 2421 * the radix. 2422 * @return the character which represents the {@code digit} in the 2423 * {@code radix}. 2424 */ 2425 public static char forDigit(int digit, int radix) { 2426 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2427 if (digit >= 0 && digit < radix) { 2428 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2429 } 2430 } 2431 return 0; 2432 } 2433 2434 /** 2435 * Returns the name of the given code point, or null if the code point is unassigned. 2436 * 2437 * <p>As a fallback mechanism this method returns strings consisting of the Unicode 2438 * block name (with underscores replaced by spaces), a single space, and the uppercase 2439 * hex value of the code point, using as few digits as necessary. 2440 * 2441 * <p>Examples: 2442 * <ul> 2443 * <li>{@code Character.getName(0)} returns "NULL". 2444 * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". 2445 * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". 2446 * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". 2447 * </ul> 2448 * 2449 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
2450 * @since 1.7 2451 * @hide 1.7 2452 */ 2453 public static String getName(int codePoint) { 2454 checkValidCodePoint(codePoint); 2455 if (getType(codePoint) == Character.UNASSIGNED) { 2456 return null; 2457 } 2458 String result = getNameImpl(codePoint); 2459 if (result == null) { 2460 String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); 2461 result = blockName + " " + IntegralToString.intToHexString(codePoint, true); 2462 } 2463 return result; 2464 } 2465 2466 private static native String getNameImpl(int codePoint); 2467 2468 /** 2469 * Returns the numeric value of the specified Unicode character. 2470 * See {@link #getNumericValue(int)}. 2471 * 2472 * @param c the character 2473 * @return a non-negative numeric integer value if a numeric value for 2474 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2475 * -2 if the numeric value can not be represented as an integer. 2476 */ 2477 public static int getNumericValue(char c) { 2478 return getNumericValue((int) c); 2479 } 2480 2481 /** 2482 * Gets the numeric value of the specified Unicode code point. For example, 2483 * the code point '\u216B' stands for the Roman number XII, which has the 2484 * numeric value 12. 2485 * 2486 * <p>There are two points of divergence between this method and the Unicode 2487 * specification. This method treats the letters a-z (in both upper and lower 2488 * cases, and their full-width variants) as numbers from 10 to 35. The 2489 * Unicode specification also supports the idea of code points with non-integer 2490 * numeric values; this method does not (except to the extent of returning -2 2491 * for such code points). 2492 * 2493 * @param codePoint the code point 2494 * @return a non-negative numeric integer value if a numeric value for 2495 * {@code codePoint} exists, -1 if there is no numeric value for 2496 * {@code codePoint}, -2 if the numeric value can not be 2497 * represented with an integer. 
2498 */ 2499 public static int getNumericValue(int codePoint) { 2500 // This is both an optimization and papers over differences between Java and ICU. 2501 if (codePoint < 128) { 2502 if (codePoint >= '0' && codePoint <= '9') { 2503 return codePoint - '0'; 2504 } 2505 if (codePoint >= 'a' && codePoint <= 'z') { 2506 return codePoint - ('a' - 10); 2507 } 2508 if (codePoint >= 'A' && codePoint <= 'Z') { 2509 return codePoint - ('A' - 10); 2510 } 2511 return -1; 2512 } 2513 // Full-width uppercase A-Z. 2514 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2515 return codePoint - 0xff17; 2516 } 2517 // Full-width lowercase a-z. 2518 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2519 return codePoint - 0xff37; 2520 } 2521 return getNumericValueImpl(codePoint); 2522 } 2523 2524 private static native int getNumericValueImpl(int codePoint); 2525 2526 /** 2527 * Gets the general Unicode category of the specified character. 2528 * 2529 * @param c 2530 * the character to get the category of. 2531 * @return the Unicode category of {@code c}. 2532 */ 2533 public static int getType(char c) { 2534 return getType((int) c); 2535 } 2536 2537 /** 2538 * Gets the general Unicode category of the specified code point. 2539 * 2540 * @param codePoint 2541 * the Unicode code point to get the category of. 2542 * @return the Unicode category of {@code codePoint}. 2543 */ 2544 public static int getType(int codePoint) { 2545 int type = getTypeImpl(codePoint); 2546 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2547 if (type <= Character.FORMAT) { 2548 return type; 2549 } 2550 return (type + 1); 2551 } 2552 2553 private static native int getTypeImpl(int codePoint); 2554 2555 /** 2556 * Gets the Unicode directionality of the specified character. 2557 * 2558 * @param c 2559 * the character to get the directionality of. 2560 * @return the Unicode directionality of {@code c}. 
2561 */ 2562 public static byte getDirectionality(char c) { 2563 return getDirectionality((int)c); 2564 } 2565 2566 /** 2567 * Gets the Unicode directionality of the specified character. 2568 * 2569 * @param codePoint 2570 * the Unicode code point to get the directionality of. 2571 * @return the Unicode directionality of {@code codePoint}. 2572 */ 2573 public static byte getDirectionality(int codePoint) { 2574 if (getType(codePoint) == Character.UNASSIGNED) { 2575 return Character.DIRECTIONALITY_UNDEFINED; 2576 } 2577 2578 byte directionality = getDirectionalityImpl(codePoint); 2579 if (directionality == -1) { 2580 return -1; 2581 } 2582 return DIRECTIONALITY[directionality]; 2583 } 2584 2585 private static native byte getDirectionalityImpl(int codePoint); 2586 2587 /** 2588 * Indicates whether the specified character is mirrored. 2589 * 2590 * @param c 2591 * the character to check. 2592 * @return {@code true} if {@code c} is mirrored; {@code false} 2593 * otherwise. 2594 */ 2595 public static boolean isMirrored(char c) { 2596 return isMirrored((int) c); 2597 } 2598 2599 /** 2600 * Indicates whether the specified code point is mirrored. 2601 * 2602 * @param codePoint 2603 * the code point to check. 2604 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2605 * otherwise. 2606 */ 2607 public static boolean isMirrored(int codePoint) { 2608 return isMirroredImpl(codePoint); 2609 } 2610 2611 private static native boolean isMirroredImpl(int codePoint); 2612 2613 @Override 2614 public int hashCode() { 2615 return value; 2616 } 2617 2618 /** 2619 * Returns the high surrogate for the given code point. The result is meaningless if 2620 * the given code point is not a supplementary character. 2621 * @since 1.7 2622 * @hide 1.7 2623 */ 2624 public static char highSurrogate(int codePoint) { 2625 return (char) ((codePoint >> 10) + 0xd7c0); 2626 } 2627 2628 /** 2629 * Returns the low surrogate for the given code point. 
The result is meaningless if 2630 * the given code point is not a supplementary character. 2631 * @since 1.7 2632 * @hide 1.7 2633 */ 2634 public static char lowSurrogate(int codePoint) { 2635 return (char) ((codePoint & 0x3ff) | 0xdc00); 2636 } 2637 2638 /** 2639 * Tests whether the given code point is in the Basic Multilingual Plane (BMP). 2640 * Such code points can be represented by a single {@code char}. 2641 * @since 1.7 2642 * @hide 1.7 2643 */ 2644 public static boolean isBmpCodePoint(int codePoint) { 2645 return codePoint >= 0 && codePoint <= 0xffff; 2646 } 2647 2648 /** 2649 * Indicates whether the specified character is defined in the Unicode 2650 * specification. 2651 * 2652 * @param c 2653 * the character to check. 2654 * @return {@code true} if the general Unicode category of the character is 2655 * not {@code UNASSIGNED}; {@code false} otherwise. 2656 */ 2657 public static boolean isDefined(char c) { 2658 return isDefinedImpl(c); 2659 } 2660 2661 /** 2662 * Indicates whether the specified code point is defined in the Unicode 2663 * specification. 2664 * 2665 * @param codePoint 2666 * the code point to check. 2667 * @return {@code true} if the general Unicode category of the code point is 2668 * not {@code UNASSIGNED}; {@code false} otherwise. 2669 */ 2670 public static boolean isDefined(int codePoint) { 2671 return isDefinedImpl(codePoint); 2672 } 2673 2674 private static native boolean isDefinedImpl(int codePoint); 2675 2676 /** 2677 * Indicates whether the specified character is a digit. 2678 * 2679 * @param c 2680 * the character to check. 2681 * @return {@code true} if {@code c} is a digit; {@code false} 2682 * otherwise. 2683 */ 2684 public static boolean isDigit(char c) { 2685 return isDigit((int) c); 2686 } 2687 2688 /** 2689 * Indicates whether the specified code point is a digit. 2690 * 2691 * @param codePoint 2692 * the code point to check. 2693 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2694 * otherwise. 
2695 */ 2696 public static boolean isDigit(int codePoint) { 2697 // Optimized case for ASCII 2698 if ('0' <= codePoint && codePoint <= '9') { 2699 return true; 2700 } 2701 if (codePoint < 1632) { 2702 return false; 2703 } 2704 return isDigitImpl(codePoint); 2705 } 2706 2707 private static native boolean isDigitImpl(int codePoint); 2708 2709 /** 2710 * Indicates whether the specified character is ignorable in a Java or 2711 * Unicode identifier. 2712 * 2713 * @param c 2714 * the character to check. 2715 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2716 */ 2717 public static boolean isIdentifierIgnorable(char c) { 2718 return isIdentifierIgnorable((int) c); 2719 } 2720 2721 /** 2722 * Indicates whether the specified code point is ignorable in a Java or 2723 * Unicode identifier. 2724 * 2725 * @param codePoint 2726 * the code point to check. 2727 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2728 * otherwise. 2729 */ 2730 public static boolean isIdentifierIgnorable(int codePoint) { 2731 // This is both an optimization and papers over differences between Java and ICU. 2732 if (codePoint < 0x600) { 2733 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2734 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2735 } 2736 return isIdentifierIgnorableImpl(codePoint); 2737 } 2738 2739 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2740 2741 /** 2742 * Indicates whether the specified character is an ISO control character. 2743 * 2744 * @param c 2745 * the character to check. 2746 * @return {@code true} if {@code c} is an ISO control character; 2747 * {@code false} otherwise. 2748 */ 2749 public static boolean isISOControl(char c) { 2750 return isISOControl((int) c); 2751 } 2752 2753 /** 2754 * Indicates whether the specified code point is an ISO control character. 2755 * 2756 * @param c 2757 * the code point to check. 
2758 * @return {@code true} if {@code c} is an ISO control character; 2759 * {@code false} otherwise. 2760 */ 2761 public static boolean isISOControl(int c) { 2762 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2763 } 2764 2765 /** 2766 * Indicates whether the specified character is a valid part of a Java 2767 * identifier other than the first character. 2768 * 2769 * @param c 2770 * the character to check. 2771 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2772 * {@code false} otherwise. 2773 */ 2774 public static boolean isJavaIdentifierPart(char c) { 2775 return isJavaIdentifierPart((int) c); 2776 } 2777 2778 /** 2779 * Indicates whether the specified code point is a valid part of a Java 2780 * identifier other than the first character. 2781 * 2782 * @param codePoint 2783 * the code point to check. 2784 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2785 * {@code false} otherwise. 2786 */ 2787 public static boolean isJavaIdentifierPart(int codePoint) { 2788 // Use precomputed bitmasks to optimize the ASCII range. 2789 if (codePoint < 64) { 2790 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2791 } else if (codePoint < 128) { 2792 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2793 } 2794 int type = getType(codePoint); 2795 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2796 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2797 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2798 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2799 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2800 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2801 } 2802 2803 /** 2804 * Indicates whether the specified character is a valid first character for 2805 * a Java identifier. 2806 * 2807 * @param c 2808 * the character to check. 
2809 * @return {@code true} if {@code c} is a valid first character of a Java 2810 * identifier; {@code false} otherwise. 2811 */ 2812 public static boolean isJavaIdentifierStart(char c) { 2813 return isJavaIdentifierStart((int) c); 2814 } 2815 2816 /** 2817 * Indicates whether the specified code point is a valid first character for 2818 * a Java identifier. 2819 * 2820 * @param codePoint 2821 * the code point to check. 2822 * @return {@code true} if {@code codePoint} is a valid start of a Java 2823 * identifier; {@code false} otherwise. 2824 */ 2825 public static boolean isJavaIdentifierStart(int codePoint) { 2826 // Use precomputed bitmasks to optimize the ASCII range. 2827 if (codePoint < 64) { 2828 return (codePoint == '$'); // There's only one character in this range. 2829 } else if (codePoint < 128) { 2830 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2831 } 2832 int type = getType(codePoint); 2833 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2834 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2835 } 2836 2837 /** 2838 * Indicates whether the specified character is a Java letter. 2839 * 2840 * @param c 2841 * the character to check. 2842 * @return {@code true} if {@code c} is a Java letter; {@code false} 2843 * otherwise. 2844 * @deprecated Use {@link #isJavaIdentifierStart(char)} 2845 */ 2846 @Deprecated 2847 public static boolean isJavaLetter(char c) { 2848 return isJavaIdentifierStart(c); 2849 } 2850 2851 /** 2852 * Indicates whether the specified character is a Java letter or digit 2853 * character. 2854 * 2855 * @param c 2856 * the character to check. 2857 * @return {@code true} if {@code c} is a Java letter or digit; 2858 * {@code false} otherwise. 
2859 * @deprecated Use {@link #isJavaIdentifierPart(char)} 2860 */ 2861 @Deprecated 2862 public static boolean isJavaLetterOrDigit(char c) { 2863 return isJavaIdentifierPart(c); 2864 } 2865 2866 /** 2867 * Indicates whether the specified character is a letter. 2868 * 2869 * @param c 2870 * the character to check. 2871 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2872 */ 2873 public static boolean isLetter(char c) { 2874 return isLetter((int) c); 2875 } 2876 2877 /** 2878 * Indicates whether the specified code point is a letter. 2879 * 2880 * @param codePoint 2881 * the code point to check. 2882 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2883 * otherwise. 2884 */ 2885 public static boolean isLetter(int codePoint) { 2886 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2887 return true; 2888 } 2889 if (codePoint < 128) { 2890 return false; 2891 } 2892 return isLetterImpl(codePoint); 2893 } 2894 2895 private static native boolean isLetterImpl(int codePoint); 2896 2897 /** 2898 * Indicates whether the specified character is a letter or a digit. 2899 * 2900 * @param c 2901 * the character to check. 2902 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2903 * otherwise. 2904 */ 2905 public static boolean isLetterOrDigit(char c) { 2906 return isLetterOrDigit((int) c); 2907 } 2908 2909 /** 2910 * Indicates whether the specified code point is a letter or a digit. 2911 * 2912 * @param codePoint 2913 * the code point to check. 2914 * @return {@code true} if {@code codePoint} is a letter or a digit; 2915 * {@code false} otherwise. 
2916 */ 2917 public static boolean isLetterOrDigit(int codePoint) { 2918 // Optimized case for ASCII 2919 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2920 return true; 2921 } 2922 if ('0' <= codePoint && codePoint <= '9') { 2923 return true; 2924 } 2925 if (codePoint < 128) { 2926 return false; 2927 } 2928 return isLetterOrDigitImpl(codePoint); 2929 } 2930 2931 private static native boolean isLetterOrDigitImpl(int codePoint); 2932 2933 /** 2934 * Indicates whether the specified character is a lower case letter. 2935 * 2936 * @param c 2937 * the character to check. 2938 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2939 * otherwise. 2940 */ 2941 public static boolean isLowerCase(char c) { 2942 return isLowerCase((int) c); 2943 } 2944 2945 /** 2946 * Indicates whether the specified code point is a lower case letter. 2947 * 2948 * @param codePoint 2949 * the code point to check. 2950 * @return {@code true} if {@code codePoint} is a lower case letter; 2951 * {@code false} otherwise. 2952 */ 2953 public static boolean isLowerCase(int codePoint) { 2954 // Optimized case for ASCII 2955 if ('a' <= codePoint && codePoint <= 'z') { 2956 return true; 2957 } 2958 if (codePoint < 128) { 2959 return false; 2960 } 2961 return isLowerCaseImpl(codePoint); 2962 } 2963 2964 private static native boolean isLowerCaseImpl(int codePoint); 2965 2966 /** 2967 * Indicates whether the specified character is a Java space. 2968 * 2969 * @param c 2970 * the character to check. 2971 * @return {@code true} if {@code c} is a Java space; {@code false} 2972 * otherwise. 2973 * @deprecated Use {@link #isWhitespace(char)} 2974 */ 2975 @Deprecated 2976 public static boolean isSpace(char c) { 2977 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2978 } 2979 2980 /** 2981 * Indicates whether the specified character is a Unicode space character. 
2982 * That is, if it is a member of one of the Unicode categories Space 2983 * Separator, Line Separator, or Paragraph Separator. 2984 * 2985 * @param c 2986 * the character to check. 2987 * @return {@code true} if {@code c} is a Unicode space character, 2988 * {@code false} otherwise. 2989 */ 2990 public static boolean isSpaceChar(char c) { 2991 return isSpaceChar((int) c); 2992 } 2993 2994 /** 2995 * Indicates whether the specified code point is a Unicode space character. 2996 * That is, if it is a member of one of the Unicode categories Space 2997 * Separator, Line Separator, or Paragraph Separator. 2998 * 2999 * @param codePoint 3000 * the code point to check. 3001 * @return {@code true} if {@code codePoint} is a Unicode space character, 3002 * {@code false} otherwise. 3003 */ 3004 public static boolean isSpaceChar(int codePoint) { 3005 if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) { 3006 return true; 3007 } 3008 if (codePoint < 0x2000) { 3009 return false; 3010 } 3011 if (codePoint <= 0xffff) { 3012 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3013 codePoint == 0x202f || codePoint == 0x3000; 3014 } 3015 return isSpaceCharImpl(codePoint); 3016 } 3017 3018 private static native boolean isSpaceCharImpl(int codePoint); 3019 3020 /** 3021 * Indicates whether the specified character is a titlecase character. 3022 * 3023 * @param c 3024 * the character to check. 3025 * @return {@code true} if {@code c} is a titlecase character, {@code false} 3026 * otherwise. 3027 */ 3028 public static boolean isTitleCase(char c) { 3029 return isTitleCaseImpl(c); 3030 } 3031 3032 /** 3033 * Indicates whether the specified code point is a titlecase character. 3034 * 3035 * @param codePoint 3036 * the code point to check. 3037 * @return {@code true} if {@code codePoint} is a titlecase character, 3038 * {@code false} otherwise. 
3039 */ 3040 public static boolean isTitleCase(int codePoint) { 3041 return isTitleCaseImpl(codePoint); 3042 } 3043 3044 private static native boolean isTitleCaseImpl(int codePoint); 3045 3046 /** 3047 * Indicates whether the specified character is valid as part of a Unicode 3048 * identifier other than the first character. 3049 * 3050 * @param c 3051 * the character to check. 3052 * @return {@code true} if {@code c} is valid as part of a Unicode 3053 * identifier; {@code false} otherwise. 3054 */ 3055 public static boolean isUnicodeIdentifierPart(char c) { 3056 return isUnicodeIdentifierPartImpl(c); 3057 } 3058 3059 /** 3060 * Indicates whether the specified code point is valid as part of a Unicode 3061 * identifier other than the first character. 3062 * 3063 * @param codePoint 3064 * the code point to check. 3065 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3066 * identifier; {@code false} otherwise. 3067 */ 3068 public static boolean isUnicodeIdentifierPart(int codePoint) { 3069 return isUnicodeIdentifierPartImpl(codePoint); 3070 } 3071 3072 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3073 3074 /** 3075 * Indicates whether the specified character is a valid initial character 3076 * for a Unicode identifier. 3077 * 3078 * @param c 3079 * the character to check. 3080 * @return {@code true} if {@code c} is a valid first character for a 3081 * Unicode identifier; {@code false} otherwise. 3082 */ 3083 public static boolean isUnicodeIdentifierStart(char c) { 3084 return isUnicodeIdentifierStartImpl(c); 3085 } 3086 3087 /** 3088 * Indicates whether the specified code point is a valid initial character 3089 * for a Unicode identifier. 3090 * 3091 * @param codePoint 3092 * the code point to check. 3093 * @return {@code true} if {@code codePoint} is a valid first character for 3094 * a Unicode identifier; {@code false} otherwise. 
3095 */ 3096 public static boolean isUnicodeIdentifierStart(int codePoint) { 3097 return isUnicodeIdentifierStartImpl(codePoint); 3098 } 3099 3100 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3101 3102 /** 3103 * Indicates whether the specified character is an upper case letter. 3104 * 3105 * @param c 3106 * the character to check. 3107 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3108 * otherwise. 3109 */ 3110 public static boolean isUpperCase(char c) { 3111 return isUpperCase((int) c); 3112 } 3113 3114 /** 3115 * Indicates whether the specified code point is an upper case letter. 3116 * 3117 * @param codePoint 3118 * the code point to check. 3119 * @return {@code true} if {@code codePoint} is a upper case letter; 3120 * {@code false} otherwise. 3121 */ 3122 public static boolean isUpperCase(int codePoint) { 3123 // Optimized case for ASCII 3124 if ('A' <= codePoint && codePoint <= 'Z') { 3125 return true; 3126 } 3127 if (codePoint < 128) { 3128 return false; 3129 } 3130 return isUpperCaseImpl(codePoint); 3131 } 3132 3133 private static native boolean isUpperCaseImpl(int codePoint); 3134 3135 /** 3136 * Indicates whether the specified character is a whitespace character in 3137 * Java. 3138 * 3139 * @param c 3140 * the character to check. 3141 * @return {@code true} if the supplied {@code c} is a whitespace character 3142 * in Java; {@code false} otherwise. 3143 */ 3144 public static boolean isWhitespace(char c) { 3145 return isWhitespace((int) c); 3146 } 3147 3148 /** 3149 * Indicates whether the specified code point is a whitespace character in 3150 * Java. 3151 * 3152 * @param codePoint 3153 * the code point to check. 3154 * @return {@code true} if the supplied {@code c} is a whitespace character 3155 * in Java; {@code false} otherwise. 3156 */ 3157 public static boolean isWhitespace(int codePoint) { 3158 // This is both an optimization and papers over differences between Java and ICU. 
3159 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) { 3160 return true; 3161 } 3162 if (codePoint == 0x1680) { 3163 return true; 3164 } 3165 if (codePoint < 0x2000 || codePoint == 0x2007) { 3166 return false; 3167 } 3168 if (codePoint <= 0xffff) { 3169 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3170 codePoint == 0x3000; 3171 } 3172 return isWhitespaceImpl(codePoint); 3173 } 3174 3175 private static native boolean isWhitespaceImpl(int codePoint); 3176 3177 /** 3178 * Reverses the order of the first and second byte in the specified 3179 * character. 3180 * 3181 * @param c 3182 * the character to reverse. 3183 * @return the character with reordered bytes. 3184 */ 3185 public static char reverseBytes(char c) { 3186 return (char)((c<<8) | (c>>8)); 3187 } 3188 3189 /** 3190 * Returns the lower case equivalent for the specified character if the 3191 * character is an upper case letter. Otherwise, the specified character is 3192 * returned unchanged. 3193 * 3194 * @param c 3195 * the character 3196 * @return if {@code c} is an upper case character then its lower case 3197 * counterpart, otherwise just {@code c}. 3198 */ 3199 public static char toLowerCase(char c) { 3200 return (char) toLowerCase((int) c); 3201 } 3202 3203 /** 3204 * Returns the lower case equivalent for the specified code point if it is 3205 * an upper case letter. Otherwise, the specified code point is returned 3206 * unchanged. 3207 * 3208 * @param codePoint 3209 * the code point to check. 3210 * @return if {@code codePoint} is an upper case character then its lower 3211 * case counterpart, otherwise just {@code codePoint}. 
3212 */ 3213 public static int toLowerCase(int codePoint) { 3214 // Optimized case for ASCII 3215 if ('A' <= codePoint && codePoint <= 'Z') { 3216 return (char) (codePoint + ('a' - 'A')); 3217 } 3218 if (codePoint < 192) { 3219 return codePoint; 3220 } 3221 return toLowerCaseImpl(codePoint); 3222 } 3223 3224 private static native int toLowerCaseImpl(int codePoint); 3225 3226 @Override 3227 public String toString() { 3228 return String.valueOf(value); 3229 } 3230 3231 /** 3232 * Converts the specified character to its string representation. 3233 * 3234 * @param value 3235 * the character to convert. 3236 * @return the character converted to a string. 3237 */ 3238 public static String toString(char value) { 3239 return String.valueOf(value); 3240 } 3241 3242 /** 3243 * Returns the title case equivalent for the specified character if it 3244 * exists. Otherwise, the specified character is returned unchanged. 3245 * 3246 * @param c 3247 * the character to convert. 3248 * @return the title case equivalent of {@code c} if it exists, otherwise 3249 * {@code c}. 3250 */ 3251 public static char toTitleCase(char c) { 3252 return (char) toTitleCaseImpl(c); 3253 } 3254 3255 /** 3256 * Returns the title case equivalent for the specified code point if it 3257 * exists. Otherwise, the specified code point is returned unchanged. 3258 * 3259 * @param codePoint 3260 * the code point to convert. 3261 * @return the title case equivalent of {@code codePoint} if it exists, 3262 * otherwise {@code codePoint}. 3263 */ 3264 public static int toTitleCase(int codePoint) { 3265 return toTitleCaseImpl(codePoint); 3266 } 3267 3268 private static native int toTitleCaseImpl(int codePoint); 3269 3270 /** 3271 * Returns the upper case equivalent for the specified character if the 3272 * character is a lower case letter. Otherwise, the specified character is 3273 * returned unchanged. 3274 * 3275 * @param c 3276 * the character to convert. 
* @return if {@code c} is a lower case character then its upper case
     *         counterpart, otherwise just {@code c}.
     */
    public static char toUpperCase(char c) {
        final int mapped = toUpperCase((int) c);
        return (char) mapped;
    }

    /**
     * Maps a code point to its upper case equivalent when the code point is a
     * lower case letter and returns it unchanged otherwise. Values below 181
     * are handled locally; every other value is passed to the native mapping.
     *
     * @param codePoint
     *            the code point to convert.
     * @return the upper case counterpart of {@code codePoint} when it is a
     *         lower case character, otherwise {@code codePoint} itself.
     */
    public static int toUpperCase(int codePoint) {
        if (codePoint >= 181) {
            return toUpperCaseImpl(codePoint);
        }
        // Below 181 only 'a'..'z' are altered: they are shifted onto 'A'..'Z'.
        final boolean asciiLower = codePoint >= 'a' && codePoint <= 'z';
        return asciiLower ? codePoint - ('a' - 'A') : codePoint;
    }

    // Native upper-case mapping; toUpperCase(int) calls it for code points of 181 and above.
    private static native int toUpperCaseImpl(int codePoint);
}