Character.java revision fed2ee9e249b952c76f11110c41b33c4829aa56f
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21import java.util.Arrays; 22 23/** 24 * The wrapper for the primitive type {@code char}. This class also provides a 25 * number of utility methods for working with characters. 26 * 27 * <p>Character data is kept up to date as Unicode evolves. 28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 29 * the {@code Locale} documentation for details of the Unicode versions implemented by current 30 * and historical Android releases. 31 * 32 * <p>The Unicode specification, character tables, and other information are available at 33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 34 * 35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 39 * encoding and {@code char} pairs are used to represent code points in the 40 * supplementary range. 
A pair of {@code char} values that represent a 41 * supplementary character are made up of a <i>high surrogate</i> with a value 42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 43 * 0xDC00 to 0xDFFF. 44 * <p> 45 * On the Java platform a {@code char} value represents either a single BMP code 46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 47 * is used to represent all Unicode code points. 48 * 49 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 51 * grouped semantically to provide a convenient overview. This table is also useful in 52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 53 * <span class="datatable"> 54 * <style type="text/css"> 55 * .datatable td { padding-right: 20px; } 56 * </style> 57 * <p><table> 58 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 59 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 60 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 62 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 63 * <tr> <td><br></td> </tr> 64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 67 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 68 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 69 * <tr> <td><br></td> </tr> 70 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 71 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 73 * <tr> <td><br></td> </tr> 74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 75 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 76 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 77 * <tr> <td><br></td> </tr> 78 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 79 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 84 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 85 * <tr> <td><br></td> </tr> 86 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 89 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 90 * <tr> <td><br></td> </tr> 91 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 94 * </table> 95 * </span> 96 * 97 * @since 1.0 98 */ 99@FindBugsSuppressWarnings("DM_NUMBER_CTOR") 100public final class Character 
implements Serializable, Comparable<Character> { 101 private static final long serialVersionUID = 3786198910865385080L; 102 103 private final char value; 104 105 /** 106 * The minimum {@code Character} value. 107 */ 108 public static final char MIN_VALUE = '\u0000'; 109 110 /** 111 * The maximum {@code Character} value. 112 */ 113 public static final char MAX_VALUE = '\uffff'; 114 115 /** 116 * The minimum radix used for conversions between characters and integers. 117 */ 118 public static final int MIN_RADIX = 2; 119 120 /** 121 * The maximum radix used for conversions between characters and integers. 122 */ 123 public static final int MAX_RADIX = 36; 124 125 /** 126 * The {@link Class} object that represents the primitive type {@code char}. 127 */ 128 @SuppressWarnings("unchecked") 129 public static final Class<Character> TYPE 130 = (Class<Character>) char[].class.getComponentType(); 131 // Note: Character.TYPE can't be set to "char.class", since *that* is 132 // defined to be "java.lang.Character.TYPE"; 133 134 /** 135 * Unicode category constant Cn. 136 */ 137 public static final byte UNASSIGNED = 0; 138 139 /** 140 * Unicode category constant Lu. 141 */ 142 public static final byte UPPERCASE_LETTER = 1; 143 144 /** 145 * Unicode category constant Ll. 146 */ 147 public static final byte LOWERCASE_LETTER = 2; 148 149 /** 150 * Unicode category constant Lt. 151 */ 152 public static final byte TITLECASE_LETTER = 3; 153 154 /** 155 * Unicode category constant Lm. 156 */ 157 public static final byte MODIFIER_LETTER = 4; 158 159 /** 160 * Unicode category constant Lo. 161 */ 162 public static final byte OTHER_LETTER = 5; 163 164 /** 165 * Unicode category constant Mn. 166 */ 167 public static final byte NON_SPACING_MARK = 6; 168 169 /** 170 * Unicode category constant Me. 171 */ 172 public static final byte ENCLOSING_MARK = 7; 173 174 /** 175 * Unicode category constant Mc. 
176 */ 177 public static final byte COMBINING_SPACING_MARK = 8; 178 179 /** 180 * Unicode category constant Nd. 181 */ 182 public static final byte DECIMAL_DIGIT_NUMBER = 9; 183 184 /** 185 * Unicode category constant Nl. 186 */ 187 public static final byte LETTER_NUMBER = 10; 188 189 /** 190 * Unicode category constant No. 191 */ 192 public static final byte OTHER_NUMBER = 11; 193 194 /** 195 * Unicode category constant Zs. 196 */ 197 public static final byte SPACE_SEPARATOR = 12; 198 199 /** 200 * Unicode category constant Zl. 201 */ 202 public static final byte LINE_SEPARATOR = 13; 203 204 /** 205 * Unicode category constant Zp. 206 */ 207 public static final byte PARAGRAPH_SEPARATOR = 14; 208 209 /** 210 * Unicode category constant Cc. 211 */ 212 public static final byte CONTROL = 15; 213 214 /** 215 * Unicode category constant Cf. 216 */ 217 public static final byte FORMAT = 16; 218 219 /** 220 * Unicode category constant Co. 221 */ 222 public static final byte PRIVATE_USE = 18; 223 224 /** 225 * Unicode category constant Cs. 226 */ 227 public static final byte SURROGATE = 19; 228 229 /** 230 * Unicode category constant Pd. 231 */ 232 public static final byte DASH_PUNCTUATION = 20; 233 234 /** 235 * Unicode category constant Ps. 236 */ 237 public static final byte START_PUNCTUATION = 21; 238 239 /** 240 * Unicode category constant Pe. 241 */ 242 public static final byte END_PUNCTUATION = 22; 243 244 /** 245 * Unicode category constant Pc. 246 */ 247 public static final byte CONNECTOR_PUNCTUATION = 23; 248 249 /** 250 * Unicode category constant Po. 251 */ 252 public static final byte OTHER_PUNCTUATION = 24; 253 254 /** 255 * Unicode category constant Sm. 256 */ 257 public static final byte MATH_SYMBOL = 25; 258 259 /** 260 * Unicode category constant Sc. 261 */ 262 public static final byte CURRENCY_SYMBOL = 26; 263 264 /** 265 * Unicode category constant Sk. 
266 */ 267 public static final byte MODIFIER_SYMBOL = 27; 268 269 /** 270 * Unicode category constant So. 271 */ 272 public static final byte OTHER_SYMBOL = 28; 273 274 /** 275 * Unicode category constant Pi. 276 * 277 * @since 1.4 278 */ 279 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 280 281 /** 282 * Unicode category constant Pf. 283 * 284 * @since 1.4 285 */ 286 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 287 288 /** 289 * Unicode bidirectional constant. 290 * 291 * @since 1.4 292 */ 293 public static final byte DIRECTIONALITY_UNDEFINED = -1; 294 295 /** 296 * Unicode bidirectional constant L. 297 * 298 * @since 1.4 299 */ 300 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 301 302 /** 303 * Unicode bidirectional constant R. 304 * 305 * @since 1.4 306 */ 307 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 308 309 /** 310 * Unicode bidirectional constant AL. 311 * 312 * @since 1.4 313 */ 314 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 315 316 /** 317 * Unicode bidirectional constant EN. 318 * 319 * @since 1.4 320 */ 321 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 322 323 /** 324 * Unicode bidirectional constant ES. 325 * 326 * @since 1.4 327 */ 328 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 329 330 /** 331 * Unicode bidirectional constant ET. 332 * 333 * @since 1.4 334 */ 335 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 336 337 /** 338 * Unicode bidirectional constant AN. 339 * 340 * @since 1.4 341 */ 342 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 343 344 /** 345 * Unicode bidirectional constant CS. 346 * 347 * @since 1.4 348 */ 349 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 350 351 /** 352 * Unicode bidirectional constant NSM. 353 * 354 * @since 1.4 355 */ 356 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 357 358 /** 359 * Unicode bidirectional constant BN. 
360 * 361 * @since 1.4 362 */ 363 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 364 365 /** 366 * Unicode bidirectional constant B. 367 * 368 * @since 1.4 369 */ 370 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 371 372 /** 373 * Unicode bidirectional constant S. 374 * 375 * @since 1.4 376 */ 377 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 378 379 /** 380 * Unicode bidirectional constant WS. 381 * 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_WHITESPACE = 12; 385 386 /** 387 * Unicode bidirectional constant ON. 388 * 389 * @since 1.4 390 */ 391 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 392 393 /** 394 * Unicode bidirectional constant LRE. 395 * 396 * @since 1.4 397 */ 398 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 399 400 /** 401 * Unicode bidirectional constant LRO. 402 * 403 * @since 1.4 404 */ 405 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 406 407 /** 408 * Unicode bidirectional constant RLE. 409 * 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 413 414 /** 415 * Unicode bidirectional constant RLO. 416 * 417 * @since 1.4 418 */ 419 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 420 421 /** 422 * Unicode bidirectional constant PDF. 423 * 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 427 428 /** 429 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 430 * encoding, {@code '\uD800'}. 431 * 432 * @since 1.5 433 */ 434 public static final char MIN_HIGH_SURROGATE = '\uD800'; 435 436 /** 437 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 438 * encoding, {@code '\uDBFF'}. 
439 * 440 * @since 1.5 441 */ 442 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 443 444 /** 445 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 446 * encoding, {@code '\uDC00'}. 447 * 448 * @since 1.5 449 */ 450 public static final char MIN_LOW_SURROGATE = '\uDC00'; 451 452 /** 453 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 454 * encoding, {@code '\uDFFF'}. 455 * 456 * @since 1.5 457 */ 458 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 459 460 /** 461 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 462 * 463 * @since 1.5 464 */ 465 public static final char MIN_SURROGATE = '\uD800'; 466 467 /** 468 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 469 * 470 * @since 1.5 471 */ 472 public static final char MAX_SURROGATE = '\uDFFF'; 473 474 /** 475 * The minimum value of a supplementary code point, {@code U+010000}. 476 * 477 * @since 1.5 478 */ 479 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 480 481 /** 482 * The minimum code point value, {@code U+0000}. 483 * 484 * @since 1.5 485 */ 486 public static final int MIN_CODE_POINT = 0x000000; 487 488 /** 489 * The maximum code point value, {@code U+10FFFF}. 490 * 491 * @since 1.5 492 */ 493 public static final int MAX_CODE_POINT = 0x10FFFF; 494 495 /** 496 * The number of bits required to represent a {@code Character} value 497 * unsigned form. 
498 * 499 * @since 1.5 500 */ 501 public static final int SIZE = 16; 502 503 private static final byte[] DIRECTIONALITY = new byte[] { 504 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 505 DIRECTIONALITY_EUROPEAN_NUMBER, 506 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 507 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 508 DIRECTIONALITY_ARABIC_NUMBER, 509 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 510 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 511 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 512 DIRECTIONALITY_OTHER_NEUTRALS, 513 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 514 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 515 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 516 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 517 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 518 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 519 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 520 521 /* 522 * Represents a subset of the Unicode character set. 523 */ 524 public static class Subset { 525 private final String name; 526 527 /** 528 * Constructs a new {@code Subset}. 529 */ 530 protected Subset(String name) { 531 if (name == null) { 532 throw new NullPointerException("name == null"); 533 } 534 this.name = name; 535 } 536 537 /** 538 * Compares this character subset for identity with the specified object. 539 */ 540 @Override public final boolean equals(Object object) { 541 return object == this; 542 } 543 544 /** 545 * Returns this subset's hash code, which is the hash code computed by 546 * {@link java.lang.Object#hashCode()}. 547 */ 548 @Override public final int hashCode() { 549 return super.hashCode(); 550 } 551 552 /** 553 * Returns this subset's name. 554 */ 555 @Override public final String toString() { 556 return name; 557 } 558 } 559 560 /** 561 * Represents a block of Unicode characters, as defined by the Unicode 4.0.1 562 * specification. 
563 * 564 * @since 1.2 565 */ 566 public static final class UnicodeBlock extends Subset { 567 /** 568 * The "Surrogates Area" Unicode Block. 569 * 570 * @deprecated As of Java 5, this block has been replaced by 571 * {@link #HIGH_SURROGATES}, 572 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 573 * {@link #LOW_SURROGATES}. 574 */ 575 @Deprecated 576 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"); 577 /** 578 * The "Basic Latin" Unicode Block. 579 * 580 * @since 1.2 581 */ 582 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"); 583 /** 584 * The "Latin-1 Supplement" Unicode Block. 585 * 586 * @since 1.2 587 */ 588 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"); 589 /** 590 * The "Latin Extended-A" Unicode Block. 591 * 592 * @since 1.2 593 */ 594 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"); 595 /** 596 * The "Latin Extended-B" Unicode Block. 597 * 598 * @since 1.2 599 */ 600 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"); 601 /** 602 * The "IPA Extensions" Unicode Block. 603 * 604 * @since 1.2 605 */ 606 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"); 607 /** 608 * The "Spacing Modifier Letters" Unicode Block. 609 * 610 * @since 1.2 611 */ 612 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"); 613 /** 614 * The "Combining Diacritical Marks" Unicode Block. 615 * 616 * @since 1.2 617 */ 618 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"); 619 /** 620 * The "Greek and Coptic" Unicode Block. Previously referred 621 * to as "Greek". 622 * 623 * @since 1.2 624 */ 625 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK"); 626 /** 627 * The "Cyrillic" Unicode Block. 
628 * 629 * @since 1.2 630 */ 631 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC"); 632 /** 633 * The "Cyrillic Supplement" Unicode Block. Previously 634 * referred to as "Cyrillic Supplementary". 635 * 636 * @since 1.5 637 */ 638 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY"); 639 /** 640 * The "Armenian" Unicode Block. 641 * 642 * @since 1.2 643 */ 644 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN"); 645 /** 646 * The "Hebrew" Unicode Block. 647 * 648 * @since 1.2 649 */ 650 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW"); 651 /** 652 * The "Arabic" Unicode Block. 653 * 654 * @since 1.2 655 */ 656 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC"); 657 /** 658 * The "Syriac" Unicode Block. 659 * 660 * @since 1.4 661 */ 662 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"); 663 /** 664 * The "Thaana" Unicode Block. 665 * 666 * @since 1.4 667 */ 668 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA"); 669 /** 670 * The "Devanagari" Unicode Block. 671 * 672 * @since 1.2 673 */ 674 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI"); 675 /** 676 * The "Bengali" Unicode Block. 677 * 678 * @since 1.2 679 */ 680 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI"); 681 /** 682 * The "Gurmukhi" Unicode Block. 683 * 684 * @since 1.2 685 */ 686 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI"); 687 /** 688 * The "Gujarati" Unicode Block. 689 * 690 * @since 1.2 691 */ 692 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI"); 693 /** 694 * The "Oriya" Unicode Block. 695 * 696 * @since 1.2 697 */ 698 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA"); 699 /** 700 * The "Tamil" Unicode Block. 
701 * 702 * @since 1.2 703 */ 704 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL"); 705 /** 706 * The "Telugu" Unicode Block. 707 * 708 * @since 1.2 709 */ 710 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU"); 711 /** 712 * The "Kannada" Unicode Block. 713 * 714 * @since 1.2 715 */ 716 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA"); 717 /** 718 * The "Malayalam" Unicode Block. 719 * 720 * @since 1.2 721 */ 722 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM"); 723 /** 724 * The "Sinhala" Unicode Block. 725 * 726 * @since 1.4 727 */ 728 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA"); 729 /** 730 * The "Thai" Unicode Block. 731 * 732 * @since 1.2 733 */ 734 public static final UnicodeBlock THAI = new UnicodeBlock("THAI"); 735 /** 736 * The "Lao" Unicode Block. 737 * 738 * @since 1.2 739 */ 740 public static final UnicodeBlock LAO = new UnicodeBlock("LAO"); 741 /** 742 * The "Tibetan" Unicode Block. 743 * 744 * @since 1.2 745 */ 746 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN"); 747 /** 748 * The "Myanmar" Unicode Block. 749 * 750 * @since 1.4 751 */ 752 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR"); 753 /** 754 * The "Georgian" Unicode Block. 755 * 756 * @since 1.2 757 */ 758 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN"); 759 /** 760 * The "Hangul Jamo" Unicode Block. 761 * 762 * @since 1.2 763 */ 764 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"); 765 /** 766 * The "Ethiopic" Unicode Block. 767 * 768 * @since 1.4 769 */ 770 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC"); 771 /** 772 * The "Cherokee" Unicode Block. 773 * 774 * @since 1.4 775 */ 776 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE"); 777 /** 778 * The "Unified Canadian Aboriginal Syllabics" Unicode Block. 
779 * 780 * @since 1.4 781 */ 782 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 783 /** 784 * The "Ogham" Unicode Block. 785 * 786 * @since 1.4 787 */ 788 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM"); 789 /** 790 * The "Runic" Unicode Block. 791 * 792 * @since 1.4 793 */ 794 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC"); 795 /** 796 * The "Tagalog" Unicode Block. 797 * 798 * @since 1.5 799 */ 800 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG"); 801 /** 802 * The "Hanunoo" Unicode Block. 803 * 804 * @since 1.5 805 */ 806 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO"); 807 /** 808 * The "Buhid" Unicode Block. 809 * 810 * @since 1.5 811 */ 812 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID"); 813 /** 814 * The "Tagbanwa" Unicode Block. 815 * 816 * @since 1.5 817 */ 818 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA"); 819 /** 820 * The "Khmer" Unicode Block. 821 * 822 * @since 1.4 823 */ 824 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER"); 825 /** 826 * The "Mongolian" Unicode Block. 827 * 828 * @since 1.4 829 */ 830 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN"); 831 /** 832 * The "Limbu" Unicode Block. 833 * 834 * @since 1.5 835 */ 836 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU"); 837 /** 838 * The "Tai Le" Unicode Block. 839 * 840 * @since 1.5 841 */ 842 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE"); 843 /** 844 * The "Khmer Symbols" Unicode Block. 845 * 846 * @since 1.5 847 */ 848 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS"); 849 /** 850 * The "Phonetic Extensions" Unicode Block. 
851 * 852 * @since 1.5 853 */ 854 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS"); 855 /** 856 * The "Latin Extended Additional" Unicode Block. 857 * 858 * @since 1.2 859 */ 860 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"); 861 /** 862 * The "Greek Extended" Unicode Block. 863 * 864 * @since 1.2 865 */ 866 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"); 867 /** 868 * The "General Punctuation" Unicode Block. 869 * 870 * @since 1.2 871 */ 872 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"); 873 /** 874 * The "Superscripts and Subscripts" Unicode Block. 875 * 876 * @since 1.2 877 */ 878 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"); 879 /** 880 * The "Currency Symbols" Unicode Block. 881 * 882 * @since 1.2 883 */ 884 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"); 885 /** 886 * The "Combining Diacritical Marks for Symbols" Unicode 887 * Block. Previously referred to as "Combining Marks for 888 * Symbols". 889 * 890 * @since 1.2 891 */ 892 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"); 893 /** 894 * The "Letterlike Symbols" Unicode Block. 895 * 896 * @since 1.2 897 */ 898 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"); 899 /** 900 * The "Number Forms" Unicode Block. 901 * 902 * @since 1.2 903 */ 904 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"); 905 /** 906 * The "Arrows" Unicode Block. 907 * 908 * @since 1.2 909 */ 910 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS"); 911 /** 912 * The "Mathematical Operators" Unicode Block. 
913 * 914 * @since 1.2 915 */ 916 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"); 917 /** 918 * The "Miscellaneous Technical" Unicode Block. 919 * 920 * @since 1.2 921 */ 922 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"); 923 /** 924 * The "Control Pictures" Unicode Block. 925 * 926 * @since 1.2 927 */ 928 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"); 929 /** 930 * The "Optical Character Recognition" Unicode Block. 931 * 932 * @since 1.2 933 */ 934 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"); 935 /** 936 * The "Enclosed Alphanumerics" Unicode Block. 937 * 938 * @since 1.2 939 */ 940 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"); 941 /** 942 * The "Box Drawing" Unicode Block. 943 * 944 * @since 1.2 945 */ 946 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"); 947 /** 948 * The "Block Elements" Unicode Block. 949 * 950 * @since 1.2 951 */ 952 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"); 953 /** 954 * The "Geometric Shapes" Unicode Block. 955 * 956 * @since 1.2 957 */ 958 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"); 959 /** 960 * The "Miscellaneous Symbols" Unicode Block. 961 * 962 * @since 1.2 963 */ 964 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"); 965 /** 966 * The "Dingbats" Unicode Block. 967 * 968 * @since 1.2 969 */ 970 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS"); 971 /** 972 * The "Miscellaneous Mathematical Symbols-A" Unicode Block. 
973 * 974 * @since 1.5 975 */ 976 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A"); 977 /** 978 * The "Supplemental Arrows-A" Unicode Block. 979 * 980 * @since 1.5 981 */ 982 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A"); 983 /** 984 * The "Braille Patterns" Unicode Block. 985 * 986 * @since 1.4 987 */ 988 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"); 989 /** 990 * The "Supplemental Arrows-B" Unicode Block. 991 * 992 * @since 1.5 993 */ 994 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B"); 995 /** 996 * The "Miscellaneous Mathematical Symbols-B" Unicode Block. 997 * 998 * @since 1.5 999 */ 1000 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B"); 1001 /** 1002 * The "Supplemental Mathematical Operators" Unicode Block. 1003 * 1004 * @since 1.5 1005 */ 1006 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS"); 1007 /** 1008 * The "Miscellaneous Symbols and Arrows" Unicode Block. 1009 * 1010 * @since 1.2 1011 */ 1012 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS"); 1013 /** 1014 * The "CJK Radicals Supplement" Unicode Block. 1015 * 1016 * @since 1.4 1017 */ 1018 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"); 1019 /** 1020 * The "Kangxi Radicals" Unicode Block. 1021 * 1022 * @since 1.4 1023 */ 1024 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"); 1025 /** 1026 * The "Ideographic Description Characters" Unicode Block. 
1027 * 1028 * @since 1.4 1029 */ 1030 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 1031 /** 1032 * The "CJK Symbols and Punctuation" Unicode Block. 1033 * 1034 * @since 1.2 1035 */ 1036 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"); 1037 /** 1038 * The "Hiragana" Unicode Block. 1039 * 1040 * @since 1.2 1041 */ 1042 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA"); 1043 /** 1044 * The "Katakana" Unicode Block. 1045 * 1046 * @since 1.2 1047 */ 1048 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA"); 1049 /** 1050 * The "Bopomofo" Unicode Block. 1051 * 1052 * @since 1.2 1053 */ 1054 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO"); 1055 /** 1056 * The "Hangul Compatibility Jamo" Unicode Block. 1057 * 1058 * @since 1.2 1059 */ 1060 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"); 1061 /** 1062 * The "Kanbun" Unicode Block. 1063 * 1064 * @since 1.2 1065 */ 1066 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN"); 1067 /** 1068 * The "Bopomofo Extended" Unicode Block. 1069 * 1070 * @since 1.4 1071 */ 1072 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"); 1073 /** 1074 * The "Katakana Phonetic Extensions" Unicode Block. 1075 * 1076 * @since 1.5 1077 */ 1078 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS"); 1079 /** 1080 * The "Enclosed CJK Letters and Months" Unicode Block. 1081 * 1082 * @since 1.2 1083 */ 1084 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"); 1085 /** 1086 * The "CJK Compatibility" Unicode Block. 
1087 * 1088 * @since 1.2 1089 */ 1090 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"); 1091 /** 1092 * The "CJK Unified Ideographs Extension A" Unicode Block. 1093 * 1094 * @since 1.4 1095 */ 1096 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 1097 /** 1098 * The "Yijing Hexagram Symbols" Unicode Block. 1099 * 1100 * @since 1.5 1101 */ 1102 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS"); 1103 /** 1104 * The "CJK Unified Ideographs" Unicode Block. 1105 * 1106 * @since 1.2 1107 */ 1108 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"); 1109 /** 1110 * The "Yi Syllables" Unicode Block. 1111 * 1112 * @since 1.4 1113 */ 1114 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"); 1115 /** 1116 * The "Yi Radicals" Unicode Block. 1117 * 1118 * @since 1.4 1119 */ 1120 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS"); 1121 /** 1122 * The "Hangul Syllables" Unicode Block. 1123 * 1124 * @since 1.2 1125 */ 1126 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"); 1127 /** 1128 * The "High Surrogates" Unicode Block. This block represents 1129 * code point values in the high surrogate range 0xD800 to 0xDB7F 1130 */ 1131 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES"); 1132 /** 1133 * The "High Private Use Surrogates" Unicode Block. This block 1134 * represents code point values in the high surrogate range 0xDB80 to 1135 * 0xDBFF 1136 */ 1137 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES"); 1138 /** 1139 * The "Low Surrogates" Unicode Block. 
This block represents 1140 * code point values in the low surrogate range 0xDC00 to 0xDFFF 1141 */ 1142 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES"); 1143 /** 1144 * The "Private Use Area" Unicode Block. 1145 * 1146 * @since 1.2 1147 */ 1148 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"); 1149 /** 1150 * The "CJK Compatibility Ideographs" Unicode Block. 1151 * 1152 * @since 1.2 1153 */ 1154 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"); 1155 /** 1156 * The "Alphabetic Presentation Forms" Unicode Block. 1157 * 1158 * @since 1.2 1159 */ 1160 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"); 1161 /** 1162 * The "Arabic Presentation Forms-A" Unicode Block. 1163 * 1164 * @since 1.2 1165 */ 1166 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"); 1167 /** 1168 * The "Variation Selectors" Unicode Block. 1169 * 1170 * @since 1.5 1171 */ 1172 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS"); 1173 /** 1174 * The "Combining Half Marks" Unicode Block. 1175 * 1176 * @since 1.2 1177 */ 1178 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"); 1179 /** 1180 * The "CJK Compatibility Forms" Unicode Block. 1181 * 1182 * @since 1.2 1183 */ 1184 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"); 1185 /** 1186 * The "Small Form Variants" Unicode Block. 1187 * 1188 * @since 1.2 1189 */ 1190 public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS"); 1191 /** 1192 * The "Arabic Presentation Forms-B" Unicode Block. 
1193 * 1194 * @since 1.2 1195 */ 1196 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"); 1197 /** 1198 * The "Halfwidth and Fullwidth Forms" Unicode Block. 1199 * 1200 * @since 1.2 1201 */ 1202 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"); 1203 /** 1204 * The "Specials" Unicode Block. 1205 * 1206 * @since 1.2 1207 */ 1208 public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS"); 1209 /** 1210 * The "Linear B Syllabary" Unicode Block. 1211 * 1212 * @since 1.2 1213 */ 1214 public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY"); 1215 /** 1216 * The "Linear B Ideograms" Unicode Block. 1217 * 1218 * @since 1.5 1219 */ 1220 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS"); 1221 /** 1222 * The "Aegean Numbers" Unicode Block. 1223 * 1224 * @since 1.5 1225 */ 1226 public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS"); 1227 /** 1228 * The "Old Italic" Unicode Block. 1229 * 1230 * @since 1.5 1231 */ 1232 public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC"); 1233 /** 1234 * The "Gothic" Unicode Block. 1235 * 1236 * @since 1.5 1237 */ 1238 public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC"); 1239 /** 1240 * The "Ugaritic" Unicode Block. 1241 * 1242 * @since 1.5 1243 */ 1244 public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC"); 1245 /** 1246 * The "Deseret" Unicode Block. 1247 * 1248 * @since 1.5 1249 */ 1250 public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET"); 1251 /** 1252 * The "Shavian" Unicode Block. 1253 * 1254 * @since 1.5 1255 */ 1256 public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN"); 1257 /** 1258 * The "Osmanya" Unicode Block. 
1259 * 1260 * @since 1.5 1261 */ 1262 public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA"); 1263 /** 1264 * The "Cypriot Syllabary" Unicode Block. 1265 * 1266 * @since 1.5 1267 */ 1268 public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY"); 1269 /** 1270 * The "Byzantine Musical Symbols" Unicode Block. 1271 * 1272 * @since 1.5 1273 */ 1274 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS"); 1275 /** 1276 * The "Musical Symbols" Unicode Block. 1277 * 1278 * @since 1.5 1279 */ 1280 public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS"); 1281 /** 1282 * The "Tai Xuan Jing Symbols" Unicode Block. 1283 * 1284 * @since 1.5 1285 */ 1286 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS"); 1287 /** 1288 * The "Mathematical Alphanumeric Symbols" Unicode Block. 1289 * 1290 * @since 1.5 1291 */ 1292 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS"); 1293 /** 1294 * The "CJK Unified Ideographs Extension B" Unicode Block. 1295 * 1296 * @since 1.5 1297 */ 1298 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"); 1299 /** 1300 * The "CJK Compatibility Ideographs Supplement" Unicode Block. 1301 * 1302 * @since 1.5 1303 */ 1304 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT"); 1305 /** 1306 * The "Tags" Unicode Block. 1307 * 1308 * @since 1.5 1309 */ 1310 public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS"); 1311 /** 1312 * The "Variation Selectors Supplement" Unicode Block. 
1313 * 1314 * @since 1.5 1315 */ 1316 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT"); 1317 /** 1318 * The "Supplementary Private Use Area-A" Unicode Block. 1319 * 1320 * @since 1.5 1321 */ 1322 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A"); 1323 /** 1324 * The "Supplementary Private Use Area-B" Unicode Block. 1325 * 1326 * @since 1.5 1327 */ 1328 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B"); 1329 1330 /* 1331 * All of the UnicodeBlocks with valid ranges in ascending order. 1332 */ 1333 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1334 null, 1335 UnicodeBlock.BASIC_LATIN, 1336 UnicodeBlock.LATIN_1_SUPPLEMENT, 1337 UnicodeBlock.LATIN_EXTENDED_A, 1338 UnicodeBlock.LATIN_EXTENDED_B, 1339 UnicodeBlock.IPA_EXTENSIONS, 1340 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1341 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1342 UnicodeBlock.GREEK, 1343 UnicodeBlock.CYRILLIC, 1344 UnicodeBlock.ARMENIAN, 1345 UnicodeBlock.HEBREW, 1346 UnicodeBlock.ARABIC, 1347 UnicodeBlock.SYRIAC, 1348 UnicodeBlock.THAANA, 1349 UnicodeBlock.DEVANAGARI, 1350 UnicodeBlock.BENGALI, 1351 UnicodeBlock.GURMUKHI, 1352 UnicodeBlock.GUJARATI, 1353 UnicodeBlock.ORIYA, 1354 UnicodeBlock.TAMIL, 1355 UnicodeBlock.TELUGU, 1356 UnicodeBlock.KANNADA, 1357 UnicodeBlock.MALAYALAM, 1358 UnicodeBlock.SINHALA, 1359 UnicodeBlock.THAI, 1360 UnicodeBlock.LAO, 1361 UnicodeBlock.TIBETAN, 1362 UnicodeBlock.MYANMAR, 1363 UnicodeBlock.GEORGIAN, 1364 UnicodeBlock.HANGUL_JAMO, 1365 UnicodeBlock.ETHIOPIC, 1366 UnicodeBlock.CHEROKEE, 1367 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1368 UnicodeBlock.OGHAM, 1369 UnicodeBlock.RUNIC, 1370 UnicodeBlock.KHMER, 1371 UnicodeBlock.MONGOLIAN, 1372 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1373 UnicodeBlock.GREEK_EXTENDED, 1374 
UnicodeBlock.GENERAL_PUNCTUATION, 1375 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1376 UnicodeBlock.CURRENCY_SYMBOLS, 1377 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1378 UnicodeBlock.LETTERLIKE_SYMBOLS, 1379 UnicodeBlock.NUMBER_FORMS, 1380 UnicodeBlock.ARROWS, 1381 UnicodeBlock.MATHEMATICAL_OPERATORS, 1382 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1383 UnicodeBlock.CONTROL_PICTURES, 1384 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1385 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1386 UnicodeBlock.BOX_DRAWING, 1387 UnicodeBlock.BLOCK_ELEMENTS, 1388 UnicodeBlock.GEOMETRIC_SHAPES, 1389 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1390 UnicodeBlock.DINGBATS, 1391 UnicodeBlock.BRAILLE_PATTERNS, 1392 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1393 UnicodeBlock.KANGXI_RADICALS, 1394 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 1395 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1396 UnicodeBlock.HIRAGANA, 1397 UnicodeBlock.KATAKANA, 1398 UnicodeBlock.BOPOMOFO, 1399 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1400 UnicodeBlock.KANBUN, 1401 UnicodeBlock.BOPOMOFO_EXTENDED, 1402 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1403 UnicodeBlock.CJK_COMPATIBILITY, 1404 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1405 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1406 UnicodeBlock.YI_SYLLABLES, 1407 UnicodeBlock.YI_RADICALS, 1408 UnicodeBlock.HANGUL_SYLLABLES, 1409 UnicodeBlock.HIGH_SURROGATES, 1410 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1411 UnicodeBlock.LOW_SURROGATES, 1412 UnicodeBlock.PRIVATE_USE_AREA, 1413 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1414 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1415 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1416 UnicodeBlock.COMBINING_HALF_MARKS, 1417 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1418 UnicodeBlock.SMALL_FORM_VARIANTS, 1419 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1420 UnicodeBlock.SPECIALS, 1421 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1422 UnicodeBlock.OLD_ITALIC, 1423 UnicodeBlock.GOTHIC, 1424 UnicodeBlock.DESERET, 1425 
UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1426 UnicodeBlock.MUSICAL_SYMBOLS, 1427 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1428 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1429 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1430 UnicodeBlock.TAGS, 1431 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1432 UnicodeBlock.TAGALOG, 1433 UnicodeBlock.HANUNOO, 1434 UnicodeBlock.BUHID, 1435 UnicodeBlock.TAGBANWA, 1436 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1437 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1438 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1439 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1440 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1441 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1442 UnicodeBlock.VARIATION_SELECTORS, 1443 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1444 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1445 UnicodeBlock.LIMBU, 1446 UnicodeBlock.TAI_LE, 1447 UnicodeBlock.KHMER_SYMBOLS, 1448 UnicodeBlock.PHONETIC_EXTENSIONS, 1449 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1450 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1451 UnicodeBlock.LINEAR_B_SYLLABARY, 1452 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1453 UnicodeBlock.AEGEAN_NUMBERS, 1454 UnicodeBlock.UGARITIC, 1455 UnicodeBlock.SHAVIAN, 1456 UnicodeBlock.OSMANYA, 1457 UnicodeBlock.CYPRIOT_SYLLABARY, 1458 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1459 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT 1460 }; 1461 1462 /** 1463 * Retrieves the constant that corresponds to the specified block name. 1464 * The block names are defined by the Unicode 4.0.1 specification in the 1465 * {@code Blocks-4.0.1.txt} file. 1466 * <p> 1467 * Block names may be one of the following: 1468 * <ul> 1469 * <li>Canonical block name, as defined by the Unicode specification; 1470 * case-insensitive.</li> 1471 * <li>Canonical block name without any spaces, as defined by the 1472 * Unicode specification; case-insensitive.</li> 1473 * <li>{@code UnicodeBlock} constant identifier. 
This is determined by 1474 * converting the canonical name to uppercase and replacing all spaces and hyphens 1475 * with underscores.</li> 1476 * </ul> 1477 * 1478 * @param blockName 1479 * the name of the block to retrieve. 1480 * @return the UnicodeBlock constant corresponding to {@code blockName}. 1481 * @throws NullPointerException 1482 * if {@code blockName} is {@code null}. 1483 * @throws IllegalArgumentException 1484 * if {@code blockName} is not a valid block name. 1485 * @since 1.5 1486 */ 1487 public static UnicodeBlock forName(String blockName) { 1488 if (blockName == null) { 1489 throw new NullPointerException("blockName == null"); 1490 } 1491 int block = forNameImpl(blockName); 1492 if (block == -1) { 1493 if (blockName.equals("SURROGATES_AREA")) { 1494 return SURROGATES_AREA; 1495 } else if(blockName.equalsIgnoreCase("greek")) { 1496 return GREEK; 1497 } else if(blockName.equals("COMBINING_MARKS_FOR_SYMBOLS") || 1498 blockName.equals("Combining Marks for Symbols") || 1499 blockName.equals("CombiningMarksforSymbols")) { 1500 return COMBINING_MARKS_FOR_SYMBOLS; 1501 } 1502 throw new IllegalArgumentException("Bad block name: " + blockName); 1503 } 1504 return BLOCKS[block]; 1505 } 1506 1507 /** 1508 * Gets the constant for the Unicode block that contains the specified 1509 * character. 1510 * 1511 * @param c 1512 * the character for which to get the {@code UnicodeBlock} 1513 * constant. 1514 * @return the {@code UnicodeBlock} constant for the block that contains 1515 * {@code c}, or {@code null} if {@code c} does not belong to 1516 * any defined block. 1517 */ 1518 public static UnicodeBlock of(char c) { 1519 return of((int) c); 1520 } 1521 1522 /** 1523 * Gets the constant for the Unicode block that contains the specified 1524 * Unicode code point. 1525 * 1526 * @param codePoint 1527 * the Unicode code point for which to get the 1528 * {@code UnicodeBlock} constant. 
1529 * @return the {@code UnicodeBlock} constant for the block that contains 1530 * {@code codePoint}, or {@code null} if {@code codePoint} does 1531 * not belong to any defined block. 1532 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 1533 * @since 1.5 1534 */ 1535 public static UnicodeBlock of(int codePoint) { 1536 checkValidCodePoint(codePoint); 1537 int block = ofImpl(codePoint); 1538 if (block == -1 || block >= BLOCKS.length) { 1539 return null; 1540 } 1541 return BLOCKS[block]; 1542 } 1543 1544 private UnicodeBlock(String blockName) { 1545 super(blockName); 1546 } 1547 } 1548 1549 private static native int forNameImpl(String blockName); 1550 1551 private static native int ofImpl(int codePoint); 1552 1553 /** 1554 * Constructs a new {@code Character} with the specified primitive char 1555 * value. 1556 * 1557 * @param value 1558 * the primitive char value to store in the new instance. 1559 */ 1560 public Character(char value) { 1561 this.value = value; 1562 } 1563 1564 /** 1565 * Gets the primitive value of this character. 1566 * 1567 * @return this object's primitive value. 1568 */ 1569 public char charValue() { 1570 return value; 1571 } 1572 1573 private static void checkValidCodePoint(int codePoint) { 1574 if (!isValidCodePoint(codePoint)) { 1575 throw new IllegalArgumentException("Invalid code point: " + codePoint); 1576 } 1577 } 1578 1579 /** 1580 * Compares this object to the specified character object to determine their 1581 * relative order. 1582 * 1583 * @param c 1584 * the character object to compare this object to. 1585 * @return {@code 0} if the value of this character and the value of 1586 * {@code c} are equal; a positive value if the value of this 1587 * character is greater than the value of {@code c}; a negative 1588 * value if the value of this character is less than the value of 1589 * {@code c}. 
1590 * @see java.lang.Comparable 1591 * @since 1.2 1592 */ 1593 public int compareTo(Character c) { 1594 return compare(value, c.value); 1595 } 1596 1597 /** 1598 * Compares two {@code char} values. 1599 * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. 1600 * @since 1.7 1601 */ 1602 public static int compare(char lhs, char rhs) { 1603 return lhs - rhs; 1604 } 1605 1606 /** 1607 * Returns a {@code Character} instance for the {@code char} value passed. 1608 * <p> 1609 * If it is not necessary to get a new {@code Character} instance, it is 1610 * recommended to use this method instead of the constructor, since it 1611 * maintains a cache of instances which may result in better performance. 1612 * 1613 * @param c 1614 * the char value for which to get a {@code Character} instance. 1615 * @return the {@code Character} instance for {@code c}. 1616 * @since 1.5 1617 */ 1618 public static Character valueOf(char c) { 1619 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1620 } 1621 1622 /** 1623 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1624 */ 1625 private static final Character[] SMALL_VALUES = new Character[128]; 1626 1627 static { 1628 for (int i = 0; i < 128; i++) { 1629 SMALL_VALUES[i] = new Character((char) i); 1630 } 1631 } 1632 /** 1633 * Indicates whether {@code codePoint} is a valid Unicode code point. 1634 * 1635 * @param codePoint 1636 * the code point to test. 1637 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1638 * {@code false} otherwise. 1639 * @since 1.5 1640 */ 1641 public static boolean isValidCodePoint(int codePoint) { 1642 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1643 } 1644 1645 /** 1646 * Indicates whether {@code codePoint} is within the supplementary code 1647 * point range. 1648 * 1649 * @param codePoint 1650 * the code point to test. 
1651 * @return {@code true} if {@code codePoint} is within the supplementary 1652 * code point range; {@code false} otherwise. 1653 * @since 1.5 1654 */ 1655 public static boolean isSupplementaryCodePoint(int codePoint) { 1656 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1657 } 1658 1659 /** 1660 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1661 * that is used for representing supplementary characters in UTF-16 1662 * encoding. 1663 * 1664 * @param ch 1665 * the character to test. 1666 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1667 * {@code false} otherwise. 1668 * @see #isLowSurrogate(char) 1669 * @since 1.5 1670 */ 1671 public static boolean isHighSurrogate(char ch) { 1672 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1673 } 1674 1675 /** 1676 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1677 * that is used for representing supplementary characters in UTF-16 1678 * encoding. 1679 * 1680 * @param ch 1681 * the character to test. 1682 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1683 * {@code false} otherwise. 1684 * @see #isHighSurrogate(char) 1685 * @since 1.5 1686 */ 1687 public static boolean isLowSurrogate(char ch) { 1688 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1689 } 1690 1691 /** 1692 * Returns true if the given character is a high or low surrogate. 1693 * @since 1.7 1694 */ 1695 public static boolean isSurrogate(char ch) { 1696 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 1697 } 1698 1699 /** 1700 * Indicates whether the specified character pair is a valid surrogate pair. 1701 * 1702 * @param high 1703 * the high surrogate unit to test. 1704 * @param low 1705 * the low surrogate unit to test. 1706 * @return {@code true} if {@code high} is a high-surrogate code unit and 1707 * {@code low} is a low-surrogate code unit; {@code false} 1708 * otherwise. 
1709 * @see #isHighSurrogate(char) 1710 * @see #isLowSurrogate(char) 1711 * @since 1.5 1712 */ 1713 public static boolean isSurrogatePair(char high, char low) { 1714 return (isHighSurrogate(high) && isLowSurrogate(low)); 1715 } 1716 1717 /** 1718 * Calculates the number of {@code char} values required to represent the 1719 * specified Unicode code point. This method checks if the {@code codePoint} 1720 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1721 * returned, otherwise {@code 1}. To test if the code point is valid, use 1722 * the {@link #isValidCodePoint(int)} method. 1723 * 1724 * @param codePoint 1725 * the code point for which to calculate the number of required 1726 * chars. 1727 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 1728 * @see #isValidCodePoint(int) 1729 * @see #isSupplementaryCodePoint(int) 1730 * @since 1.5 1731 */ 1732 public static int charCount(int codePoint) { 1733 return (codePoint >= 0x10000 ? 2 : 1); 1734 } 1735 1736 /** 1737 * Converts a surrogate pair into a Unicode code point. This method assumes 1738 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1739 * surrogates, then the result is indeterminate. The 1740 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1741 * method to validate the pair. 1742 * 1743 * @param high 1744 * the high surrogate unit. 1745 * @param low 1746 * the low surrogate unit. 1747 * @return the Unicode code point corresponding to the surrogate unit pair. 1748 * @see #isSurrogatePair(char, char) 1749 * @since 1.5 1750 */ 1751 public static int toCodePoint(char high, char low) { 1752 // See RFC 2781, Section 2.2 1753 // http://www.ietf.org/rfc/rfc2781.txt 1754 int h = (high & 0x3FF) << 10; 1755 int l = low & 0x3FF; 1756 return (h | l) + 0x10000; 1757 } 1758 1759 /** 1760 * Returns the code point at {@code index} in the specified sequence of 1761 * character units. 
If the unit at {@code index} is a high-surrogate unit, 1762 * {@code index + 1} is less than the length of the sequence and the unit at 1763 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1764 * point represented by the pair is returned; otherwise the {@code char} 1765 * value at {@code index} is returned. 1766 * 1767 * @param seq 1768 * the source sequence of {@code char} units. 1769 * @param index 1770 * the position in {@code seq} from which to retrieve the code 1771 * point. 1772 * @return the Unicode code point or {@code char} value at {@code index} in 1773 * {@code seq}. 1774 * @throws NullPointerException 1775 * if {@code seq} is {@code null}. 1776 * @throws IndexOutOfBoundsException 1777 * if the {@code index} is negative or greater than or equal to 1778 * the length of {@code seq}. 1779 * @since 1.5 1780 */ 1781 public static int codePointAt(CharSequence seq, int index) { 1782 if (seq == null) { 1783 throw new NullPointerException("seq == null"); 1784 } 1785 int len = seq.length(); 1786 if (index < 0 || index >= len) { 1787 throw new IndexOutOfBoundsException(); 1788 } 1789 1790 char high = seq.charAt(index++); 1791 if (index >= len) { 1792 return high; 1793 } 1794 char low = seq.charAt(index); 1795 if (isSurrogatePair(high, low)) { 1796 return toCodePoint(high, low); 1797 } 1798 return high; 1799 } 1800 1801 /** 1802 * Returns the code point at {@code index} in the specified array of 1803 * character units. If the unit at {@code index} is a high-surrogate unit, 1804 * {@code index + 1} is less than the length of the array and the unit at 1805 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1806 * point represented by the pair is returned; otherwise the {@code char} 1807 * value at {@code index} is returned. 1808 * 1809 * @param seq 1810 * the source array of {@code char} units. 1811 * @param index 1812 * the position in {@code seq} from which to retrieve the code 1813 * point. 
1814 * @return the Unicode code point or {@code char} value at {@code index} in 1815 * {@code seq}. 1816 * @throws NullPointerException 1817 * if {@code seq} is {@code null}. 1818 * @throws IndexOutOfBoundsException 1819 * if the {@code index} is negative or greater than or equal to 1820 * the length of {@code seq}. 1821 * @since 1.5 1822 */ 1823 public static int codePointAt(char[] seq, int index) { 1824 if (seq == null) { 1825 throw new NullPointerException("seq == null"); 1826 } 1827 int len = seq.length; 1828 if (index < 0 || index >= len) { 1829 throw new IndexOutOfBoundsException(); 1830 } 1831 1832 char high = seq[index++]; 1833 if (index >= len) { 1834 return high; 1835 } 1836 char low = seq[index]; 1837 if (isSurrogatePair(high, low)) { 1838 return toCodePoint(high, low); 1839 } 1840 return high; 1841 } 1842 1843 /** 1844 * Returns the code point at {@code index} in the specified array of 1845 * character units, where {@code index} has to be less than {@code limit}. 1846 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1847 * is less than {@code limit} and the unit at {@code index + 1} is a 1848 * low-surrogate unit, then the supplementary code point represented by the 1849 * pair is returned; otherwise the {@code char} value at {@code index} is 1850 * returned. 1851 * 1852 * @param seq 1853 * the source array of {@code char} units. 1854 * @param index 1855 * the position in {@code seq} from which to get the code point. 1856 * @param limit 1857 * the index after the last unit in {@code seq} that can be used. 1858 * @return the Unicode code point or {@code char} value at {@code index} in 1859 * {@code seq}. 1860 * @throws NullPointerException 1861 * if {@code seq} is {@code null}. 1862 * @throws IndexOutOfBoundsException 1863 * if {@code index < 0}, {@code index >= limit}, 1864 * {@code limit < 0} or if {@code limit} is greater than the 1865 * length of {@code seq}. 
1866 * @since 1.5 1867 */ 1868 public static int codePointAt(char[] seq, int index, int limit) { 1869 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1870 throw new IndexOutOfBoundsException(); 1871 } 1872 1873 char high = seq[index++]; 1874 if (index >= limit) { 1875 return high; 1876 } 1877 char low = seq[index]; 1878 if (isSurrogatePair(high, low)) { 1879 return toCodePoint(high, low); 1880 } 1881 return high; 1882 } 1883 1884 /** 1885 * Returns the code point that precedes {@code index} in the specified 1886 * sequence of character units. If the unit at {@code index - 1} is a 1887 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1888 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1889 * point represented by the pair is returned; otherwise the {@code char} 1890 * value at {@code index - 1} is returned. 1891 * 1892 * @param seq 1893 * the source sequence of {@code char} units. 1894 * @param index 1895 * the position in {@code seq} following the code 1896 * point that should be returned. 1897 * @return the Unicode code point or {@code char} value before {@code index} 1898 * in {@code seq}. 1899 * @throws NullPointerException 1900 * if {@code seq} is {@code null}. 1901 * @throws IndexOutOfBoundsException 1902 * if the {@code index} is less than 1 or greater than the 1903 * length of {@code seq}. 
1904 * @since 1.5 1905 */ 1906 public static int codePointBefore(CharSequence seq, int index) { 1907 if (seq == null) { 1908 throw new NullPointerException("seq == null"); 1909 } 1910 int len = seq.length(); 1911 if (index < 1 || index > len) { 1912 throw new IndexOutOfBoundsException(); 1913 } 1914 1915 char low = seq.charAt(--index); 1916 if (--index < 0) { 1917 return low; 1918 } 1919 char high = seq.charAt(index); 1920 if (isSurrogatePair(high, low)) { 1921 return toCodePoint(high, low); 1922 } 1923 return low; 1924 } 1925 1926 /** 1927 * Returns the code point that precedes {@code index} in the specified 1928 * array of character units. If the unit at {@code index - 1} is a 1929 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1930 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1931 * point represented by the pair is returned; otherwise the {@code char} 1932 * value at {@code index - 1} is returned. 1933 * 1934 * @param seq 1935 * the source array of {@code char} units. 1936 * @param index 1937 * the position in {@code seq} following the code 1938 * point that should be returned. 1939 * @return the Unicode code point or {@code char} value before {@code index} 1940 * in {@code seq}. 1941 * @throws NullPointerException 1942 * if {@code seq} is {@code null}. 1943 * @throws IndexOutOfBoundsException 1944 * if the {@code index} is less than 1 or greater than the 1945 * length of {@code seq}. 
1946 * @since 1.5 1947 */ 1948 public static int codePointBefore(char[] seq, int index) { 1949 if (seq == null) { 1950 throw new NullPointerException("seq == null"); 1951 } 1952 int len = seq.length; 1953 if (index < 1 || index > len) { 1954 throw new IndexOutOfBoundsException(); 1955 } 1956 1957 char low = seq[--index]; 1958 if (--index < 0) { 1959 return low; 1960 } 1961 char high = seq[index]; 1962 if (isSurrogatePair(high, low)) { 1963 return toCodePoint(high, low); 1964 } 1965 return low; 1966 } 1967 1968 /** 1969 * Returns the code point that precedes the {@code index} in the specified 1970 * array of character units and is not less than {@code start}. If the unit 1971 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1972 * less than {@code start} and the unit at {@code index - 2} is a 1973 * high-surrogate unit, then the supplementary code point represented by the 1974 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1975 * is returned. 1976 * 1977 * @param seq 1978 * the source array of {@code char} units. 1979 * @param index 1980 * the position in {@code seq} following the code point that 1981 * should be returned. 1982 * @param start 1983 * the index of the first element in {@code seq}. 1984 * @return the Unicode code point or {@code char} value before {@code index} 1985 * in {@code seq}. 1986 * @throws NullPointerException 1987 * if {@code seq} is {@code null}. 1988 * @throws IndexOutOfBoundsException 1989 * if the {@code index <= start}, {@code start < 0}, 1990 * {@code index} is greater than the length of {@code seq}, or 1991 * if {@code start} is equal or greater than the length of 1992 * {@code seq}. 
1993 * @since 1.5 1994 */ 1995 public static int codePointBefore(char[] seq, int index, int start) { 1996 if (seq == null) { 1997 throw new NullPointerException("seq == null"); 1998 } 1999 int len = seq.length; 2000 if (index <= start || index > len || start < 0 || start >= len) { 2001 throw new IndexOutOfBoundsException(); 2002 } 2003 2004 char low = seq[--index]; 2005 if (--index < start) { 2006 return low; 2007 } 2008 char high = seq[index]; 2009 if (isSurrogatePair(high, low)) { 2010 return toCodePoint(high, low); 2011 } 2012 return low; 2013 } 2014 2015 /** 2016 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2017 * and copies the value(s) into the char array {@code dst}, starting at 2018 * index {@code dstIndex}. 2019 * 2020 * @param codePoint 2021 * the Unicode code point to encode. 2022 * @param dst 2023 * the destination array to copy the encoded value into. 2024 * @param dstIndex 2025 * the index in {@code dst} from where to start copying. 2026 * @return the number of {@code char} value units copied into {@code dst}. 2027 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2028 * @throws NullPointerException 2029 * if {@code dst} is {@code null}. 2030 * @throws IndexOutOfBoundsException 2031 * if {@code dstIndex} is negative, greater than or equal to 2032 * {@code dst.length} or equals {@code dst.length - 1} when 2033 * {@code codePoint} is a 2034 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2035 * @since 1.5 2036 */ 2037 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2038 checkValidCodePoint(codePoint); 2039 if (dst == null) { 2040 throw new NullPointerException("dst == null"); 2041 } 2042 if (dstIndex < 0 || dstIndex >= dst.length) { 2043 throw new IndexOutOfBoundsException(); 2044 } 2045 2046 if (isSupplementaryCodePoint(codePoint)) { 2047 if (dstIndex == dst.length - 1) { 2048 throw new IndexOutOfBoundsException(); 2049 } 2050 // See RFC 2781, Section 2.1 2051 // http://www.ietf.org/rfc/rfc2781.txt 2052 int cpPrime = codePoint - 0x10000; 2053 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2054 int low = 0xDC00 | (cpPrime & 0x3FF); 2055 dst[dstIndex] = (char) high; 2056 dst[dstIndex + 1] = (char) low; 2057 return 2; 2058 } 2059 2060 dst[dstIndex] = (char) codePoint; 2061 return 1; 2062 } 2063 2064 /** 2065 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2066 * and returns it as a char array. 2067 * 2068 * @param codePoint 2069 * the Unicode code point to encode. 2070 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2071 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2072 * then the returned array contains two characters, otherwise it 2073 * contains just one character. 2074 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2075 * @since 1.5 2076 */ 2077 public static char[] toChars(int codePoint) { 2078 checkValidCodePoint(codePoint); 2079 if (isSupplementaryCodePoint(codePoint)) { 2080 int cpPrime = codePoint - 0x10000; 2081 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2082 int low = 0xDC00 | (cpPrime & 0x3FF); 2083 return new char[] { (char) high, (char) low }; 2084 } 2085 return new char[] { (char) codePoint }; 2086 } 2087 2088 /** 2089 * Counts the number of Unicode code points in the subsequence of the 2090 * specified character sequence, as delineated by {@code beginIndex} and 2091 * {@code endIndex}. 
Any surrogate values with missing pair values will be 2092 * counted as one code point. 2093 * 2094 * @param seq 2095 * the {@code CharSequence} to look through. 2096 * @param beginIndex 2097 * the inclusive index to begin counting at. 2098 * @param endIndex 2099 * the exclusive index to stop counting at. 2100 * @return the number of Unicode code points. 2101 * @throws NullPointerException 2102 * if {@code seq} is {@code null}. 2103 * @throws IndexOutOfBoundsException 2104 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2105 * if {@code endIndex} is greater than the length of {@code seq}. 2106 * @since 1.5 2107 */ 2108 public static int codePointCount(CharSequence seq, int beginIndex, 2109 int endIndex) { 2110 if (seq == null) { 2111 throw new NullPointerException("seq == null"); 2112 } 2113 int len = seq.length(); 2114 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2115 throw new IndexOutOfBoundsException(); 2116 } 2117 2118 int result = 0; 2119 for (int i = beginIndex; i < endIndex; i++) { 2120 char c = seq.charAt(i); 2121 if (isHighSurrogate(c)) { 2122 if (++i < endIndex) { 2123 c = seq.charAt(i); 2124 if (!isLowSurrogate(c)) { 2125 result++; 2126 } 2127 } 2128 } 2129 result++; 2130 } 2131 return result; 2132 } 2133 2134 /** 2135 * Counts the number of Unicode code points in the subsequence of the 2136 * specified char array, as delineated by {@code offset} and {@code count}. 2137 * Any surrogate values with missing pair values will be counted as one code 2138 * point. 2139 * 2140 * @param seq 2141 * the char array to look through 2142 * @param offset 2143 * the inclusive index to begin counting at. 2144 * @param count 2145 * the number of {@code char} values to look through in 2146 * {@code seq}. 2147 * @return the number of Unicode code points. 2148 * @throws NullPointerException 2149 * if {@code seq} is {@code null}. 
2150 * @throws IndexOutOfBoundsException 2151 * if {@code offset < 0}, {@code count < 0} or if 2152 * {@code offset + count} is greater than the length of 2153 * {@code seq}. 2154 * @since 1.5 2155 */ 2156 public static int codePointCount(char[] seq, int offset, int count) { 2157 Arrays.checkOffsetAndCount(seq.length, offset, count); 2158 int endIndex = offset + count; 2159 int result = 0; 2160 for (int i = offset; i < endIndex; i++) { 2161 char c = seq[i]; 2162 if (isHighSurrogate(c)) { 2163 if (++i < endIndex) { 2164 c = seq[i]; 2165 if (!isLowSurrogate(c)) { 2166 result++; 2167 } 2168 } 2169 } 2170 result++; 2171 } 2172 return result; 2173 } 2174 2175 /** 2176 * Determines the index in the specified character sequence that is offset 2177 * {@code codePointOffset} code points from {@code index}. 2178 * 2179 * @param seq 2180 * the character sequence to find the index in. 2181 * @param index 2182 * the start index in {@code seq}. 2183 * @param codePointOffset 2184 * the number of code points to look backwards or forwards; may 2185 * be a negative or positive value. 2186 * @return the index in {@code seq} that is {@code codePointOffset} code 2187 * points away from {@code index}. 2188 * @throws NullPointerException 2189 * if {@code seq} is {@code null}. 2190 * @throws IndexOutOfBoundsException 2191 * if {@code index < 0}, {@code index} is greater than the 2192 * length of {@code seq}, or if there are not enough values in 2193 * {@code seq} to skip {@code codePointOffset} code points 2194 * forwards or backwards (if {@code codePointOffset} is 2195 * negative) from {@code index}. 
2196 * @since 1.5 2197 */ 2198 public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { 2199 if (seq == null) { 2200 throw new NullPointerException("seq == null"); 2201 } 2202 int len = seq.length(); 2203 if (index < 0 || index > len) { 2204 throw new IndexOutOfBoundsException(); 2205 } 2206 2207 if (codePointOffset == 0) { 2208 return index; 2209 } 2210 2211 if (codePointOffset > 0) { 2212 int codePoints = codePointOffset; 2213 int i = index; 2214 while (codePoints > 0) { 2215 codePoints--; 2216 if (i >= len) { 2217 throw new IndexOutOfBoundsException(); 2218 } 2219 if (isHighSurrogate(seq.charAt(i))) { 2220 int next = i + 1; 2221 if (next < len && isLowSurrogate(seq.charAt(next))) { 2222 i++; 2223 } 2224 } 2225 i++; 2226 } 2227 return i; 2228 } 2229 2230 int codePoints = -codePointOffset; 2231 int i = index; 2232 while (codePoints > 0) { 2233 codePoints--; 2234 i--; 2235 if (i < 0) { 2236 throw new IndexOutOfBoundsException(); 2237 } 2238 if (isLowSurrogate(seq.charAt(i))) { 2239 int prev = i - 1; 2240 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2241 i--; 2242 } 2243 } 2244 } 2245 return i; 2246 } 2247 2248 /** 2249 * Determines the index in a subsequence of the specified character array 2250 * that is offset {@code codePointOffset} code points from {@code index}. 2251 * The subsequence is delineated by {@code start} and {@code count}. 2252 * 2253 * @param seq 2254 * the character array to find the index in. 2255 * @param start 2256 * the inclusive index that marks the beginning of the 2257 * subsequence. 2258 * @param count 2259 * the number of {@code char} values to include within the 2260 * subsequence. 2261 * @param index 2262 * the start index in the subsequence of the char array. 2263 * @param codePointOffset 2264 * the number of code points to look backwards or forwards; may 2265 * be a negative or positive value. 
2266 * @return the index in {@code seq} that is {@code codePointOffset} code 2267 * points away from {@code index}. 2268 * @throws NullPointerException 2269 * if {@code seq} is {@code null}. 2270 * @throws IndexOutOfBoundsException 2271 * if {@code start < 0}, {@code count < 0}, 2272 * {@code index < start}, {@code index > start + count}, 2273 * {@code start + count} is greater than the length of 2274 * {@code seq}, or if there are not enough values in 2275 * {@code seq} to skip {@code codePointOffset} code points 2276 * forward or backward (if {@code codePointOffset} is 2277 * negative) from {@code index}. 2278 * @since 1.5 2279 */ 2280 public static int offsetByCodePoints(char[] seq, int start, int count, 2281 int index, int codePointOffset) { 2282 Arrays.checkOffsetAndCount(seq.length, start, count); 2283 int end = start + count; 2284 if (index < start || index > end) { 2285 throw new IndexOutOfBoundsException(); 2286 } 2287 2288 if (codePointOffset == 0) { 2289 return index; 2290 } 2291 2292 if (codePointOffset > 0) { 2293 int codePoints = codePointOffset; 2294 int i = index; 2295 while (codePoints > 0) { 2296 codePoints--; 2297 if (i >= end) { 2298 throw new IndexOutOfBoundsException(); 2299 } 2300 if (isHighSurrogate(seq[i])) { 2301 int next = i + 1; 2302 if (next < end && isLowSurrogate(seq[next])) { 2303 i++; 2304 } 2305 } 2306 i++; 2307 } 2308 return i; 2309 } 2310 2311 int codePoints = -codePointOffset; 2312 int i = index; 2313 while (codePoints > 0) { 2314 codePoints--; 2315 i--; 2316 if (i < start) { 2317 throw new IndexOutOfBoundsException(); 2318 } 2319 if (isLowSurrogate(seq[i])) { 2320 int prev = i - 1; 2321 if (prev >= start && isHighSurrogate(seq[prev])) { 2322 i--; 2323 } 2324 } 2325 } 2326 return i; 2327 } 2328 2329 /** 2330 * Convenience method to determine the value of the specified character 2331 * {@code c} in the supplied radix. The value of {@code radix} must be 2332 * between MIN_RADIX and MAX_RADIX. 
2333 * 2334 * @param c 2335 * the character to determine the value of. 2336 * @param radix 2337 * the radix. 2338 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2339 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2340 */ 2341 public static int digit(char c, int radix) { 2342 return digit((int) c, radix); 2343 } 2344 2345 /** 2346 * Convenience method to determine the value of the character 2347 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2348 * be between MIN_RADIX and MAX_RADIX. 2349 * 2350 * @param codePoint 2351 * the character, including supplementary characters. 2352 * @param radix 2353 * the radix. 2354 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2355 * {@link #MAX_RADIX} then the value of the character in the radix; 2356 * -1 otherwise. 2357 */ 2358 public static int digit(int codePoint, int radix) { 2359 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2360 return -1; 2361 } 2362 if (codePoint < 128) { 2363 // Optimized for ASCII 2364 int result = -1; 2365 if ('0' <= codePoint && codePoint <= '9') { 2366 result = codePoint - '0'; 2367 } else if ('a' <= codePoint && codePoint <= 'z') { 2368 result = 10 + (codePoint - 'a'); 2369 } else if ('A' <= codePoint && codePoint <= 'Z') { 2370 result = 10 + (codePoint - 'A'); 2371 } 2372 return result < radix ? result : -1; 2373 } 2374 return digitImpl(codePoint, radix); 2375 } 2376 2377 private static native int digitImpl(int codePoint, int radix); 2378 2379 /** 2380 * Compares this object with the specified object and indicates if they are 2381 * equal. In order to be equal, {@code object} must be an instance of 2382 * {@code Character} and have the same char value as this object. 2383 * 2384 * @param object 2385 * the object to compare this double with. 2386 * @return {@code true} if the specified object is equal to this 2387 * {@code Character}; {@code false} otherwise. 
2388 */ 2389 @Override 2390 public boolean equals(Object object) { 2391 return (object instanceof Character) && (((Character) object).value == value); 2392 } 2393 2394 /** 2395 * Returns the character which represents the specified digit in the 2396 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2397 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2398 * smaller than {@code radix}. If any of these conditions does not hold, 0 2399 * is returned. 2400 * 2401 * @param digit 2402 * the integer value. 2403 * @param radix 2404 * the radix. 2405 * @return the character which represents the {@code digit} in the 2406 * {@code radix}. 2407 */ 2408 public static char forDigit(int digit, int radix) { 2409 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2410 if (digit >= 0 && digit < radix) { 2411 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2412 } 2413 } 2414 return 0; 2415 } 2416 2417 /** 2418 * Returns a human-readable name for the given code point, 2419 * or null if the code point is unassigned. 2420 * 2421 * <p>As a fallback mechanism this method returns strings consisting of the Unicode 2422 * block name (with underscores replaced by spaces), a single space, and the uppercase 2423 * hex value of the code point, using as few digits as necessary. 2424 * 2425 * <p>Examples: 2426 * <ul> 2427 * <li>{@code Character.getName(0)} returns "NULL". 2428 * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". 2429 * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". 2430 * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". 2431 * </ul> 2432 * 2433 * <p>Note that the exact strings returned will vary from release to release. 2434 * 2435 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
2436 * @since 1.7 2437 */ 2438 public static String getName(int codePoint) { 2439 checkValidCodePoint(codePoint); 2440 if (getType(codePoint) == Character.UNASSIGNED) { 2441 return null; 2442 } 2443 String result = getNameImpl(codePoint); 2444 if (result == null) { 2445 String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); 2446 result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0); 2447 } 2448 return result; 2449 } 2450 2451 private static native String getNameImpl(int codePoint); 2452 2453 /** 2454 * Returns the numeric value of the specified Unicode character. 2455 * See {@link #getNumericValue(int)}. 2456 * 2457 * @param c the character 2458 * @return a non-negative numeric integer value if a numeric value for 2459 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2460 * -2 if the numeric value can not be represented as an integer. 2461 */ 2462 public static int getNumericValue(char c) { 2463 return getNumericValue((int) c); 2464 } 2465 2466 /** 2467 * Gets the numeric value of the specified Unicode code point. For example, 2468 * the code point '\u216B' stands for the Roman number XII, which has the 2469 * numeric value 12. 2470 * 2471 * <p>There are two points of divergence between this method and the Unicode 2472 * specification. This method treats the letters a-z (in both upper and lower 2473 * cases, and their full-width variants) as numbers from 10 to 35. The 2474 * Unicode specification also supports the idea of code points with non-integer 2475 * numeric values; this method does not (except to the extent of returning -2 2476 * for such code points). 2477 * 2478 * @param codePoint the code point 2479 * @return a non-negative numeric integer value if a numeric value for 2480 * {@code codePoint} exists, -1 if there is no numeric value for 2481 * {@code codePoint}, -2 if the numeric value can not be 2482 * represented with an integer. 
2483 */ 2484 public static int getNumericValue(int codePoint) { 2485 // This is both an optimization and papers over differences between Java and ICU. 2486 if (codePoint < 128) { 2487 if (codePoint >= '0' && codePoint <= '9') { 2488 return codePoint - '0'; 2489 } 2490 if (codePoint >= 'a' && codePoint <= 'z') { 2491 return codePoint - ('a' - 10); 2492 } 2493 if (codePoint >= 'A' && codePoint <= 'Z') { 2494 return codePoint - ('A' - 10); 2495 } 2496 return -1; 2497 } 2498 // Full-width uppercase A-Z. 2499 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2500 return codePoint - 0xff17; 2501 } 2502 // Full-width lowercase a-z. 2503 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2504 return codePoint - 0xff37; 2505 } 2506 return getNumericValueImpl(codePoint); 2507 } 2508 2509 private static native int getNumericValueImpl(int codePoint); 2510 2511 /** 2512 * Gets the general Unicode category of the specified character. 2513 * 2514 * @param c 2515 * the character to get the category of. 2516 * @return the Unicode category of {@code c}. 2517 */ 2518 public static int getType(char c) { 2519 return getType((int) c); 2520 } 2521 2522 /** 2523 * Gets the general Unicode category of the specified code point. 2524 * 2525 * @param codePoint 2526 * the Unicode code point to get the category of. 2527 * @return the Unicode category of {@code codePoint}. 2528 */ 2529 public static int getType(int codePoint) { 2530 int type = getTypeImpl(codePoint); 2531 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2532 if (type <= Character.FORMAT) { 2533 return type; 2534 } 2535 return (type + 1); 2536 } 2537 2538 private static native int getTypeImpl(int codePoint); 2539 2540 /** 2541 * Gets the Unicode directionality of the specified character. 2542 * 2543 * @param c 2544 * the character to get the directionality of. 2545 * @return the Unicode directionality of {@code c}. 
2546 */ 2547 public static byte getDirectionality(char c) { 2548 return getDirectionality((int)c); 2549 } 2550 2551 /** 2552 * Gets the Unicode directionality of the specified character. 2553 * 2554 * @param codePoint 2555 * the Unicode code point to get the directionality of. 2556 * @return the Unicode directionality of {@code codePoint}. 2557 */ 2558 public static byte getDirectionality(int codePoint) { 2559 if (getType(codePoint) == Character.UNASSIGNED) { 2560 return Character.DIRECTIONALITY_UNDEFINED; 2561 } 2562 2563 byte directionality = getDirectionalityImpl(codePoint); 2564 if (directionality == -1) { 2565 return -1; 2566 } 2567 return DIRECTIONALITY[directionality]; 2568 } 2569 2570 private static native byte getDirectionalityImpl(int codePoint); 2571 2572 /** 2573 * Indicates whether the specified character is mirrored. 2574 * 2575 * @param c 2576 * the character to check. 2577 * @return {@code true} if {@code c} is mirrored; {@code false} 2578 * otherwise. 2579 */ 2580 public static boolean isMirrored(char c) { 2581 return isMirrored((int) c); 2582 } 2583 2584 /** 2585 * Indicates whether the specified code point is mirrored. 2586 * 2587 * @param codePoint 2588 * the code point to check. 2589 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2590 * otherwise. 2591 */ 2592 public static boolean isMirrored(int codePoint) { 2593 return isMirroredImpl(codePoint); 2594 } 2595 2596 private static native boolean isMirroredImpl(int codePoint); 2597 2598 @Override 2599 public int hashCode() { 2600 return value; 2601 } 2602 2603 /** 2604 * Returns the high surrogate for the given code point. The result is meaningless if 2605 * the given code point is not a supplementary character. 2606 * @since 1.7 2607 */ 2608 public static char highSurrogate(int codePoint) { 2609 return (char) ((codePoint >> 10) + 0xd7c0); 2610 } 2611 2612 /** 2613 * Returns the low surrogate for the given code point. 
The result is meaningless if 2614 * the given code point is not a supplementary character. 2615 * @since 1.7 2616 */ 2617 public static char lowSurrogate(int codePoint) { 2618 return (char) ((codePoint & 0x3ff) | 0xdc00); 2619 } 2620 2621 /** 2622 * Returns true if the given code point is in the Basic Multilingual Plane (BMP). 2623 * Such code points can be represented by a single {@code char}. 2624 * @since 1.7 2625 */ 2626 public static boolean isBmpCodePoint(int codePoint) { 2627 return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE; 2628 } 2629 2630 /** 2631 * Indicates whether the specified character is defined in the Unicode 2632 * specification. 2633 * 2634 * @param c 2635 * the character to check. 2636 * @return {@code true} if the general Unicode category of the character is 2637 * not {@code UNASSIGNED}; {@code false} otherwise. 2638 */ 2639 public static boolean isDefined(char c) { 2640 return isDefinedImpl(c); 2641 } 2642 2643 /** 2644 * Indicates whether the specified code point is defined in the Unicode 2645 * specification. 2646 * 2647 * @param codePoint 2648 * the code point to check. 2649 * @return {@code true} if the general Unicode category of the code point is 2650 * not {@code UNASSIGNED}; {@code false} otherwise. 2651 */ 2652 public static boolean isDefined(int codePoint) { 2653 return isDefinedImpl(codePoint); 2654 } 2655 2656 private static native boolean isDefinedImpl(int codePoint); 2657 2658 /** 2659 * Indicates whether the specified character is a digit. 2660 * 2661 * @param c 2662 * the character to check. 2663 * @return {@code true} if {@code c} is a digit; {@code false} 2664 * otherwise. 2665 */ 2666 public static boolean isDigit(char c) { 2667 return isDigit((int) c); 2668 } 2669 2670 /** 2671 * Indicates whether the specified code point is a digit. 2672 * 2673 * @param codePoint 2674 * the code point to check. 2675 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2676 * otherwise. 
2677 */ 2678 public static boolean isDigit(int codePoint) { 2679 // Optimized case for ASCII 2680 if ('0' <= codePoint && codePoint <= '9') { 2681 return true; 2682 } 2683 if (codePoint < 1632) { 2684 return false; 2685 } 2686 return isDigitImpl(codePoint); 2687 } 2688 2689 private static native boolean isDigitImpl(int codePoint); 2690 2691 /** 2692 * Indicates whether the specified character is ignorable in a Java or 2693 * Unicode identifier. 2694 * 2695 * @param c 2696 * the character to check. 2697 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2698 */ 2699 public static boolean isIdentifierIgnorable(char c) { 2700 return isIdentifierIgnorable((int) c); 2701 } 2702 2703 /** 2704 * Indicates whether the specified code point is ignorable in a Java or 2705 * Unicode identifier. 2706 * 2707 * @param codePoint 2708 * the code point to check. 2709 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2710 * otherwise. 2711 */ 2712 public static boolean isIdentifierIgnorable(int codePoint) { 2713 // This is both an optimization and papers over differences between Java and ICU. 2714 if (codePoint < 0x600) { 2715 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2716 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2717 } 2718 return isIdentifierIgnorableImpl(codePoint); 2719 } 2720 2721 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2722 2723 /** 2724 * Indicates whether the specified character is an ISO control character. 2725 * 2726 * @param c 2727 * the character to check. 2728 * @return {@code true} if {@code c} is an ISO control character; 2729 * {@code false} otherwise. 2730 */ 2731 public static boolean isISOControl(char c) { 2732 return isISOControl((int) c); 2733 } 2734 2735 /** 2736 * Indicates whether the specified code point is an ISO control character. 2737 * 2738 * @param c 2739 * the code point to check. 
2740 * @return {@code true} if {@code c} is an ISO control character; 2741 * {@code false} otherwise. 2742 */ 2743 public static boolean isISOControl(int c) { 2744 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2745 } 2746 2747 /** 2748 * Indicates whether the specified character is a valid part of a Java 2749 * identifier other than the first character. 2750 * 2751 * @param c 2752 * the character to check. 2753 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2754 * {@code false} otherwise. 2755 */ 2756 public static boolean isJavaIdentifierPart(char c) { 2757 return isJavaIdentifierPart((int) c); 2758 } 2759 2760 /** 2761 * Indicates whether the specified code point is a valid part of a Java 2762 * identifier other than the first character. 2763 * 2764 * @param codePoint 2765 * the code point to check. 2766 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2767 * {@code false} otherwise. 2768 */ 2769 public static boolean isJavaIdentifierPart(int codePoint) { 2770 // Use precomputed bitmasks to optimize the ASCII range. 2771 if (codePoint < 64) { 2772 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2773 } else if (codePoint < 128) { 2774 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2775 } 2776 int type = getType(codePoint); 2777 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2778 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2779 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2780 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2781 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2782 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2783 } 2784 2785 /** 2786 * Indicates whether the specified character is a valid first character for 2787 * a Java identifier. 2788 * 2789 * @param c 2790 * the character to check. 
2791 * @return {@code true} if {@code c} is a valid first character of a Java 2792 * identifier; {@code false} otherwise. 2793 */ 2794 public static boolean isJavaIdentifierStart(char c) { 2795 return isJavaIdentifierStart((int) c); 2796 } 2797 2798 /** 2799 * Indicates whether the specified code point is a valid first character for 2800 * a Java identifier. 2801 * 2802 * @param codePoint 2803 * the code point to check. 2804 * @return {@code true} if {@code codePoint} is a valid start of a Java 2805 * identifier; {@code false} otherwise. 2806 */ 2807 public static boolean isJavaIdentifierStart(int codePoint) { 2808 // Use precomputed bitmasks to optimize the ASCII range. 2809 if (codePoint < 64) { 2810 return (codePoint == '$'); // There's only one character in this range. 2811 } else if (codePoint < 128) { 2812 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2813 } 2814 int type = getType(codePoint); 2815 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2816 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2817 } 2818 2819 /** 2820 * Indicates whether the specified character is a Java letter. 2821 * 2822 * @param c 2823 * the character to check. 2824 * @return {@code true} if {@code c} is a Java letter; {@code false} 2825 * otherwise. 2826 * @deprecated Use {@link #isJavaIdentifierStart(char)} instead. 2827 */ 2828 @Deprecated 2829 public static boolean isJavaLetter(char c) { 2830 return isJavaIdentifierStart(c); 2831 } 2832 2833 /** 2834 * Indicates whether the specified character is a Java letter or digit 2835 * character. 2836 * 2837 * @param c 2838 * the character to check. 2839 * @return {@code true} if {@code c} is a Java letter or digit; 2840 * {@code false} otherwise. 2841 * @deprecated Use {@link #isJavaIdentifierPart(char)} instead. 
2842 */ 2843 @Deprecated 2844 public static boolean isJavaLetterOrDigit(char c) { 2845 return isJavaIdentifierPart(c); 2846 } 2847 2848 /** 2849 * Indicates whether the specified character is a letter. 2850 * 2851 * @param c 2852 * the character to check. 2853 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2854 */ 2855 public static boolean isLetter(char c) { 2856 return isLetter((int) c); 2857 } 2858 2859 /** 2860 * Indicates whether the specified code point is a letter. 2861 * 2862 * @param codePoint 2863 * the code point to check. 2864 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2865 * otherwise. 2866 */ 2867 public static boolean isLetter(int codePoint) { 2868 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2869 return true; 2870 } 2871 if (codePoint < 128) { 2872 return false; 2873 } 2874 return isLetterImpl(codePoint); 2875 } 2876 2877 private static native boolean isLetterImpl(int codePoint); 2878 2879 /** 2880 * Indicates whether the specified character is a letter or a digit. 2881 * 2882 * @param c 2883 * the character to check. 2884 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2885 * otherwise. 2886 */ 2887 public static boolean isLetterOrDigit(char c) { 2888 return isLetterOrDigit((int) c); 2889 } 2890 2891 /** 2892 * Indicates whether the specified code point is a letter or a digit. 2893 * 2894 * @param codePoint 2895 * the code point to check. 2896 * @return {@code true} if {@code codePoint} is a letter or a digit; 2897 * {@code false} otherwise. 
2898 */ 2899 public static boolean isLetterOrDigit(int codePoint) { 2900 // Optimized case for ASCII 2901 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2902 return true; 2903 } 2904 if ('0' <= codePoint && codePoint <= '9') { 2905 return true; 2906 } 2907 if (codePoint < 128) { 2908 return false; 2909 } 2910 return isLetterOrDigitImpl(codePoint); 2911 } 2912 2913 private static native boolean isLetterOrDigitImpl(int codePoint); 2914 2915 /** 2916 * Indicates whether the specified character is a lower case letter. 2917 * 2918 * @param c 2919 * the character to check. 2920 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2921 * otherwise. 2922 */ 2923 public static boolean isLowerCase(char c) { 2924 return isLowerCase((int) c); 2925 } 2926 2927 /** 2928 * Indicates whether the specified code point is a lower case letter. 2929 * 2930 * @param codePoint 2931 * the code point to check. 2932 * @return {@code true} if {@code codePoint} is a lower case letter; 2933 * {@code false} otherwise. 2934 */ 2935 public static boolean isLowerCase(int codePoint) { 2936 // Optimized case for ASCII 2937 if ('a' <= codePoint && codePoint <= 'z') { 2938 return true; 2939 } 2940 if (codePoint < 128) { 2941 return false; 2942 } 2943 return isLowerCaseImpl(codePoint); 2944 } 2945 2946 private static native boolean isLowerCaseImpl(int codePoint); 2947 2948 /** 2949 * Indicates whether the specified character is a Java space. 2950 * 2951 * @param c 2952 * the character to check. 2953 * @return {@code true} if {@code c} is a Java space; {@code false} 2954 * otherwise. 2955 * @deprecated Use {@link #isWhitespace(char)} instead. 2956 */ 2957 @Deprecated 2958 public static boolean isSpace(char c) { 2959 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2960 } 2961 2962 /** 2963 * Indicates whether the specified character is a Unicode space character. 
2964 * That is, if it is a member of one of the Unicode categories Space 2965 * Separator, Line Separator, or Paragraph Separator. 2966 * 2967 * @param c 2968 * the character to check. 2969 * @return {@code true} if {@code c} is a Unicode space character, 2970 * {@code false} otherwise. 2971 */ 2972 public static boolean isSpaceChar(char c) { 2973 return isSpaceChar((int) c); 2974 } 2975 2976 /** 2977 * Indicates whether the specified code point is a Unicode space character. 2978 * That is, if it is a member of one of the Unicode categories Space 2979 * Separator, Line Separator, or Paragraph Separator. 2980 * 2981 * @param codePoint 2982 * the code point to check. 2983 * @return {@code true} if {@code codePoint} is a Unicode space character, 2984 * {@code false} otherwise. 2985 */ 2986 public static boolean isSpaceChar(int codePoint) { 2987 if (codePoint == 0x20 || codePoint == 0xa0 || codePoint == 0x1680) { 2988 return true; 2989 } 2990 if (codePoint < 0x2000) { 2991 return false; 2992 } 2993 if (codePoint <= 0xffff) { 2994 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 2995 codePoint == 0x202f || codePoint == 0x3000; 2996 } 2997 return isSpaceCharImpl(codePoint); 2998 } 2999 3000 private static native boolean isSpaceCharImpl(int codePoint); 3001 3002 /** 3003 * Indicates whether the specified character is a titlecase character. 3004 * 3005 * @param c 3006 * the character to check. 3007 * @return {@code true} if {@code c} is a titlecase character, {@code false} 3008 * otherwise. 3009 */ 3010 public static boolean isTitleCase(char c) { 3011 return isTitleCaseImpl(c); 3012 } 3013 3014 /** 3015 * Indicates whether the specified code point is a titlecase character. 3016 * 3017 * @param codePoint 3018 * the code point to check. 3019 * @return {@code true} if {@code codePoint} is a titlecase character, 3020 * {@code false} otherwise. 
3021 */ 3022 public static boolean isTitleCase(int codePoint) { 3023 return isTitleCaseImpl(codePoint); 3024 } 3025 3026 private static native boolean isTitleCaseImpl(int codePoint); 3027 3028 /** 3029 * Indicates whether the specified character is valid as part of a Unicode 3030 * identifier other than the first character. 3031 * 3032 * @param c 3033 * the character to check. 3034 * @return {@code true} if {@code c} is valid as part of a Unicode 3035 * identifier; {@code false} otherwise. 3036 */ 3037 public static boolean isUnicodeIdentifierPart(char c) { 3038 return isUnicodeIdentifierPartImpl(c); 3039 } 3040 3041 /** 3042 * Indicates whether the specified code point is valid as part of a Unicode 3043 * identifier other than the first character. 3044 * 3045 * @param codePoint 3046 * the code point to check. 3047 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3048 * identifier; {@code false} otherwise. 3049 */ 3050 public static boolean isUnicodeIdentifierPart(int codePoint) { 3051 return isUnicodeIdentifierPartImpl(codePoint); 3052 } 3053 3054 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3055 3056 /** 3057 * Indicates whether the specified character is a valid initial character 3058 * for a Unicode identifier. 3059 * 3060 * @param c 3061 * the character to check. 3062 * @return {@code true} if {@code c} is a valid first character for a 3063 * Unicode identifier; {@code false} otherwise. 3064 */ 3065 public static boolean isUnicodeIdentifierStart(char c) { 3066 return isUnicodeIdentifierStartImpl(c); 3067 } 3068 3069 /** 3070 * Indicates whether the specified code point is a valid initial character 3071 * for a Unicode identifier. 3072 * 3073 * @param codePoint 3074 * the code point to check. 3075 * @return {@code true} if {@code codePoint} is a valid first character for 3076 * a Unicode identifier; {@code false} otherwise. 
3077 */ 3078 public static boolean isUnicodeIdentifierStart(int codePoint) { 3079 return isUnicodeIdentifierStartImpl(codePoint); 3080 } 3081 3082 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3083 3084 /** 3085 * Indicates whether the specified character is an upper case letter. 3086 * 3087 * @param c 3088 * the character to check. 3089 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3090 * otherwise. 3091 */ 3092 public static boolean isUpperCase(char c) { 3093 return isUpperCase((int) c); 3094 } 3095 3096 /** 3097 * Indicates whether the specified code point is an upper case letter. 3098 * 3099 * @param codePoint 3100 * the code point to check. 3101 * @return {@code true} if {@code codePoint} is a upper case letter; 3102 * {@code false} otherwise. 3103 */ 3104 public static boolean isUpperCase(int codePoint) { 3105 // Optimized case for ASCII 3106 if ('A' <= codePoint && codePoint <= 'Z') { 3107 return true; 3108 } 3109 if (codePoint < 128) { 3110 return false; 3111 } 3112 return isUpperCaseImpl(codePoint); 3113 } 3114 3115 private static native boolean isUpperCaseImpl(int codePoint); 3116 3117 /** 3118 * Indicates whether the specified character is a whitespace character in 3119 * Java. 3120 * 3121 * @param c 3122 * the character to check. 3123 * @return {@code true} if the supplied {@code c} is a whitespace character 3124 * in Java; {@code false} otherwise. 3125 */ 3126 public static boolean isWhitespace(char c) { 3127 return isWhitespace((int) c); 3128 } 3129 3130 /** 3131 * Indicates whether the specified code point is a whitespace character in 3132 * Java. 3133 * 3134 * @param codePoint 3135 * the code point to check. 3136 * @return {@code true} if the supplied {@code c} is a whitespace character 3137 * in Java; {@code false} otherwise. 3138 */ 3139 public static boolean isWhitespace(int codePoint) { 3140 // This is both an optimization and papers over differences between Java and ICU. 
3141 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x9 && codePoint <= 0xd)) { 3142 return true; 3143 } 3144 if (codePoint == 0x1680) { 3145 return true; 3146 } 3147 if (codePoint < 0x2000 || codePoint == 0x2007) { 3148 return false; 3149 } 3150 if (codePoint <= 0xffff) { 3151 return codePoint <= 0x200b || codePoint == 0x2028 || codePoint == 0x2029 || 3152 codePoint == 0x3000; 3153 } 3154 return isWhitespaceImpl(codePoint); 3155 } 3156 3157 private static native boolean isWhitespaceImpl(int codePoint); 3158 3159 /** 3160 * Reverses the order of the first and second byte in the specified 3161 * character. 3162 * 3163 * @param c 3164 * the character to reverse. 3165 * @return the character with reordered bytes. 3166 */ 3167 public static char reverseBytes(char c) { 3168 return (char)((c<<8) | (c>>8)); 3169 } 3170 3171 /** 3172 * Returns the lower case equivalent for the specified character if the 3173 * character is an upper case letter. Otherwise, the specified character is 3174 * returned unchanged. 3175 * 3176 * @param c 3177 * the character 3178 * @return if {@code c} is an upper case character then its lower case 3179 * counterpart, otherwise just {@code c}. 3180 */ 3181 public static char toLowerCase(char c) { 3182 return (char) toLowerCase((int) c); 3183 } 3184 3185 /** 3186 * Returns the lower case equivalent for the specified code point if it is 3187 * an upper case letter. Otherwise, the specified code point is returned 3188 * unchanged. 3189 * 3190 * @param codePoint 3191 * the code point to check. 3192 * @return if {@code codePoint} is an upper case character then its lower 3193 * case counterpart, otherwise just {@code codePoint}. 
3194 */ 3195 public static int toLowerCase(int codePoint) { 3196 // Optimized case for ASCII 3197 if ('A' <= codePoint && codePoint <= 'Z') { 3198 return (char) (codePoint + ('a' - 'A')); 3199 } 3200 if (codePoint < 192) { 3201 return codePoint; 3202 } 3203 return toLowerCaseImpl(codePoint); 3204 } 3205 3206 private static native int toLowerCaseImpl(int codePoint); 3207 3208 @Override 3209 public String toString() { 3210 return String.valueOf(value); 3211 } 3212 3213 /** 3214 * Converts the specified character to its string representation. 3215 * 3216 * @param value 3217 * the character to convert. 3218 * @return the character converted to a string. 3219 */ 3220 public static String toString(char value) { 3221 return String.valueOf(value); 3222 } 3223 3224 /** 3225 * Returns the title case equivalent for the specified character if it 3226 * exists. Otherwise, the specified character is returned unchanged. 3227 * 3228 * @param c 3229 * the character to convert. 3230 * @return the title case equivalent of {@code c} if it exists, otherwise 3231 * {@code c}. 3232 */ 3233 public static char toTitleCase(char c) { 3234 return (char) toTitleCaseImpl(c); 3235 } 3236 3237 /** 3238 * Returns the title case equivalent for the specified code point if it 3239 * exists. Otherwise, the specified code point is returned unchanged. 3240 * 3241 * @param codePoint 3242 * the code point to convert. 3243 * @return the title case equivalent of {@code codePoint} if it exists, 3244 * otherwise {@code codePoint}. 3245 */ 3246 public static int toTitleCase(int codePoint) { 3247 return toTitleCaseImpl(codePoint); 3248 } 3249 3250 private static native int toTitleCaseImpl(int codePoint); 3251 3252 /** 3253 * Returns the upper case equivalent for the specified character if the 3254 * character is a lower case letter. Otherwise, the specified character is 3255 * returned unchanged. 3256 * 3257 * @param c 3258 * the character to convert. 
* @return if {@code c} is a lower case character then its upper case
     *         counterpart, otherwise just {@code c}.
     */
    public static char toUpperCase(char c) {
        final int codePoint = c;
        return (char) toUpperCase(codePoint);
    }

    /**
     * Maps the given code point to its upper case equivalent when the code
     * point is a lower case letter; any other code point is returned
     * unchanged.
     *
     * @param codePoint
     *            the code point to convert.
     * @return the upper case counterpart of {@code codePoint} when it is a
     *         lower case character, otherwise {@code codePoint} itself.
     */
    public static int toUpperCase(int codePoint) {
        // ASCII lower case letters are shifted into the upper case range here.
        if (codePoint >= 'a' && codePoint <= 'z') {
            return (char) (codePoint - ('a' - 'A'));
        }
        // Any other value below 181 is returned as is; the rest is looked up
        // by the native implementation.
        return codePoint < 181 ? codePoint : toUpperCaseImpl(codePoint);
    }

    private static native int toUpperCaseImpl(int codePoint);
}