1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.lang; 19 20import java.io.Serializable; 21import java.util.Arrays; 22 23/** 24 * The wrapper for the primitive type {@code char}. This class also provides a 25 * number of utility methods for working with characters. 26 * 27 * <p>Character data is kept up to date as Unicode evolves. 28 * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of 29 * the {@code Locale} documentation for details of the Unicode versions implemented by current 30 * and historical Android releases. 31 * 32 * <p>The Unicode specification, character tables, and other information are available at 33 * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. 34 * 35 * <p>Unicode characters are referred to as <i>code points</i>. The range of valid 36 * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> 37 * is the code point range U+0000 to U+FFFF. Characters above the BMP are 38 * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 39 * encoding and {@code char} pairs are used to represent code points in the 40 * supplementary range. 
A pair of {@code char} values that represent a 41 * supplementary character are made up of a <i>high surrogate</i> with a value 42 * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of 43 * 0xDC00 to 0xDFFF. 44 * <p> 45 * On the Java platform a {@code char} value represents either a single BMP code 46 * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type 47 * is used to represent all Unicode code points. 48 * 49 * <a name="unicode_categories"><h3>Unicode categories</h3></a> 50 * <p>Here's a list of the Unicode character categories and the corresponding Java constant, 51 * grouped semantically to provide a convenient overview. This table is also useful in 52 * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. 53 * <span class="datatable"> 54 * <style type="text/css"> 55 * .datatable td { padding-right: 20px; } 56 * </style> 57 * <p><table> 58 * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> 59 * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> 60 * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> 61 * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> 62 * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> 63 * <tr> <td><br></td> </tr> 64 * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> 65 * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> 66 * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> 67 * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> 68 * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> 69 * <tr> <td><br></td> </tr> 70 * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> 71 * <tr> <td> Me </td> <td> Enclosing mark </td> 
<td>{@link #ENCLOSING_MARK}</td> </tr> 72 * <tr> <td> Mc </td> <td> Combining spacing mark </td> <td>{@link #COMBINING_SPACING_MARK}</td> </tr> 73 * <tr> <td><br></td> </tr> 74 * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> 75 * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> 76 * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> 77 * <tr> <td><br></td> </tr> 78 * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> 79 * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> 80 * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> 81 * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> 82 * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> 83 * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> 84 * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> 85 * <tr> <td><br></td> </tr> 86 * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> 87 * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> 88 * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> 89 * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> 90 * <tr> <td><br></td> </tr> 91 * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> 92 * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> 93 * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> 94 * </table> 95 * </span> 96 * 97 * @since 1.0 98 */ 99@FindBugsSuppressWarnings("DM_NUMBER_CTOR") 100public final class Character 
implements Serializable, Comparable<Character> { 101 private static final long serialVersionUID = 3786198910865385080L; 102 103 private final char value; 104 105 /** 106 * The minimum {@code Character} value. 107 */ 108 public static final char MIN_VALUE = '\u0000'; 109 110 /** 111 * The maximum {@code Character} value. 112 */ 113 public static final char MAX_VALUE = '\uffff'; 114 115 /** 116 * The minimum radix used for conversions between characters and integers. 117 */ 118 public static final int MIN_RADIX = 2; 119 120 /** 121 * The maximum radix used for conversions between characters and integers. 122 */ 123 public static final int MAX_RADIX = 36; 124 125 /** 126 * The {@link Class} object that represents the primitive type {@code char}. 127 */ 128 @SuppressWarnings("unchecked") 129 public static final Class<Character> TYPE 130 = (Class<Character>) char[].class.getComponentType(); 131 // Note: Character.TYPE can't be set to "char.class", since *that* is 132 // defined to be "java.lang.Character.TYPE"; 133 134 /** 135 * Unicode category constant Cn. 136 */ 137 public static final byte UNASSIGNED = 0; 138 139 /** 140 * Unicode category constant Lu. 141 */ 142 public static final byte UPPERCASE_LETTER = 1; 143 144 /** 145 * Unicode category constant Ll. 146 */ 147 public static final byte LOWERCASE_LETTER = 2; 148 149 /** 150 * Unicode category constant Lt. 151 */ 152 public static final byte TITLECASE_LETTER = 3; 153 154 /** 155 * Unicode category constant Lm. 156 */ 157 public static final byte MODIFIER_LETTER = 4; 158 159 /** 160 * Unicode category constant Lo. 161 */ 162 public static final byte OTHER_LETTER = 5; 163 164 /** 165 * Unicode category constant Mn. 166 */ 167 public static final byte NON_SPACING_MARK = 6; 168 169 /** 170 * Unicode category constant Me. 171 */ 172 public static final byte ENCLOSING_MARK = 7; 173 174 /** 175 * Unicode category constant Mc. 
176 */ 177 public static final byte COMBINING_SPACING_MARK = 8; 178 179 /** 180 * Unicode category constant Nd. 181 */ 182 public static final byte DECIMAL_DIGIT_NUMBER = 9; 183 184 /** 185 * Unicode category constant Nl. 186 */ 187 public static final byte LETTER_NUMBER = 10; 188 189 /** 190 * Unicode category constant No. 191 */ 192 public static final byte OTHER_NUMBER = 11; 193 194 /** 195 * Unicode category constant Zs. 196 */ 197 public static final byte SPACE_SEPARATOR = 12; 198 199 /** 200 * Unicode category constant Zl. 201 */ 202 public static final byte LINE_SEPARATOR = 13; 203 204 /** 205 * Unicode category constant Zp. 206 */ 207 public static final byte PARAGRAPH_SEPARATOR = 14; 208 209 /** 210 * Unicode category constant Cc. 211 */ 212 public static final byte CONTROL = 15; 213 214 /** 215 * Unicode category constant Cf. 216 */ 217 public static final byte FORMAT = 16; 218 219 /** 220 * Unicode category constant Co. 221 */ 222 public static final byte PRIVATE_USE = 18; 223 224 /** 225 * Unicode category constant Cs. 226 */ 227 public static final byte SURROGATE = 19; 228 229 /** 230 * Unicode category constant Pd. 231 */ 232 public static final byte DASH_PUNCTUATION = 20; 233 234 /** 235 * Unicode category constant Ps. 236 */ 237 public static final byte START_PUNCTUATION = 21; 238 239 /** 240 * Unicode category constant Pe. 241 */ 242 public static final byte END_PUNCTUATION = 22; 243 244 /** 245 * Unicode category constant Pc. 246 */ 247 public static final byte CONNECTOR_PUNCTUATION = 23; 248 249 /** 250 * Unicode category constant Po. 251 */ 252 public static final byte OTHER_PUNCTUATION = 24; 253 254 /** 255 * Unicode category constant Sm. 256 */ 257 public static final byte MATH_SYMBOL = 25; 258 259 /** 260 * Unicode category constant Sc. 261 */ 262 public static final byte CURRENCY_SYMBOL = 26; 263 264 /** 265 * Unicode category constant Sk. 
266 */ 267 public static final byte MODIFIER_SYMBOL = 27; 268 269 /** 270 * Unicode category constant So. 271 */ 272 public static final byte OTHER_SYMBOL = 28; 273 274 /** 275 * Unicode category constant Pi. 276 * 277 * @since 1.4 278 */ 279 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 280 281 /** 282 * Unicode category constant Pf. 283 * 284 * @since 1.4 285 */ 286 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 287 288 /** 289 * Unicode bidirectional constant. 290 * 291 * @since 1.4 292 */ 293 public static final byte DIRECTIONALITY_UNDEFINED = -1; 294 295 /** 296 * Unicode bidirectional constant L. 297 * 298 * @since 1.4 299 */ 300 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 301 302 /** 303 * Unicode bidirectional constant R. 304 * 305 * @since 1.4 306 */ 307 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 308 309 /** 310 * Unicode bidirectional constant AL. 311 * 312 * @since 1.4 313 */ 314 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 315 316 /** 317 * Unicode bidirectional constant EN. 318 * 319 * @since 1.4 320 */ 321 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 322 323 /** 324 * Unicode bidirectional constant ES. 325 * 326 * @since 1.4 327 */ 328 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 329 330 /** 331 * Unicode bidirectional constant ET. 332 * 333 * @since 1.4 334 */ 335 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 336 337 /** 338 * Unicode bidirectional constant AN. 339 * 340 * @since 1.4 341 */ 342 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 343 344 /** 345 * Unicode bidirectional constant CS. 346 * 347 * @since 1.4 348 */ 349 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 350 351 /** 352 * Unicode bidirectional constant NSM. 353 * 354 * @since 1.4 355 */ 356 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 357 358 /** 359 * Unicode bidirectional constant BN. 
360 * 361 * @since 1.4 362 */ 363 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 364 365 /** 366 * Unicode bidirectional constant B. 367 * 368 * @since 1.4 369 */ 370 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 371 372 /** 373 * Unicode bidirectional constant S. 374 * 375 * @since 1.4 376 */ 377 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 378 379 /** 380 * Unicode bidirectional constant WS. 381 * 382 * @since 1.4 383 */ 384 public static final byte DIRECTIONALITY_WHITESPACE = 12; 385 386 /** 387 * Unicode bidirectional constant ON. 388 * 389 * @since 1.4 390 */ 391 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 392 393 /** 394 * Unicode bidirectional constant LRE. 395 * 396 * @since 1.4 397 */ 398 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 399 400 /** 401 * Unicode bidirectional constant LRO. 402 * 403 * @since 1.4 404 */ 405 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 406 407 /** 408 * Unicode bidirectional constant RLE. 409 * 410 * @since 1.4 411 */ 412 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 413 414 /** 415 * Unicode bidirectional constant RLO. 416 * 417 * @since 1.4 418 */ 419 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 420 421 /** 422 * Unicode bidirectional constant PDF. 423 * 424 * @since 1.4 425 */ 426 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 427 428 /** 429 * The minimum value of a high surrogate or leading surrogate unit in UTF-16 430 * encoding, {@code '\uD800'}. 431 * 432 * @since 1.5 433 */ 434 public static final char MIN_HIGH_SURROGATE = '\uD800'; 435 436 /** 437 * The maximum value of a high surrogate or leading surrogate unit in UTF-16 438 * encoding, {@code '\uDBFF'}. 
439 * 440 * @since 1.5 441 */ 442 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 443 444 /** 445 * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 446 * encoding, {@code '\uDC00'}. 447 * 448 * @since 1.5 449 */ 450 public static final char MIN_LOW_SURROGATE = '\uDC00'; 451 452 /** 453 * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 454 * encoding, {@code '\uDFFF'}. 455 * 456 * @since 1.5 457 */ 458 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 459 460 /** 461 * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. 462 * 463 * @since 1.5 464 */ 465 public static final char MIN_SURROGATE = '\uD800'; 466 467 /** 468 * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. 469 * 470 * @since 1.5 471 */ 472 public static final char MAX_SURROGATE = '\uDFFF'; 473 474 /** 475 * The minimum value of a supplementary code point, {@code U+010000}. 476 * 477 * @since 1.5 478 */ 479 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; 480 481 /** 482 * The minimum code point value, {@code U+0000}. 483 * 484 * @since 1.5 485 */ 486 public static final int MIN_CODE_POINT = 0x000000; 487 488 /** 489 * The maximum code point value, {@code U+10FFFF}. 490 * 491 * @since 1.5 492 */ 493 public static final int MAX_CODE_POINT = 0x10FFFF; 494 495 /** 496 * The number of bits required to represent a {@code Character} value in 497 * unsigned form. 
498 * 499 * @since 1.5 500 */ 501 public static final int SIZE = 16; 502 /* Table of the {@code DIRECTIONALITY_*} constants declared in this class ({@code DIRECTIONALITY_UNDEFINED} is not included). The element order does not follow the constants' numeric values (for example {@code DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC}, whose value is 2, sits at index 13). NOTE(review): presumably indexed by an externally defined directionality code and used to translate it to the Java constant -- confirm against the code that reads this array. */ 503 private static final byte[] DIRECTIONALITY = new byte[] { 504 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 505 DIRECTIONALITY_EUROPEAN_NUMBER, 506 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 507 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 508 DIRECTIONALITY_ARABIC_NUMBER, 509 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 510 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 511 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 512 DIRECTIONALITY_OTHER_NEUTRALS, 513 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 514 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 515 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 516 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 517 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 518 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 519 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 520 521 /** 522 * Represents a subset of the Unicode character set. A subset carries only a name; its {@code equals} and {@code hashCode} are final and identity-based, so two subsets are equal only if they are the same instance. 523 */ 524 public static class Subset { 525 private final String name; 526 527 /** 528 * Constructs a new {@code Subset} with the given name. * * @param name the name of this subset, later returned by {@link #toString()}; must not be {@code null} * @throws NullPointerException if {@code name} is {@code null} 529 */ 530 protected Subset(String name) { 531 if (name == null) { 532 throw new NullPointerException("name == null"); 533 } 534 this.name = name; 535 } 536 537 /** 538 * Compares this character subset for identity with the specified object. * * @param object the object to compare with * @return {@code true} only if {@code object} is this very instance 539 */ 540 @Override public final boolean equals(Object object) { 541 return object == this; 542 } 543 544 /** 545 * Returns this subset's hash code, which is the hash code computed by 546 * {@link java.lang.Object#hashCode()}. * * @return the identity-based hash code inherited from {@code Object} 547 */ 548 @Override public final int hashCode() { 549 return super.hashCode(); 550 } 551 552 /** 553 * Returns this subset's name. * * @return the name that was passed to the constructor 554 */ 555 @Override public final String toString() { 556 return name; 557 } 558 } 559 560 /** 561 * Represents a block of Unicode characters. This class provides constants for various 562 * well-known blocks (but not all blocks) and methods for looking up a block 563 * by name {@link #forName} or by code point {@link #of}. 
564 * 565 * @since 1.2 566 */ 567 public static final class UnicodeBlock extends Subset { 568 /** 569 * The Surrogates Area Unicode block. 570 * 571 * @deprecated As of Java 5, this block has been replaced by 572 * {@link #HIGH_SURROGATES}, 573 * {@link #HIGH_PRIVATE_USE_SURROGATES} and 574 * {@link #LOW_SURROGATES}. 575 */ 576 @Deprecated 577 public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"); 578 579 /** The Basic Latin Unicode block. */ 580 public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"); 581 582 /** The Latin-1 Supplement Unicode block. */ 583 public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"); 584 585 /** The Latin Extended-A Unicode block. */ 586 public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"); 587 588 /** The Latin Extended-B Unicode block. */ 589 public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"); 590 591 /** The IPA Extensions Unicode block. */ 592 public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"); 593 594 /** The Spacing Modifier Letters Unicode block. */ 595 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"); 596 597 /** The Combining Diacritical Marks Unicode block. */ 598 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"); 599 600 /** 601 * The Greek and Coptic Unicode block. Previously referred to as Greek. 602 */ 603 public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK"); 604 605 /** The Cyrillic Unicode block. */ 606 public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC"); 607 608 /** 609 * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary. 
610 */ 611 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY"); 612 613 /** The Armenian Unicode block. */ 614 public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN"); 615 616 /** The Hebrew Unicode block. */ 617 public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW"); 618 619 /** The Arabic Unicode block. */ 620 public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC"); 621 622 /** The Syriac Unicode block. */ 623 public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"); 624 625 /** The Thaana Unicode block. */ 626 public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA"); 627 628 /** The Devanagari Unicode block. */ 629 public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI"); 630 631 /** The Bengali Unicode block. */ 632 public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI"); 633 634 /** The Gurmukhi Unicode block. */ 635 public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI"); 636 637 /** The Gujarati Unicode block. */ 638 public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI"); 639 640 /** The Oriya Unicode block. */ 641 public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA"); 642 643 /** The Tamil Unicode block. */ 644 public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL"); 645 646 /** The Telugu Unicode block. */ 647 public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU"); 648 649 /** The Kannada Unicode block. */ 650 public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA"); 651 652 /** The Malayalam Unicode block. */ 653 public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM"); 654 655 /** The Sinhala Unicode block. */ 656 public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA"); 657 658 /** The Thai Unicode block. 
*/ 659 public static final UnicodeBlock THAI = new UnicodeBlock("THAI"); 660 661 /** The Lao Unicode block. */ 662 public static final UnicodeBlock LAO = new UnicodeBlock("LAO"); 663 664 /** The Tibetan Unicode block. */ 665 public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN"); 666 667 /** The Myanmar Unicode block. */ 668 public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR"); 669 670 /** The Georgian Unicode block. */ 671 public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN"); 672 673 /** The Hangul Jamo Unicode block. */ 674 public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"); 675 676 /** The Ethiopic Unicode block. */ 677 public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC"); 678 679 /** The Cherokee Unicode block. */ 680 public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE"); 681 682 /** The Unified Canadian Aboriginal Syllabics Unicode block. */ 683 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); 684 685 /** The Ogham Unicode block. */ 686 public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM"); 687 688 /** The Runic Unicode block. */ 689 public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC"); 690 691 /** The Tagalog Unicode block. */ 692 public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG"); 693 694 /** The Hanunoo Unicode block. */ 695 public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO"); 696 697 /** The Buhid Unicode block. */ 698 public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID"); 699 700 /** The Tagbanwa Unicode block. */ 701 public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA"); 702 703 /** The Khmer Unicode block. */ 704 public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER"); 705 706 /** The Mongolian Unicode block. 
*/ 707 public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN"); 708 709 /** The Limbu Unicode block. */ 710 public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU"); 711 712 /** The Tai Le Unicode block. */ 713 public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE"); 714 715 /** The Khmer Symbols Unicode block. */ 716 public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS"); 717 718 /** The Phonetic Extensions Unicode block. */ 719 public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS"); 720 721 /** The Latin Extended Additional Unicode block. */ 722 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"); 723 724 /** The Greek Extended Unicode block. */ 725 public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"); 726 727 /** The General Punctuation Unicode block. */ 728 public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"); 729 730 /** The Superscripts and Subscripts Unicode block. */ 731 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"); 732 733 /** The Currency Symbols Unicode block. */ 734 public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"); 735 736 /** 737 * The Combining Diacritical Marks for Symbols Unicode 738 * Block. Previously referred to as Combining Marks for 739 * Symbols. 740 */ 741 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"); 742 743 /** The Letterlike Symbols Unicode block. */ 744 public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"); 745 746 /** The Number Forms Unicode block. */ 747 public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"); 748 749 /** The Arrows Unicode block. 
*/ 750 public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS"); 751 752 /** The Mathematical Operators Unicode block. */ 753 public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"); 754 755 /** The Miscellaneous Technical Unicode block. */ 756 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"); 757 758 /** The Control Pictures Unicode block. */ 759 public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"); 760 761 /** The Optical Character Recognition Unicode block. */ 762 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"); 763 764 /** The Enclosed Alphanumerics Unicode block. */ 765 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"); 766 767 /** The Box Drawing Unicode block. */ 768 public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"); 769 770 /** The Block Elements Unicode block. */ 771 public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"); 772 773 /** The Geometric Shapes Unicode block. */ 774 public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"); 775 776 /** The Miscellaneous Symbols Unicode block. */ 777 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"); 778 779 /** The Dingbats Unicode block. */ 780 public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS"); 781 782 /** The Miscellaneous Mathematical Symbols-A Unicode block. */ 783 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A"); 784 785 /** The Supplemental Arrows-A Unicode block. 
*/ 786 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A"); 787 788 /** The Braille Patterns Unicode block. */ 789 public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"); 790 791 /** The Supplemental Arrows-B Unicode block. */ 792 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B"); 793 794 /** The Miscellaneous Mathematical Symbols-B Unicode block. */ 795 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B"); 796 797 /** The Supplemental Mathematical Operators Unicode block. */ 798 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS"); 799 800 /** The Miscellaneous Symbols and Arrows Unicode block. */ 801 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS"); 802 803 /** The CJK Radicals Supplement Unicode block. */ 804 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"); 805 806 /** The Kangxi Radicals Unicode block. */ 807 public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"); 808 809 /** The Ideographic Description Characters Unicode block. */ 810 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); 811 812 /** The CJK Symbols and Punctuation Unicode block. */ 813 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"); 814 815 /** The Hiragana Unicode block. */ 816 public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA"); 817 818 /** The Katakana Unicode block. */ 819 public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA"); 820 821 /** The Bopomofo Unicode block. 
*/ 822 public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO"); 823 824 /** The Hangul Compatibility Jamo Unicode block. */ 825 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"); 826 827 /** The Kanbun Unicode block. */ 828 public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN"); 829 830 /** The Bopomofo Extended Unicode block. */ 831 public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"); 832 833 /** The Katakana Phonetic Extensions Unicode block. */ 834 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS"); 835 836 /** The Enclosed CJK Letters and Months Unicode block. */ 837 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"); 838 839 /** The CJK Compatibility Unicode block. */ 840 public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"); 841 842 /** The CJK Unified Ideographs Extension A Unicode block. */ 843 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); 844 845 /** The Yijing Hexagram Symbols Unicode block. */ 846 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS"); 847 848 /** The CJK Unified Ideographs Unicode block. */ 849 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"); 850 851 /** The Yi Syllables Unicode block. */ 852 public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"); 853 854 /** The Yi Radicals Unicode block. */ 855 public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS"); 856 857 /** The Hangul Syllables Unicode block. 
*/ 858 public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"); 859 860 /** 861 * The High Surrogates Unicode block. This block represents 862 * code point values in the high surrogate range 0xD800 to 0xDB7F 863 */ 864 public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES"); 865 866 /** 867 * The High Private Use Surrogates Unicode block. This block 868 * represents code point values in the high surrogate range 0xDB80 to 869 * 0xDBFF 870 */ 871 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES"); 872 873 /** 874 * The Low Surrogates Unicode block. This block represents 875 * code point values in the low surrogate range 0xDC00 to 0xDFFF 876 */ 877 public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES"); 878 879 /** The Private Use Area Unicode block. */ 880 public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"); 881 882 /** The CJK Compatibility Ideographs Unicode block. */ 883 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"); 884 885 /** The Alphabetic Presentation Forms Unicode block. */ 886 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"); 887 888 /** The Arabic Presentation Forms-A Unicode block. */ 889 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"); 890 891 /** The Variation Selectors Unicode block. */ 892 public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS"); 893 894 /** The Combining Half Marks Unicode block. */ 895 public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"); 896 897 /** The CJK Compatibility Forms Unicode block. 
*/
        public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS");

        /** The Small Form Variants Unicode block. */
        public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS");

        /** The Arabic Presentation Forms-B Unicode block. */
        public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B");

        /** The Halfwidth and Fullwidth Forms Unicode block. */
        public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS");

        /** The Specials Unicode block. */
        public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS");

        /** The Linear B Syllabary Unicode block. */
        public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY");

        /** The Linear B Ideograms Unicode block. */
        public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS");

        /** The Aegean Numbers Unicode block. */
        public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS");

        /** The Old Italic Unicode block. */
        public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC");

        /** The Gothic Unicode block. */
        public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC");

        /** The Ugaritic Unicode block. */
        public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC");

        /** The Deseret Unicode block. */
        public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET");

        /** The Shavian Unicode block. */
        public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN");

        /** The Osmanya Unicode block. */
        public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA");

        /** The Cypriot Syllabary Unicode block.
*/
        public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY");

        /** The Byzantine Musical Symbols Unicode block. */
        public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS");

        /** The Musical Symbols Unicode block. */
        public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS");

        /** The Tai Xuan Jing Symbols Unicode block. */
        public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS");

        /** The Mathematical Alphanumeric Symbols Unicode block. */
        public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS");

        /** The CJK Unified Ideographs Extension B Unicode block. */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B");

        /** The CJK Compatibility Ideographs Supplement Unicode block. */
        public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT");

        /** The Tags Unicode block. */
        public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS");

        /** The Variation Selectors Supplement Unicode block. */
        public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT");

        /** The Supplementary Private Use Area-A Unicode block. */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A");

        /** The Supplementary Private Use Area-B Unicode block. */
        public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B");

        // Unicode 4.1.

        /** The Ancient Greek Musical Notation Unicode 4.1 block.
*/
        public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION");

        /** The Ancient Greek Numbers Unicode 4.1 block. */
        public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS");

        /** The Arabic Supplement Unicode 4.1 block. */
        public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT");

        /** The Buginese Unicode 4.1 block. */
        public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE");

        /** The CJK Strokes Unicode 4.1 block. */
        public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES");

        /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT");

        /** The Coptic Unicode 4.1 block. */
        public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC");

        /** The Ethiopic Extended Unicode 4.1 block. */
        public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED");

        /** The Ethiopic Supplement Unicode 4.1 block. */
        public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT");

        /** The Georgian Supplement Unicode 4.1 block. */
        public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT");

        /** The Glagolitic Unicode 4.1 block. */
        public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC");

        /** The Kharoshthi Unicode 4.1 block. */
        public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI");

        /** The Modifier Tone Letters Unicode 4.1 block. */
        public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS");

        /** The New Tai Lue Unicode 4.1 block.
*/
        public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE");

        /** The Old Persian Unicode 4.1 block. */
        public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN");

        /** The Phonetic Extensions Supplement Unicode 4.1 block. */
        public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT");

        /** The Supplemental Punctuation Unicode 4.1 block. */
        public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION");

        /** The Syloti Nagri Unicode 4.1 block. */
        public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI");

        /** The Tifinagh Unicode 4.1 block. */
        public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH");

        /** The Vertical Forms Unicode 4.1 block. */
        public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS");

        // Unicode 5.0.

        /** The NKo Unicode 5.0 block. */
        public static final UnicodeBlock NKO = new UnicodeBlock("NKO");

        /** The Balinese Unicode 5.0 block. */
        public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE");

        /** The Latin Extended-C Unicode 5.0 block. */
        public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C");

        /** The Latin Extended-D Unicode 5.0 block. */
        public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D");

        /** The Phags-pa Unicode 5.0 block. */
        public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA");

        /** The Phoenician Unicode 5.0 block. */
        public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN");

        /** The Cuneiform Unicode 5.0 block.
*/
        public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM");

        /** The Cuneiform Numbers and Punctuation Unicode 5.0 block. */
        public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION");

        /** The Counting Rod Numerals Unicode 5.0 block. */
        public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS");

        // Unicode 5.1.

        /** The Sundanese Unicode 5.1 block. */
        public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE");

        /** The Lepcha Unicode 5.1 block. */
        public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA");

        /** The Ol Chiki Unicode 5.1 block. */
        public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI");

        /** The Cyrillic Extended-A Unicode 5.1 block. */
        public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A");

        /** The Vai Unicode 5.1 block. */
        public static final UnicodeBlock VAI = new UnicodeBlock("VAI");

        /** The Cyrillic Extended-B Unicode 5.1 block. */
        public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B");

        /** The Saurashtra Unicode 5.1 block. */
        public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA");

        /** The Kayah Li Unicode 5.1 block. */
        public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI");

        /** The Rejang Unicode 5.1 block. */
        public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG");

        /** The Cham Unicode 5.1 block. */
        public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM");

        /** The Ancient Symbols Unicode 5.1 block. */
        public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS");

        /** The Phaistos Disc Unicode 5.1 block.
*/
        public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC");

        /** The Lycian Unicode 5.1 block. */
        public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN");

        /** The Carian Unicode 5.1 block. */
        public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN");

        /** The Lydian Unicode 5.1 block. */
        public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN");

        /** The Mahjong Tiles Unicode 5.1 block. */
        public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES");

        /** The Domino Tiles Unicode 5.1 block. */
        public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES");

        // Unicode 5.2.

        /** The Samaritan Unicode 5.2 block. */
        public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN");

        /** The Unified Canadian Aboriginal Syllabics Extended Unicode 5.2 block. */
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED");

        /** The Tai Tham Unicode 5.2 block. */
        public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM");

        /** The Vedic Extensions Unicode 5.2 block. */
        public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS");

        /** The Lisu Unicode 5.2 block. */
        public static final UnicodeBlock LISU = new UnicodeBlock("LISU");

        /** The Bamum Unicode 5.2 block. */
        public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM");

        /** The Common Indic Number Forms Unicode 5.2 block. */
        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS");

        /** The Devanagari Extended Unicode 5.2 block.
*/
        public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED");

        /** The Hangul Jamo Extended-A Unicode 5.2 block. */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A");

        /** The Javanese Unicode 5.2 block. */
        public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE");

        /** The Myanmar Extended-A Unicode 5.2 block. */
        public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A");

        /** The Tai Viet Unicode 5.2 block. */
        public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET");

        /** The Meetei Mayek Unicode 5.2 block. */
        public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK");

        /** The Hangul Jamo Extended-B Unicode 5.2 block. */
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B");

        /** The Imperial Aramaic Unicode 5.2 block. */
        public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC");

        /** The Old South Arabian Unicode 5.2 block. */
        public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN");

        /** The Avestan Unicode 5.2 block. */
        public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN");

        /** The Inscriptional Parthian Unicode 5.2 block. */
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN");

        /** The Inscriptional Pahlavi Unicode 5.2 block. */
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI");

        /** The Old Turkic Unicode 5.2 block. */
        public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC");

        /** The Rumi Numeral Symbols Unicode 5.2 block.
*/
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS");

        /** The Kaithi Unicode 5.2 block. */
        public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI");

        /** The Egyptian Hieroglyphs Unicode 5.2 block. */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS");

        /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT");

        /** The Enclosed Ideographic Supplement Unicode 5.2 block. */
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT");

        /** The CJK Unified Ideographs Extension C Unicode 5.2 block. */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C");

        // Unicode 6.0.

        /** The Mandaic Unicode 6.0 block. */
        public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC");

        /** The Batak Unicode 6.0 block. */
        public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK");

        /** The Ethiopic Extended-A Unicode 6.0 block. */
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A");

        /** The Brahmi Unicode 6.0 block. */
        public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI");

        /** The Bamum Supplement Unicode 6.0 block. */
        public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT");

        /** The Kana Supplement Unicode 6.0 block. */
        public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT");

        /** The Playing Cards Unicode 6.0 block.
*/ 1217 public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS"); 1218 1219 /** The Miscellaneous Symbols And Pictographs Supplement Unicode 6.0 block. */ 1220 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS"); 1221 1222 /** The Emoticons Unicode 6.0 block. */ 1223 public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS"); 1224 1225 /** The Transport And Map Symbols Unicode 6.0 block. */ 1226 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS"); 1227 1228 /** The Alchemical Symbols Unicode 6.0 block. */ 1229 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS"); 1230 1231 /** The CJK Unified Ideographs Extension-D Unicode 6.0 block. */ 1232 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D"); 1233 1234 /* 1235 * All of the UnicodeBlocks above, in the icu4c UBlock enum order. 1236 */ 1237 private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { 1238 null, // icu4c numbers blocks starting at 1, so index 0 should be null. 
1239 1240 UnicodeBlock.BASIC_LATIN, 1241 UnicodeBlock.LATIN_1_SUPPLEMENT, 1242 UnicodeBlock.LATIN_EXTENDED_A, 1243 UnicodeBlock.LATIN_EXTENDED_B, 1244 UnicodeBlock.IPA_EXTENSIONS, 1245 UnicodeBlock.SPACING_MODIFIER_LETTERS, 1246 UnicodeBlock.COMBINING_DIACRITICAL_MARKS, 1247 UnicodeBlock.GREEK, 1248 UnicodeBlock.CYRILLIC, 1249 UnicodeBlock.ARMENIAN, 1250 UnicodeBlock.HEBREW, 1251 UnicodeBlock.ARABIC, 1252 UnicodeBlock.SYRIAC, 1253 UnicodeBlock.THAANA, 1254 UnicodeBlock.DEVANAGARI, 1255 UnicodeBlock.BENGALI, 1256 UnicodeBlock.GURMUKHI, 1257 UnicodeBlock.GUJARATI, 1258 UnicodeBlock.ORIYA, 1259 UnicodeBlock.TAMIL, 1260 UnicodeBlock.TELUGU, 1261 UnicodeBlock.KANNADA, 1262 UnicodeBlock.MALAYALAM, 1263 UnicodeBlock.SINHALA, 1264 UnicodeBlock.THAI, 1265 UnicodeBlock.LAO, 1266 UnicodeBlock.TIBETAN, 1267 UnicodeBlock.MYANMAR, 1268 UnicodeBlock.GEORGIAN, 1269 UnicodeBlock.HANGUL_JAMO, 1270 UnicodeBlock.ETHIOPIC, 1271 UnicodeBlock.CHEROKEE, 1272 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 1273 UnicodeBlock.OGHAM, 1274 UnicodeBlock.RUNIC, 1275 UnicodeBlock.KHMER, 1276 UnicodeBlock.MONGOLIAN, 1277 UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, 1278 UnicodeBlock.GREEK_EXTENDED, 1279 UnicodeBlock.GENERAL_PUNCTUATION, 1280 UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, 1281 UnicodeBlock.CURRENCY_SYMBOLS, 1282 UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, 1283 UnicodeBlock.LETTERLIKE_SYMBOLS, 1284 UnicodeBlock.NUMBER_FORMS, 1285 UnicodeBlock.ARROWS, 1286 UnicodeBlock.MATHEMATICAL_OPERATORS, 1287 UnicodeBlock.MISCELLANEOUS_TECHNICAL, 1288 UnicodeBlock.CONTROL_PICTURES, 1289 UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, 1290 UnicodeBlock.ENCLOSED_ALPHANUMERICS, 1291 UnicodeBlock.BOX_DRAWING, 1292 UnicodeBlock.BLOCK_ELEMENTS, 1293 UnicodeBlock.GEOMETRIC_SHAPES, 1294 UnicodeBlock.MISCELLANEOUS_SYMBOLS, 1295 UnicodeBlock.DINGBATS, 1296 UnicodeBlock.BRAILLE_PATTERNS, 1297 UnicodeBlock.CJK_RADICALS_SUPPLEMENT, 1298 UnicodeBlock.KANGXI_RADICALS, 1299 UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 
1300 UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, 1301 UnicodeBlock.HIRAGANA, 1302 UnicodeBlock.KATAKANA, 1303 UnicodeBlock.BOPOMOFO, 1304 UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 1305 UnicodeBlock.KANBUN, 1306 UnicodeBlock.BOPOMOFO_EXTENDED, 1307 UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, 1308 UnicodeBlock.CJK_COMPATIBILITY, 1309 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 1310 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, 1311 UnicodeBlock.YI_SYLLABLES, 1312 UnicodeBlock.YI_RADICALS, 1313 UnicodeBlock.HANGUL_SYLLABLES, 1314 UnicodeBlock.HIGH_SURROGATES, 1315 UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, 1316 UnicodeBlock.LOW_SURROGATES, 1317 UnicodeBlock.PRIVATE_USE_AREA, 1318 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, 1319 UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, 1320 UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, 1321 UnicodeBlock.COMBINING_HALF_MARKS, 1322 UnicodeBlock.CJK_COMPATIBILITY_FORMS, 1323 UnicodeBlock.SMALL_FORM_VARIANTS, 1324 UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, 1325 UnicodeBlock.SPECIALS, 1326 UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, 1327 1328 // Unicode 3.1. 1329 UnicodeBlock.OLD_ITALIC, 1330 UnicodeBlock.GOTHIC, 1331 UnicodeBlock.DESERET, 1332 UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, 1333 UnicodeBlock.MUSICAL_SYMBOLS, 1334 UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 1335 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 1336 UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 1337 UnicodeBlock.TAGS, 1338 1339 // Unicode 3.2. 
1340 UnicodeBlock.CYRILLIC_SUPPLEMENTARY, 1341 UnicodeBlock.TAGALOG, 1342 UnicodeBlock.HANUNOO, 1343 UnicodeBlock.BUHID, 1344 UnicodeBlock.TAGBANWA, 1345 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 1346 UnicodeBlock.SUPPLEMENTAL_ARROWS_A, 1347 UnicodeBlock.SUPPLEMENTAL_ARROWS_B, 1348 UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 1349 UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 1350 UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, 1351 UnicodeBlock.VARIATION_SELECTORS, 1352 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, 1353 UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, 1354 1355 // Unicode 4.0. 1356 UnicodeBlock.LIMBU, 1357 UnicodeBlock.TAI_LE, 1358 UnicodeBlock.KHMER_SYMBOLS, 1359 UnicodeBlock.PHONETIC_EXTENSIONS, 1360 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, 1361 UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, 1362 UnicodeBlock.LINEAR_B_SYLLABARY, 1363 UnicodeBlock.LINEAR_B_IDEOGRAMS, 1364 UnicodeBlock.AEGEAN_NUMBERS, 1365 UnicodeBlock.UGARITIC, 1366 UnicodeBlock.SHAVIAN, 1367 UnicodeBlock.OSMANYA, 1368 UnicodeBlock.CYPRIOT_SYLLABARY, 1369 UnicodeBlock.TAI_XUAN_JING_SYMBOLS, 1370 UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT, 1371 1372 // Unicode 4.1. 1373 UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION, 1374 UnicodeBlock.ANCIENT_GREEK_NUMBERS, 1375 UnicodeBlock.ARABIC_SUPPLEMENT, 1376 UnicodeBlock.BUGINESE, 1377 UnicodeBlock.CJK_STROKES, 1378 UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 1379 UnicodeBlock.COPTIC, 1380 UnicodeBlock.ETHIOPIC_EXTENDED, 1381 UnicodeBlock.ETHIOPIC_SUPPLEMENT, 1382 UnicodeBlock.GEORGIAN_SUPPLEMENT, 1383 UnicodeBlock.GLAGOLITIC, 1384 UnicodeBlock.KHAROSHTHI, 1385 UnicodeBlock.MODIFIER_TONE_LETTERS, 1386 UnicodeBlock.NEW_TAI_LUE, 1387 UnicodeBlock.OLD_PERSIAN, 1388 UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT, 1389 UnicodeBlock.SUPPLEMENTAL_PUNCTUATION, 1390 UnicodeBlock.SYLOTI_NAGRI, 1391 UnicodeBlock.TIFINAGH, 1392 UnicodeBlock.VERTICAL_FORMS, 1393 1394 // Unicode 5.0. 
1395 UnicodeBlock.NKO, 1396 UnicodeBlock.BALINESE, 1397 UnicodeBlock.LATIN_EXTENDED_C, 1398 UnicodeBlock.LATIN_EXTENDED_D, 1399 UnicodeBlock.PHAGS_PA, 1400 UnicodeBlock.PHOENICIAN, 1401 UnicodeBlock.CUNEIFORM, 1402 UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION, 1403 UnicodeBlock.COUNTING_ROD_NUMERALS, 1404 1405 // Unicode 5.1. 1406 UnicodeBlock.SUNDANESE, 1407 UnicodeBlock.LEPCHA, 1408 UnicodeBlock.OL_CHIKI, 1409 UnicodeBlock.CYRILLIC_EXTENDED_A, 1410 UnicodeBlock.VAI, 1411 UnicodeBlock.CYRILLIC_EXTENDED_B, 1412 UnicodeBlock.SAURASHTRA, 1413 UnicodeBlock.KAYAH_LI, 1414 UnicodeBlock.REJANG, 1415 UnicodeBlock.CHAM, 1416 UnicodeBlock.ANCIENT_SYMBOLS, 1417 UnicodeBlock.PHAISTOS_DISC, 1418 UnicodeBlock.LYCIAN, 1419 UnicodeBlock.CARIAN, 1420 UnicodeBlock.LYDIAN, 1421 UnicodeBlock.MAHJONG_TILES, 1422 UnicodeBlock.DOMINO_TILES, 1423 1424 // Unicode 5.2. 1425 UnicodeBlock.SAMARITAN, 1426 UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 1427 UnicodeBlock.TAI_THAM, 1428 UnicodeBlock.VEDIC_EXTENSIONS, 1429 UnicodeBlock.LISU, 1430 UnicodeBlock.BAMUM, 1431 UnicodeBlock.COMMON_INDIC_NUMBER_FORMS, 1432 UnicodeBlock.DEVANAGARI_EXTENDED, 1433 UnicodeBlock.HANGUL_JAMO_EXTENDED_A, 1434 UnicodeBlock.JAVANESE, 1435 UnicodeBlock.MYANMAR_EXTENDED_A, 1436 UnicodeBlock.TAI_VIET, 1437 UnicodeBlock.MEETEI_MAYEK, 1438 UnicodeBlock.HANGUL_JAMO_EXTENDED_B, 1439 UnicodeBlock.IMPERIAL_ARAMAIC, 1440 UnicodeBlock.OLD_SOUTH_ARABIAN, 1441 UnicodeBlock.AVESTAN, 1442 UnicodeBlock.INSCRIPTIONAL_PARTHIAN, 1443 UnicodeBlock.INSCRIPTIONAL_PAHLAVI, 1444 UnicodeBlock.OLD_TURKIC, 1445 UnicodeBlock.RUMI_NUMERAL_SYMBOLS, 1446 UnicodeBlock.KAITHI, 1447 UnicodeBlock.EGYPTIAN_HIEROGLYPHS, 1448 UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 1449 UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 1450 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 1451 1452 // Unicode 6.0. 
1453 UnicodeBlock.MANDAIC, 1454 UnicodeBlock.BATAK, 1455 UnicodeBlock.ETHIOPIC_EXTENDED_A, 1456 UnicodeBlock.BRAHMI, 1457 UnicodeBlock.BAMUM_SUPPLEMENT, 1458 UnicodeBlock.KANA_SUPPLEMENT, 1459 UnicodeBlock.PLAYING_CARDS, 1460 UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 1461 UnicodeBlock.EMOTICONS, 1462 UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS, 1463 UnicodeBlock.ALCHEMICAL_SYMBOLS, 1464 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 1465 }; 1466 1467 /** 1468 * Returns the Unicode block for the given block name, or null if there is no 1469 * such block. 1470 * 1471 * <p>Block names may be one of the following: 1472 * <ul> 1473 * <li>Canonical block name, as defined by the Unicode specification; 1474 * case-insensitive.</li> 1475 * <li>Canonical block name without any spaces, as defined by the 1476 * Unicode specification; case-insensitive.</li> 1477 * <li>A {@code UnicodeBlock} constant identifier. This is determined by 1478 * converting the canonical name to uppercase and replacing all spaces and hyphens 1479 * with underscores.</li> 1480 * </ul> 1481 * 1482 * @throws NullPointerException 1483 * if {@code blockName == null}. 1484 * @throws IllegalArgumentException 1485 * if {@code blockName} is not the name of any known block. 1486 * @since 1.5 1487 */ 1488 public static UnicodeBlock forName(String blockName) { 1489 if (blockName == null) { 1490 throw new NullPointerException("blockName == null"); 1491 } 1492 int block = forNameImpl(blockName); 1493 if (block == -1) { 1494 throw new IllegalArgumentException("Unknown block: " + blockName); 1495 } 1496 return BLOCKS[block]; 1497 } 1498 1499 /** 1500 * Returns the Unicode block containing the given code point, or null if the 1501 * code point does not belong to any known block. 1502 */ 1503 public static UnicodeBlock of(char c) { 1504 return of((int) c); 1505 } 1506 1507 /** 1508 * Returns the Unicode block containing the given code point, or null if the 1509 * code point does not belong to any known block. 
1510 */ 1511 public static UnicodeBlock of(int codePoint) { 1512 checkValidCodePoint(codePoint); 1513 int block = ofImpl(codePoint); 1514 if (block == -1 || block >= BLOCKS.length) { 1515 return null; 1516 } 1517 return BLOCKS[block]; 1518 } 1519 1520 private UnicodeBlock(String blockName) { 1521 super(blockName); 1522 } 1523 } 1524 1525 private static native int forNameImpl(String blockName); 1526 1527 private static native int ofImpl(int codePoint); 1528 1529 /** 1530 * Constructs a new {@code Character} with the specified primitive char 1531 * value. 1532 * 1533 * @param value 1534 * the primitive char value to store in the new instance. 1535 */ 1536 public Character(char value) { 1537 this.value = value; 1538 } 1539 1540 /** 1541 * Gets the primitive value of this character. 1542 * 1543 * @return this object's primitive value. 1544 */ 1545 public char charValue() { 1546 return value; 1547 } 1548 1549 private static void checkValidCodePoint(int codePoint) { 1550 if (!isValidCodePoint(codePoint)) { 1551 throw new IllegalArgumentException("Invalid code point: " + codePoint); 1552 } 1553 } 1554 1555 /** 1556 * Compares this object to the specified character object to determine their 1557 * relative order. 1558 * 1559 * @param c 1560 * the character object to compare this object to. 1561 * @return {@code 0} if the value of this character and the value of 1562 * {@code c} are equal; a positive value if the value of this 1563 * character is greater than the value of {@code c}; a negative 1564 * value if the value of this character is less than the value of 1565 * {@code c}. 1566 * @see java.lang.Comparable 1567 * @since 1.2 1568 */ 1569 public int compareTo(Character c) { 1570 return compare(value, c.value); 1571 } 1572 1573 /** 1574 * Compares two {@code char} values. 1575 * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. 
1576 * @since 1.7 1577 */ 1578 public static int compare(char lhs, char rhs) { 1579 return lhs - rhs; 1580 } 1581 1582 /** 1583 * Returns a {@code Character} instance for the {@code char} value passed. 1584 * <p> 1585 * If it is not necessary to get a new {@code Character} instance, it is 1586 * recommended to use this method instead of the constructor, since it 1587 * maintains a cache of instances which may result in better performance. 1588 * 1589 * @param c 1590 * the char value for which to get a {@code Character} instance. 1591 * @return the {@code Character} instance for {@code c}. 1592 * @since 1.5 1593 */ 1594 public static Character valueOf(char c) { 1595 return c < 128 ? SMALL_VALUES[c] : new Character(c); 1596 } 1597 1598 /** 1599 * A cache of instances used by {@link #valueOf(char)} and auto-boxing 1600 */ 1601 private static final Character[] SMALL_VALUES = new Character[128]; 1602 1603 static { 1604 for (int i = 0; i < 128; i++) { 1605 SMALL_VALUES[i] = new Character((char) i); 1606 } 1607 } 1608 /** 1609 * Indicates whether {@code codePoint} is a valid Unicode code point. 1610 * 1611 * @param codePoint 1612 * the code point to test. 1613 * @return {@code true} if {@code codePoint} is a valid Unicode code point; 1614 * {@code false} otherwise. 1615 * @since 1.5 1616 */ 1617 public static boolean isValidCodePoint(int codePoint) { 1618 return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1619 } 1620 1621 /** 1622 * Indicates whether {@code codePoint} is within the supplementary code 1623 * point range. 1624 * 1625 * @param codePoint 1626 * the code point to test. 1627 * @return {@code true} if {@code codePoint} is within the supplementary 1628 * code point range; {@code false} otherwise. 
1629 * @since 1.5 1630 */ 1631 public static boolean isSupplementaryCodePoint(int codePoint) { 1632 return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); 1633 } 1634 1635 /** 1636 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 1637 * that is used for representing supplementary characters in UTF-16 1638 * encoding. 1639 * 1640 * @param ch 1641 * the character to test. 1642 * @return {@code true} if {@code ch} is a high-surrogate code unit; 1643 * {@code false} otherwise. 1644 * @see #isLowSurrogate(char) 1645 * @since 1.5 1646 */ 1647 public static boolean isHighSurrogate(char ch) { 1648 return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); 1649 } 1650 1651 /** 1652 * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit 1653 * that is used for representing supplementary characters in UTF-16 1654 * encoding. 1655 * 1656 * @param ch 1657 * the character to test. 1658 * @return {@code true} if {@code ch} is a low-surrogate code unit; 1659 * {@code false} otherwise. 1660 * @see #isHighSurrogate(char) 1661 * @since 1.5 1662 */ 1663 public static boolean isLowSurrogate(char ch) { 1664 return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); 1665 } 1666 1667 /** 1668 * Returns true if the given character is a high or low surrogate. 1669 * @since 1.7 1670 */ 1671 public static boolean isSurrogate(char ch) { 1672 return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; 1673 } 1674 1675 /** 1676 * Indicates whether the specified character pair is a valid surrogate pair. 1677 * 1678 * @param high 1679 * the high surrogate unit to test. 1680 * @param low 1681 * the low surrogate unit to test. 1682 * @return {@code true} if {@code high} is a high-surrogate code unit and 1683 * {@code low} is a low-surrogate code unit; {@code false} 1684 * otherwise. 
1685 * @see #isHighSurrogate(char) 1686 * @see #isLowSurrogate(char) 1687 * @since 1.5 1688 */ 1689 public static boolean isSurrogatePair(char high, char low) { 1690 return (isHighSurrogate(high) && isLowSurrogate(low)); 1691 } 1692 1693 /** 1694 * Calculates the number of {@code char} values required to represent the 1695 * specified Unicode code point. This method checks if the {@code codePoint} 1696 * is greater than or equal to {@code 0x10000}, in which case {@code 2} is 1697 * returned, otherwise {@code 1}. To test if the code point is valid, use 1698 * the {@link #isValidCodePoint(int)} method. 1699 * 1700 * @param codePoint 1701 * the code point for which to calculate the number of required 1702 * chars. 1703 * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. 1704 * @see #isValidCodePoint(int) 1705 * @see #isSupplementaryCodePoint(int) 1706 * @since 1.5 1707 */ 1708 public static int charCount(int codePoint) { 1709 return (codePoint >= 0x10000 ? 2 : 1); 1710 } 1711 1712 /** 1713 * Converts a surrogate pair into a Unicode code point. This method assumes 1714 * that the pair are valid surrogates. If the pair are <i>not</i> valid 1715 * surrogates, then the result is indeterminate. The 1716 * {@link #isSurrogatePair(char, char)} method should be used prior to this 1717 * method to validate the pair. 1718 * 1719 * @param high 1720 * the high surrogate unit. 1721 * @param low 1722 * the low surrogate unit. 1723 * @return the Unicode code point corresponding to the surrogate unit pair. 1724 * @see #isSurrogatePair(char, char) 1725 * @since 1.5 1726 */ 1727 public static int toCodePoint(char high, char low) { 1728 // See RFC 2781, Section 2.2 1729 // http://www.ietf.org/rfc/rfc2781.txt 1730 int h = (high & 0x3FF) << 10; 1731 int l = low & 0x3FF; 1732 return (h | l) + 0x10000; 1733 } 1734 1735 /** 1736 * Returns the code point at {@code index} in the specified sequence of 1737 * character units. 
If the unit at {@code index} is a high-surrogate unit, 1738 * {@code index + 1} is less than the length of the sequence and the unit at 1739 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1740 * point represented by the pair is returned; otherwise the {@code char} 1741 * value at {@code index} is returned. 1742 * 1743 * @param seq 1744 * the source sequence of {@code char} units. 1745 * @param index 1746 * the position in {@code seq} from which to retrieve the code 1747 * point. 1748 * @return the Unicode code point or {@code char} value at {@code index} in 1749 * {@code seq}. 1750 * @throws NullPointerException 1751 * if {@code seq} is {@code null}. 1752 * @throws IndexOutOfBoundsException 1753 * if the {@code index} is negative or greater than or equal to 1754 * the length of {@code seq}. 1755 * @since 1.5 1756 */ 1757 public static int codePointAt(CharSequence seq, int index) { 1758 if (seq == null) { 1759 throw new NullPointerException("seq == null"); 1760 } 1761 int len = seq.length(); 1762 if (index < 0 || index >= len) { 1763 throw new IndexOutOfBoundsException(); 1764 } 1765 1766 char high = seq.charAt(index++); 1767 if (index >= len) { 1768 return high; 1769 } 1770 char low = seq.charAt(index); 1771 if (isSurrogatePair(high, low)) { 1772 return toCodePoint(high, low); 1773 } 1774 return high; 1775 } 1776 1777 /** 1778 * Returns the code point at {@code index} in the specified array of 1779 * character units. If the unit at {@code index} is a high-surrogate unit, 1780 * {@code index + 1} is less than the length of the array and the unit at 1781 * {@code index + 1} is a low-surrogate unit, then the supplementary code 1782 * point represented by the pair is returned; otherwise the {@code char} 1783 * value at {@code index} is returned. 1784 * 1785 * @param seq 1786 * the source array of {@code char} units. 1787 * @param index 1788 * the position in {@code seq} from which to retrieve the code 1789 * point. 
1790 * @return the Unicode code point or {@code char} value at {@code index} in 1791 * {@code seq}. 1792 * @throws NullPointerException 1793 * if {@code seq} is {@code null}. 1794 * @throws IndexOutOfBoundsException 1795 * if the {@code index} is negative or greater than or equal to 1796 * the length of {@code seq}. 1797 * @since 1.5 1798 */ 1799 public static int codePointAt(char[] seq, int index) { 1800 if (seq == null) { 1801 throw new NullPointerException("seq == null"); 1802 } 1803 int len = seq.length; 1804 if (index < 0 || index >= len) { 1805 throw new IndexOutOfBoundsException(); 1806 } 1807 1808 char high = seq[index++]; 1809 if (index >= len) { 1810 return high; 1811 } 1812 char low = seq[index]; 1813 if (isSurrogatePair(high, low)) { 1814 return toCodePoint(high, low); 1815 } 1816 return high; 1817 } 1818 1819 /** 1820 * Returns the code point at {@code index} in the specified array of 1821 * character units, where {@code index} has to be less than {@code limit}. 1822 * If the unit at {@code index} is a high-surrogate unit, {@code index + 1} 1823 * is less than {@code limit} and the unit at {@code index + 1} is a 1824 * low-surrogate unit, then the supplementary code point represented by the 1825 * pair is returned; otherwise the {@code char} value at {@code index} is 1826 * returned. 1827 * 1828 * @param seq 1829 * the source array of {@code char} units. 1830 * @param index 1831 * the position in {@code seq} from which to get the code point. 1832 * @param limit 1833 * the index after the last unit in {@code seq} that can be used. 1834 * @return the Unicode code point or {@code char} value at {@code index} in 1835 * {@code seq}. 1836 * @throws NullPointerException 1837 * if {@code seq} is {@code null}. 1838 * @throws IndexOutOfBoundsException 1839 * if {@code index < 0}, {@code index >= limit}, 1840 * {@code limit < 0} or if {@code limit} is greater than the 1841 * length of {@code seq}. 
1842 * @since 1.5 1843 */ 1844 public static int codePointAt(char[] seq, int index, int limit) { 1845 if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { 1846 throw new IndexOutOfBoundsException(); 1847 } 1848 1849 char high = seq[index++]; 1850 if (index >= limit) { 1851 return high; 1852 } 1853 char low = seq[index]; 1854 if (isSurrogatePair(high, low)) { 1855 return toCodePoint(high, low); 1856 } 1857 return high; 1858 } 1859 1860 /** 1861 * Returns the code point that precedes {@code index} in the specified 1862 * sequence of character units. If the unit at {@code index - 1} is a 1863 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1864 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1865 * point represented by the pair is returned; otherwise the {@code char} 1866 * value at {@code index - 1} is returned. 1867 * 1868 * @param seq 1869 * the source sequence of {@code char} units. 1870 * @param index 1871 * the position in {@code seq} following the code 1872 * point that should be returned. 1873 * @return the Unicode code point or {@code char} value before {@code index} 1874 * in {@code seq}. 1875 * @throws NullPointerException 1876 * if {@code seq} is {@code null}. 1877 * @throws IndexOutOfBoundsException 1878 * if the {@code index} is less than 1 or greater than the 1879 * length of {@code seq}. 
1880 * @since 1.5 1881 */ 1882 public static int codePointBefore(CharSequence seq, int index) { 1883 if (seq == null) { 1884 throw new NullPointerException("seq == null"); 1885 } 1886 int len = seq.length(); 1887 if (index < 1 || index > len) { 1888 throw new IndexOutOfBoundsException(); 1889 } 1890 1891 char low = seq.charAt(--index); 1892 if (--index < 0) { 1893 return low; 1894 } 1895 char high = seq.charAt(index); 1896 if (isSurrogatePair(high, low)) { 1897 return toCodePoint(high, low); 1898 } 1899 return low; 1900 } 1901 1902 /** 1903 * Returns the code point that precedes {@code index} in the specified 1904 * array of character units. If the unit at {@code index - 1} is a 1905 * low-surrogate unit, {@code index - 2} is not negative and the unit at 1906 * {@code index - 2} is a high-surrogate unit, then the supplementary code 1907 * point represented by the pair is returned; otherwise the {@code char} 1908 * value at {@code index - 1} is returned. 1909 * 1910 * @param seq 1911 * the source array of {@code char} units. 1912 * @param index 1913 * the position in {@code seq} following the code 1914 * point that should be returned. 1915 * @return the Unicode code point or {@code char} value before {@code index} 1916 * in {@code seq}. 1917 * @throws NullPointerException 1918 * if {@code seq} is {@code null}. 1919 * @throws IndexOutOfBoundsException 1920 * if the {@code index} is less than 1 or greater than the 1921 * length of {@code seq}. 
1922 * @since 1.5 1923 */ 1924 public static int codePointBefore(char[] seq, int index) { 1925 if (seq == null) { 1926 throw new NullPointerException("seq == null"); 1927 } 1928 int len = seq.length; 1929 if (index < 1 || index > len) { 1930 throw new IndexOutOfBoundsException(); 1931 } 1932 1933 char low = seq[--index]; 1934 if (--index < 0) { 1935 return low; 1936 } 1937 char high = seq[index]; 1938 if (isSurrogatePair(high, low)) { 1939 return toCodePoint(high, low); 1940 } 1941 return low; 1942 } 1943 1944 /** 1945 * Returns the code point that precedes the {@code index} in the specified 1946 * array of character units and is not less than {@code start}. If the unit 1947 * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not 1948 * less than {@code start} and the unit at {@code index - 2} is a 1949 * high-surrogate unit, then the supplementary code point represented by the 1950 * pair is returned; otherwise the {@code char} value at {@code index - 1} 1951 * is returned. 1952 * 1953 * @param seq 1954 * the source array of {@code char} units. 1955 * @param index 1956 * the position in {@code seq} following the code point that 1957 * should be returned. 1958 * @param start 1959 * the index of the first element in {@code seq}. 1960 * @return the Unicode code point or {@code char} value before {@code index} 1961 * in {@code seq}. 1962 * @throws NullPointerException 1963 * if {@code seq} is {@code null}. 1964 * @throws IndexOutOfBoundsException 1965 * if the {@code index <= start}, {@code start < 0}, 1966 * {@code index} is greater than the length of {@code seq}, or 1967 * if {@code start} is equal or greater than the length of 1968 * {@code seq}. 
1969 * @since 1.5 1970 */ 1971 public static int codePointBefore(char[] seq, int index, int start) { 1972 if (seq == null) { 1973 throw new NullPointerException("seq == null"); 1974 } 1975 int len = seq.length; 1976 if (index <= start || index > len || start < 0 || start >= len) { 1977 throw new IndexOutOfBoundsException(); 1978 } 1979 1980 char low = seq[--index]; 1981 if (--index < start) { 1982 return low; 1983 } 1984 char high = seq[index]; 1985 if (isSurrogatePair(high, low)) { 1986 return toCodePoint(high, low); 1987 } 1988 return low; 1989 } 1990 1991 /** 1992 * Converts the specified Unicode code point into a UTF-16 encoded sequence 1993 * and copies the value(s) into the char array {@code dst}, starting at 1994 * index {@code dstIndex}. 1995 * 1996 * @param codePoint 1997 * the Unicode code point to encode. 1998 * @param dst 1999 * the destination array to copy the encoded value into. 2000 * @param dstIndex 2001 * the index in {@code dst} from where to start copying. 2002 * @return the number of {@code char} value units copied into {@code dst}. 2003 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2004 * @throws NullPointerException 2005 * if {@code dst} is {@code null}. 2006 * @throws IndexOutOfBoundsException 2007 * if {@code dstIndex} is negative, greater than or equal to 2008 * {@code dst.length} or equals {@code dst.length - 1} when 2009 * {@code codePoint} is a 2010 * {@link #isSupplementaryCodePoint(int) supplementary code point}. 
2011 * @since 1.5 2012 */ 2013 public static int toChars(int codePoint, char[] dst, int dstIndex) { 2014 checkValidCodePoint(codePoint); 2015 if (dst == null) { 2016 throw new NullPointerException("dst == null"); 2017 } 2018 if (dstIndex < 0 || dstIndex >= dst.length) { 2019 throw new IndexOutOfBoundsException(); 2020 } 2021 2022 if (isSupplementaryCodePoint(codePoint)) { 2023 if (dstIndex == dst.length - 1) { 2024 throw new IndexOutOfBoundsException(); 2025 } 2026 // See RFC 2781, Section 2.1 2027 // http://www.ietf.org/rfc/rfc2781.txt 2028 int cpPrime = codePoint - 0x10000; 2029 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2030 int low = 0xDC00 | (cpPrime & 0x3FF); 2031 dst[dstIndex] = (char) high; 2032 dst[dstIndex + 1] = (char) low; 2033 return 2; 2034 } 2035 2036 dst[dstIndex] = (char) codePoint; 2037 return 1; 2038 } 2039 2040 /** 2041 * Converts the specified Unicode code point into a UTF-16 encoded sequence 2042 * and returns it as a char array. 2043 * 2044 * @param codePoint 2045 * the Unicode code point to encode. 2046 * @return the UTF-16 encoded char sequence. If {@code codePoint} is a 2047 * {@link #isSupplementaryCodePoint(int) supplementary code point}, 2048 * then the returned array contains two characters, otherwise it 2049 * contains just one character. 2050 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 2051 * @since 1.5 2052 */ 2053 public static char[] toChars(int codePoint) { 2054 checkValidCodePoint(codePoint); 2055 if (isSupplementaryCodePoint(codePoint)) { 2056 int cpPrime = codePoint - 0x10000; 2057 int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); 2058 int low = 0xDC00 | (cpPrime & 0x3FF); 2059 return new char[] { (char) high, (char) low }; 2060 } 2061 return new char[] { (char) codePoint }; 2062 } 2063 2064 /** 2065 * Counts the number of Unicode code points in the subsequence of the 2066 * specified character sequence, as delineated by {@code beginIndex} and 2067 * {@code endIndex}. 
Any surrogate values with missing pair values will be 2068 * counted as one code point. 2069 * 2070 * @param seq 2071 * the {@code CharSequence} to look through. 2072 * @param beginIndex 2073 * the inclusive index to begin counting at. 2074 * @param endIndex 2075 * the exclusive index to stop counting at. 2076 * @return the number of Unicode code points. 2077 * @throws NullPointerException 2078 * if {@code seq} is {@code null}. 2079 * @throws IndexOutOfBoundsException 2080 * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or 2081 * if {@code endIndex} is greater than the length of {@code seq}. 2082 * @since 1.5 2083 */ 2084 public static int codePointCount(CharSequence seq, int beginIndex, 2085 int endIndex) { 2086 if (seq == null) { 2087 throw new NullPointerException("seq == null"); 2088 } 2089 int len = seq.length(); 2090 if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { 2091 throw new IndexOutOfBoundsException(); 2092 } 2093 2094 int result = 0; 2095 for (int i = beginIndex; i < endIndex; i++) { 2096 char c = seq.charAt(i); 2097 if (isHighSurrogate(c)) { 2098 if (++i < endIndex) { 2099 c = seq.charAt(i); 2100 if (!isLowSurrogate(c)) { 2101 result++; 2102 } 2103 } 2104 } 2105 result++; 2106 } 2107 return result; 2108 } 2109 2110 /** 2111 * Counts the number of Unicode code points in the subsequence of the 2112 * specified char array, as delineated by {@code offset} and {@code count}. 2113 * Any surrogate values with missing pair values will be counted as one code 2114 * point. 2115 * 2116 * @param seq 2117 * the char array to look through 2118 * @param offset 2119 * the inclusive index to begin counting at. 2120 * @param count 2121 * the number of {@code char} values to look through in 2122 * {@code seq}. 2123 * @return the number of Unicode code points. 2124 * @throws NullPointerException 2125 * if {@code seq} is {@code null}. 
2126 * @throws IndexOutOfBoundsException 2127 * if {@code offset < 0}, {@code count < 0} or if 2128 * {@code offset + count} is greater than the length of 2129 * {@code seq}. 2130 * @since 1.5 2131 */ 2132 public static int codePointCount(char[] seq, int offset, int count) { 2133 Arrays.checkOffsetAndCount(seq.length, offset, count); 2134 int endIndex = offset + count; 2135 int result = 0; 2136 for (int i = offset; i < endIndex; i++) { 2137 char c = seq[i]; 2138 if (isHighSurrogate(c)) { 2139 if (++i < endIndex) { 2140 c = seq[i]; 2141 if (!isLowSurrogate(c)) { 2142 result++; 2143 } 2144 } 2145 } 2146 result++; 2147 } 2148 return result; 2149 } 2150 2151 /** 2152 * Determines the index in the specified character sequence that is offset 2153 * {@code codePointOffset} code points from {@code index}. 2154 * 2155 * @param seq 2156 * the character sequence to find the index in. 2157 * @param index 2158 * the start index in {@code seq}. 2159 * @param codePointOffset 2160 * the number of code points to look backwards or forwards; may 2161 * be a negative or positive value. 2162 * @return the index in {@code seq} that is {@code codePointOffset} code 2163 * points away from {@code index}. 2164 * @throws NullPointerException 2165 * if {@code seq} is {@code null}. 2166 * @throws IndexOutOfBoundsException 2167 * if {@code index < 0}, {@code index} is greater than the 2168 * length of {@code seq}, or if there are not enough values in 2169 * {@code seq} to skip {@code codePointOffset} code points 2170 * forwards or backwards (if {@code codePointOffset} is 2171 * negative) from {@code index}. 
2172 * @since 1.5 2173 */ 2174 public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { 2175 if (seq == null) { 2176 throw new NullPointerException("seq == null"); 2177 } 2178 int len = seq.length(); 2179 if (index < 0 || index > len) { 2180 throw new IndexOutOfBoundsException(); 2181 } 2182 2183 if (codePointOffset == 0) { 2184 return index; 2185 } 2186 2187 if (codePointOffset > 0) { 2188 int codePoints = codePointOffset; 2189 int i = index; 2190 while (codePoints > 0) { 2191 codePoints--; 2192 if (i >= len) { 2193 throw new IndexOutOfBoundsException(); 2194 } 2195 if (isHighSurrogate(seq.charAt(i))) { 2196 int next = i + 1; 2197 if (next < len && isLowSurrogate(seq.charAt(next))) { 2198 i++; 2199 } 2200 } 2201 i++; 2202 } 2203 return i; 2204 } 2205 2206 int codePoints = -codePointOffset; 2207 int i = index; 2208 while (codePoints > 0) { 2209 codePoints--; 2210 i--; 2211 if (i < 0) { 2212 throw new IndexOutOfBoundsException(); 2213 } 2214 if (isLowSurrogate(seq.charAt(i))) { 2215 int prev = i - 1; 2216 if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { 2217 i--; 2218 } 2219 } 2220 } 2221 return i; 2222 } 2223 2224 /** 2225 * Determines the index in a subsequence of the specified character array 2226 * that is offset {@code codePointOffset} code points from {@code index}. 2227 * The subsequence is delineated by {@code start} and {@code count}. 2228 * 2229 * @param seq 2230 * the character array to find the index in. 2231 * @param start 2232 * the inclusive index that marks the beginning of the 2233 * subsequence. 2234 * @param count 2235 * the number of {@code char} values to include within the 2236 * subsequence. 2237 * @param index 2238 * the start index in the subsequence of the char array. 2239 * @param codePointOffset 2240 * the number of code points to look backwards or forwards; may 2241 * be a negative or positive value. 
2242 * @return the index in {@code seq} that is {@code codePointOffset} code 2243 * points away from {@code index}. 2244 * @throws NullPointerException 2245 * if {@code seq} is {@code null}. 2246 * @throws IndexOutOfBoundsException 2247 * if {@code start < 0}, {@code count < 0}, 2248 * {@code index < start}, {@code index > start + count}, 2249 * {@code start + count} is greater than the length of 2250 * {@code seq}, or if there are not enough values in 2251 * {@code seq} to skip {@code codePointOffset} code points 2252 * forward or backward (if {@code codePointOffset} is 2253 * negative) from {@code index}. 2254 * @since 1.5 2255 */ 2256 public static int offsetByCodePoints(char[] seq, int start, int count, 2257 int index, int codePointOffset) { 2258 Arrays.checkOffsetAndCount(seq.length, start, count); 2259 int end = start + count; 2260 if (index < start || index > end) { 2261 throw new IndexOutOfBoundsException(); 2262 } 2263 2264 if (codePointOffset == 0) { 2265 return index; 2266 } 2267 2268 if (codePointOffset > 0) { 2269 int codePoints = codePointOffset; 2270 int i = index; 2271 while (codePoints > 0) { 2272 codePoints--; 2273 if (i >= end) { 2274 throw new IndexOutOfBoundsException(); 2275 } 2276 if (isHighSurrogate(seq[i])) { 2277 int next = i + 1; 2278 if (next < end && isLowSurrogate(seq[next])) { 2279 i++; 2280 } 2281 } 2282 i++; 2283 } 2284 return i; 2285 } 2286 2287 int codePoints = -codePointOffset; 2288 int i = index; 2289 while (codePoints > 0) { 2290 codePoints--; 2291 i--; 2292 if (i < start) { 2293 throw new IndexOutOfBoundsException(); 2294 } 2295 if (isLowSurrogate(seq[i])) { 2296 int prev = i - 1; 2297 if (prev >= start && isHighSurrogate(seq[prev])) { 2298 i--; 2299 } 2300 } 2301 } 2302 return i; 2303 } 2304 2305 /** 2306 * Convenience method to determine the value of the specified character 2307 * {@code c} in the supplied radix. The value of {@code radix} must be 2308 * between MIN_RADIX and MAX_RADIX. 
2309 * 2310 * @param c 2311 * the character to determine the value of. 2312 * @param radix 2313 * the radix. 2314 * @return the value of {@code c} in {@code radix} if {@code radix} lies 2315 * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. 2316 */ 2317 public static int digit(char c, int radix) { 2318 return digit((int) c, radix); 2319 } 2320 2321 /** 2322 * Convenience method to determine the value of the character 2323 * {@code codePoint} in the supplied radix. The value of {@code radix} must 2324 * be between MIN_RADIX and MAX_RADIX. 2325 * 2326 * @param codePoint 2327 * the character, including supplementary characters. 2328 * @param radix 2329 * the radix. 2330 * @return if {@code radix} lies between {@link #MIN_RADIX} and 2331 * {@link #MAX_RADIX} then the value of the character in the radix; 2332 * -1 otherwise. 2333 */ 2334 public static int digit(int codePoint, int radix) { 2335 if (radix < MIN_RADIX || radix > MAX_RADIX) { 2336 return -1; 2337 } 2338 if (codePoint < 128) { 2339 // Optimized for ASCII 2340 int result = -1; 2341 if ('0' <= codePoint && codePoint <= '9') { 2342 result = codePoint - '0'; 2343 } else if ('a' <= codePoint && codePoint <= 'z') { 2344 result = 10 + (codePoint - 'a'); 2345 } else if ('A' <= codePoint && codePoint <= 'Z') { 2346 result = 10 + (codePoint - 'A'); 2347 } 2348 return result < radix ? result : -1; 2349 } 2350 return digitImpl(codePoint, radix); 2351 } 2352 2353 private static native int digitImpl(int codePoint, int radix); 2354 2355 /** 2356 * Compares this object with the specified object and indicates if they are 2357 * equal. In order to be equal, {@code object} must be an instance of 2358 * {@code Character} and have the same char value as this object. 2359 * 2360 * @param object 2361 * the object to compare this double with. 2362 * @return {@code true} if the specified object is equal to this 2363 * {@code Character}; {@code false} otherwise. 
2364 */ 2365 @Override 2366 public boolean equals(Object object) { 2367 return (object instanceof Character) && (((Character) object).value == value); 2368 } 2369 2370 /** 2371 * Returns the character which represents the specified digit in the 2372 * specified radix. The {@code radix} must be between {@code MIN_RADIX} and 2373 * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and 2374 * smaller than {@code radix}. If any of these conditions does not hold, 0 2375 * is returned. 2376 * 2377 * @param digit 2378 * the integer value. 2379 * @param radix 2380 * the radix. 2381 * @return the character which represents the {@code digit} in the 2382 * {@code radix}. 2383 */ 2384 public static char forDigit(int digit, int radix) { 2385 if (MIN_RADIX <= radix && radix <= MAX_RADIX) { 2386 if (digit >= 0 && digit < radix) { 2387 return (char) (digit < 10 ? digit + '0' : digit + 'a' - 10); 2388 } 2389 } 2390 return 0; 2391 } 2392 2393 /** 2394 * Returns a human-readable name for the given code point, 2395 * or null if the code point is unassigned. 2396 * 2397 * <p>As a fallback mechanism this method returns strings consisting of the Unicode 2398 * block name (with underscores replaced by spaces), a single space, and the uppercase 2399 * hex value of the code point, using as few digits as necessary. 2400 * 2401 * <p>Examples: 2402 * <ul> 2403 * <li>{@code Character.getName(0)} returns "NULL". 2404 * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". 2405 * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". 2406 * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". 2407 * </ul> 2408 * 2409 * <p>Note that the exact strings returned will vary from release to release. 2410 * 2411 * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. 
2412 * @since 1.7 2413 */ 2414 public static String getName(int codePoint) { 2415 checkValidCodePoint(codePoint); 2416 if (getType(codePoint) == Character.UNASSIGNED) { 2417 return null; 2418 } 2419 String result = getNameImpl(codePoint); 2420 if (result == null) { 2421 String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); 2422 result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0); 2423 } 2424 return result; 2425 } 2426 2427 private static native String getNameImpl(int codePoint); 2428 2429 /** 2430 * Returns the numeric value of the specified Unicode character. 2431 * See {@link #getNumericValue(int)}. 2432 * 2433 * @param c the character 2434 * @return a non-negative numeric integer value if a numeric value for 2435 * {@code c} exists, -1 if there is no numeric value for {@code c}, 2436 * -2 if the numeric value can not be represented as an integer. 2437 */ 2438 public static int getNumericValue(char c) { 2439 return getNumericValue((int) c); 2440 } 2441 2442 /** 2443 * Gets the numeric value of the specified Unicode code point. For example, 2444 * the code point '\u216B' stands for the Roman number XII, which has the 2445 * numeric value 12. 2446 * 2447 * <p>There are two points of divergence between this method and the Unicode 2448 * specification. This method treats the letters a-z (in both upper and lower 2449 * cases, and their full-width variants) as numbers from 10 to 35. The 2450 * Unicode specification also supports the idea of code points with non-integer 2451 * numeric values; this method does not (except to the extent of returning -2 2452 * for such code points). 2453 * 2454 * @param codePoint the code point 2455 * @return a non-negative numeric integer value if a numeric value for 2456 * {@code codePoint} exists, -1 if there is no numeric value for 2457 * {@code codePoint}, -2 if the numeric value can not be 2458 * represented with an integer. 
2459 */ 2460 public static int getNumericValue(int codePoint) { 2461 // This is both an optimization and papers over differences between Java and ICU. 2462 if (codePoint < 128) { 2463 if (codePoint >= '0' && codePoint <= '9') { 2464 return codePoint - '0'; 2465 } 2466 if (codePoint >= 'a' && codePoint <= 'z') { 2467 return codePoint - ('a' - 10); 2468 } 2469 if (codePoint >= 'A' && codePoint <= 'Z') { 2470 return codePoint - ('A' - 10); 2471 } 2472 return -1; 2473 } 2474 // Full-width uppercase A-Z. 2475 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 2476 return codePoint - 0xff17; 2477 } 2478 // Full-width lowercase a-z. 2479 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 2480 return codePoint - 0xff37; 2481 } 2482 return getNumericValueImpl(codePoint); 2483 } 2484 2485 private static native int getNumericValueImpl(int codePoint); 2486 2487 /** 2488 * Gets the general Unicode category of the specified character. 2489 * 2490 * @param c 2491 * the character to get the category of. 2492 * @return the Unicode category of {@code c}. 2493 */ 2494 public static int getType(char c) { 2495 return getType((int) c); 2496 } 2497 2498 /** 2499 * Gets the general Unicode category of the specified code point. 2500 * 2501 * @param codePoint 2502 * the Unicode code point to get the category of. 2503 * @return the Unicode category of {@code codePoint}. 2504 */ 2505 public static int getType(int codePoint) { 2506 int type = getTypeImpl(codePoint); 2507 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 2508 if (type <= Character.FORMAT) { 2509 return type; 2510 } 2511 return (type + 1); 2512 } 2513 2514 private static native int getTypeImpl(int codePoint); 2515 2516 /** 2517 * Gets the Unicode directionality of the specified character. 2518 * 2519 * @param c 2520 * the character to get the directionality of. 2521 * @return the Unicode directionality of {@code c}. 
2522 */ 2523 public static byte getDirectionality(char c) { 2524 return getDirectionality((int)c); 2525 } 2526 2527 /** 2528 * Gets the Unicode directionality of the specified character. 2529 * 2530 * @param codePoint 2531 * the Unicode code point to get the directionality of. 2532 * @return the Unicode directionality of {@code codePoint}. 2533 */ 2534 public static byte getDirectionality(int codePoint) { 2535 if (getType(codePoint) == Character.UNASSIGNED) { 2536 return Character.DIRECTIONALITY_UNDEFINED; 2537 } 2538 2539 byte directionality = getDirectionalityImpl(codePoint); 2540 if (directionality == -1) { 2541 return -1; 2542 } 2543 return DIRECTIONALITY[directionality]; 2544 } 2545 2546 private static native byte getDirectionalityImpl(int codePoint); 2547 2548 /** 2549 * Indicates whether the specified character is mirrored. 2550 * 2551 * @param c 2552 * the character to check. 2553 * @return {@code true} if {@code c} is mirrored; {@code false} 2554 * otherwise. 2555 */ 2556 public static boolean isMirrored(char c) { 2557 return isMirrored((int) c); 2558 } 2559 2560 /** 2561 * Indicates whether the specified code point is mirrored. 2562 * 2563 * @param codePoint 2564 * the code point to check. 2565 * @return {@code true} if {@code codePoint} is mirrored, {@code false} 2566 * otherwise. 2567 */ 2568 public static boolean isMirrored(int codePoint) { 2569 return isMirroredImpl(codePoint); 2570 } 2571 2572 private static native boolean isMirroredImpl(int codePoint); 2573 2574 @Override 2575 public int hashCode() { 2576 return value; 2577 } 2578 2579 /** 2580 * Returns the high surrogate for the given code point. The result is meaningless if 2581 * the given code point is not a supplementary character. 2582 * @since 1.7 2583 */ 2584 public static char highSurrogate(int codePoint) { 2585 return (char) ((codePoint >> 10) + 0xd7c0); 2586 } 2587 2588 /** 2589 * Returns the low surrogate for the given code point. 
The result is meaningless if 2590 * the given code point is not a supplementary character. 2591 * @since 1.7 2592 */ 2593 public static char lowSurrogate(int codePoint) { 2594 return (char) ((codePoint & 0x3ff) | 0xdc00); 2595 } 2596 2597 /** 2598 * Returns true if the given code point is alphabetic. That is, 2599 * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories. 2600 * @since 1.7 2601 */ 2602 public static native boolean isAlphabetic(int codePoint); 2603 2604 /** 2605 * Returns true if the given code point is in the Basic Multilingual Plane (BMP). 2606 * Such code points can be represented by a single {@code char}. 2607 * @since 1.7 2608 */ 2609 public static boolean isBmpCodePoint(int codePoint) { 2610 return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE; 2611 } 2612 2613 /** 2614 * Indicates whether the specified character is defined in the Unicode 2615 * specification. 2616 * 2617 * @param c 2618 * the character to check. 2619 * @return {@code true} if the general Unicode category of the character is 2620 * not {@code UNASSIGNED}; {@code false} otherwise. 2621 */ 2622 public static boolean isDefined(char c) { 2623 return isDefinedImpl(c); 2624 } 2625 2626 /** 2627 * Indicates whether the specified code point is defined in the Unicode 2628 * specification. 2629 * 2630 * @param codePoint 2631 * the code point to check. 2632 * @return {@code true} if the general Unicode category of the code point is 2633 * not {@code UNASSIGNED}; {@code false} otherwise. 2634 */ 2635 public static boolean isDefined(int codePoint) { 2636 return isDefinedImpl(codePoint); 2637 } 2638 2639 private static native boolean isDefinedImpl(int codePoint); 2640 2641 /** 2642 * Indicates whether the specified character is a digit. 2643 * 2644 * @param c 2645 * the character to check. 2646 * @return {@code true} if {@code c} is a digit; {@code false} 2647 * otherwise. 
2648 */ 2649 public static boolean isDigit(char c) { 2650 return isDigit((int) c); 2651 } 2652 2653 /** 2654 * Indicates whether the specified code point is a digit. 2655 * 2656 * @param codePoint 2657 * the code point to check. 2658 * @return {@code true} if {@code codePoint} is a digit; {@code false} 2659 * otherwise. 2660 */ 2661 public static boolean isDigit(int codePoint) { 2662 // Optimized case for ASCII 2663 if ('0' <= codePoint && codePoint <= '9') { 2664 return true; 2665 } 2666 if (codePoint < 1632) { 2667 return false; 2668 } 2669 return isDigitImpl(codePoint); 2670 } 2671 2672 private static native boolean isDigitImpl(int codePoint); 2673 2674 /** 2675 * Indicates whether the specified character is ignorable in a Java or 2676 * Unicode identifier. 2677 * 2678 * @param c 2679 * the character to check. 2680 * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. 2681 */ 2682 public static boolean isIdentifierIgnorable(char c) { 2683 return isIdentifierIgnorable((int) c); 2684 } 2685 2686 /** 2687 * Returns true if the given code point is a CJKV ideographic character. 2688 * @since 1.7 2689 */ 2690 public static native boolean isIdeographic(int codePoint); 2691 2692 /** 2693 * Indicates whether the specified code point is ignorable in a Java or 2694 * Unicode identifier. 2695 * 2696 * @param codePoint 2697 * the code point to check. 2698 * @return {@code true} if {@code codePoint} is ignorable; {@code false} 2699 * otherwise. 2700 */ 2701 public static boolean isIdentifierIgnorable(int codePoint) { 2702 // This is both an optimization and papers over differences between Java and ICU. 
2703 if (codePoint < 0x600) { 2704 return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || 2705 (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); 2706 } 2707 return isIdentifierIgnorableImpl(codePoint); 2708 } 2709 2710 private static native boolean isIdentifierIgnorableImpl(int codePoint); 2711 2712 /** 2713 * Indicates whether the specified character is an ISO control character. 2714 * 2715 * @param c 2716 * the character to check. 2717 * @return {@code true} if {@code c} is an ISO control character; 2718 * {@code false} otherwise. 2719 */ 2720 public static boolean isISOControl(char c) { 2721 return isISOControl((int) c); 2722 } 2723 2724 /** 2725 * Indicates whether the specified code point is an ISO control character. 2726 * 2727 * @param c 2728 * the code point to check. 2729 * @return {@code true} if {@code c} is an ISO control character; 2730 * {@code false} otherwise. 2731 */ 2732 public static boolean isISOControl(int c) { 2733 return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); 2734 } 2735 2736 /** 2737 * Indicates whether the specified character is a valid part of a Java 2738 * identifier other than the first character. 2739 * 2740 * @param c 2741 * the character to check. 2742 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2743 * {@code false} otherwise. 2744 */ 2745 public static boolean isJavaIdentifierPart(char c) { 2746 return isJavaIdentifierPart((int) c); 2747 } 2748 2749 /** 2750 * Indicates whether the specified code point is a valid part of a Java 2751 * identifier other than the first character. 2752 * 2753 * @param codePoint 2754 * the code point to check. 2755 * @return {@code true} if {@code c} is valid as part of a Java identifier; 2756 * {@code false} otherwise. 2757 */ 2758 public static boolean isJavaIdentifierPart(int codePoint) { 2759 // Use precomputed bitmasks to optimize the ASCII range. 
2760 if (codePoint < 64) { 2761 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 2762 } else if (codePoint < 128) { 2763 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2764 } 2765 int type = getType(codePoint); 2766 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) 2767 || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION 2768 || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) 2769 || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK 2770 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 2771 || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; 2772 } 2773 2774 /** 2775 * Indicates whether the specified character is a valid first character for 2776 * a Java identifier. 2777 * 2778 * @param c 2779 * the character to check. 2780 * @return {@code true} if {@code c} is a valid first character of a Java 2781 * identifier; {@code false} otherwise. 2782 */ 2783 public static boolean isJavaIdentifierStart(char c) { 2784 return isJavaIdentifierStart((int) c); 2785 } 2786 2787 /** 2788 * Indicates whether the specified code point is a valid first character for 2789 * a Java identifier. 2790 * 2791 * @param codePoint 2792 * the code point to check. 2793 * @return {@code true} if {@code codePoint} is a valid start of a Java 2794 * identifier; {@code false} otherwise. 2795 */ 2796 public static boolean isJavaIdentifierStart(int codePoint) { 2797 // Use precomputed bitmasks to optimize the ASCII range. 2798 if (codePoint < 64) { 2799 return (codePoint == '$'); // There's only one character in this range. 2800 } else if (codePoint < 128) { 2801 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 2802 } 2803 int type = getType(codePoint); 2804 return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL 2805 || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; 2806 } 2807 2808 /** 2809 * Indicates whether the specified character is a Java letter. 
2810 * 2811 * @param c 2812 * the character to check. 2813 * @return {@code true} if {@code c} is a Java letter; {@code false} 2814 * otherwise. 2815 * @deprecated Use {@link #isJavaIdentifierStart(char)} instead. 2816 */ 2817 @Deprecated 2818 public static boolean isJavaLetter(char c) { 2819 return isJavaIdentifierStart(c); 2820 } 2821 2822 /** 2823 * Indicates whether the specified character is a Java letter or digit 2824 * character. 2825 * 2826 * @param c 2827 * the character to check. 2828 * @return {@code true} if {@code c} is a Java letter or digit; 2829 * {@code false} otherwise. 2830 * @deprecated Use {@link #isJavaIdentifierPart(char)} instead. 2831 */ 2832 @Deprecated 2833 public static boolean isJavaLetterOrDigit(char c) { 2834 return isJavaIdentifierPart(c); 2835 } 2836 2837 /** 2838 * Indicates whether the specified character is a letter. 2839 * 2840 * @param c 2841 * the character to check. 2842 * @return {@code true} if {@code c} is a letter; {@code false} otherwise. 2843 */ 2844 public static boolean isLetter(char c) { 2845 return isLetter((int) c); 2846 } 2847 2848 /** 2849 * Indicates whether the specified code point is a letter. 2850 * 2851 * @param codePoint 2852 * the code point to check. 2853 * @return {@code true} if {@code codePoint} is a letter; {@code false} 2854 * otherwise. 2855 */ 2856 public static boolean isLetter(int codePoint) { 2857 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2858 return true; 2859 } 2860 if (codePoint < 128) { 2861 return false; 2862 } 2863 return isLetterImpl(codePoint); 2864 } 2865 2866 private static native boolean isLetterImpl(int codePoint); 2867 2868 /** 2869 * Indicates whether the specified character is a letter or a digit. 2870 * 2871 * @param c 2872 * the character to check. 2873 * @return {@code true} if {@code c} is a letter or a digit; {@code false} 2874 * otherwise. 
2875 */ 2876 public static boolean isLetterOrDigit(char c) { 2877 return isLetterOrDigit((int) c); 2878 } 2879 2880 /** 2881 * Indicates whether the specified code point is a letter or a digit. 2882 * 2883 * @param codePoint 2884 * the code point to check. 2885 * @return {@code true} if {@code codePoint} is a letter or a digit; 2886 * {@code false} otherwise. 2887 */ 2888 public static boolean isLetterOrDigit(int codePoint) { 2889 // Optimized case for ASCII 2890 if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { 2891 return true; 2892 } 2893 if ('0' <= codePoint && codePoint <= '9') { 2894 return true; 2895 } 2896 if (codePoint < 128) { 2897 return false; 2898 } 2899 return isLetterOrDigitImpl(codePoint); 2900 } 2901 2902 private static native boolean isLetterOrDigitImpl(int codePoint); 2903 2904 /** 2905 * Indicates whether the specified character is a lower case letter. 2906 * 2907 * @param c 2908 * the character to check. 2909 * @return {@code true} if {@code c} is a lower case letter; {@code false} 2910 * otherwise. 2911 */ 2912 public static boolean isLowerCase(char c) { 2913 return isLowerCase((int) c); 2914 } 2915 2916 /** 2917 * Indicates whether the specified code point is a lower case letter. 2918 * 2919 * @param codePoint 2920 * the code point to check. 2921 * @return {@code true} if {@code codePoint} is a lower case letter; 2922 * {@code false} otherwise. 2923 */ 2924 public static boolean isLowerCase(int codePoint) { 2925 // Optimized case for ASCII 2926 if ('a' <= codePoint && codePoint <= 'z') { 2927 return true; 2928 } 2929 if (codePoint < 128) { 2930 return false; 2931 } 2932 return isLowerCaseImpl(codePoint); 2933 } 2934 2935 private static native boolean isLowerCaseImpl(int codePoint); 2936 2937 /** 2938 * Use {@link #isWhitespace(char)} instead. 2939 * @deprecated Use {@link #isWhitespace(char)} instead. 
2940 */ 2941 @Deprecated 2942 public static boolean isSpace(char c) { 2943 return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; 2944 } 2945 2946 /** 2947 * See {@link #isSpaceChar(int)}. 2948 */ 2949 public static boolean isSpaceChar(char c) { 2950 return isSpaceChar((int) c); 2951 } 2952 2953 /** 2954 * Returns true if the given code point is a Unicode space character. 2955 * The exact set of characters considered as whitespace varies with Unicode version. 2956 * Note that non-breaking spaces are considered whitespace. 2957 * Note also that line separators are not considered whitespace; see {@link #isWhitespace} 2958 * for an alternative. 2959 */ 2960 public static boolean isSpaceChar(int codePoint) { 2961 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 2962 // SPACE or NO-BREAK SPACE? 2963 if (codePoint == 0x20 || codePoint == 0xa0) { 2964 return true; 2965 } 2966 if (codePoint < 0x1000) { 2967 return false; 2968 } 2969 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 2970 if (codePoint == 0x1680 || codePoint == 0x180e) { 2971 return true; 2972 } 2973 if (codePoint < 0x2000) { 2974 return false; 2975 } 2976 if (codePoint <= 0xffff) { 2977 // Other whitespace from General Punctuation... 2978 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || 2979 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 2980 } 2981 // Let icu4c worry about non-BMP code points. 2982 return isSpaceCharImpl(codePoint); 2983 } 2984 2985 private static native boolean isSpaceCharImpl(int codePoint); 2986 2987 /** 2988 * Indicates whether the specified character is a titlecase character. 2989 * 2990 * @param c 2991 * the character to check. 2992 * @return {@code true} if {@code c} is a titlecase character, {@code false} 2993 * otherwise. 
2994 */ 2995 public static boolean isTitleCase(char c) { 2996 return isTitleCaseImpl(c); 2997 } 2998 2999 /** 3000 * Indicates whether the specified code point is a titlecase character. 3001 * 3002 * @param codePoint 3003 * the code point to check. 3004 * @return {@code true} if {@code codePoint} is a titlecase character, 3005 * {@code false} otherwise. 3006 */ 3007 public static boolean isTitleCase(int codePoint) { 3008 return isTitleCaseImpl(codePoint); 3009 } 3010 3011 private static native boolean isTitleCaseImpl(int codePoint); 3012 3013 /** 3014 * Indicates whether the specified character is valid as part of a Unicode 3015 * identifier other than the first character. 3016 * 3017 * @param c 3018 * the character to check. 3019 * @return {@code true} if {@code c} is valid as part of a Unicode 3020 * identifier; {@code false} otherwise. 3021 */ 3022 public static boolean isUnicodeIdentifierPart(char c) { 3023 return isUnicodeIdentifierPartImpl(c); 3024 } 3025 3026 /** 3027 * Indicates whether the specified code point is valid as part of a Unicode 3028 * identifier other than the first character. 3029 * 3030 * @param codePoint 3031 * the code point to check. 3032 * @return {@code true} if {@code codePoint} is valid as part of a Unicode 3033 * identifier; {@code false} otherwise. 3034 */ 3035 public static boolean isUnicodeIdentifierPart(int codePoint) { 3036 return isUnicodeIdentifierPartImpl(codePoint); 3037 } 3038 3039 private static native boolean isUnicodeIdentifierPartImpl(int codePoint); 3040 3041 /** 3042 * Indicates whether the specified character is a valid initial character 3043 * for a Unicode identifier. 3044 * 3045 * @param c 3046 * the character to check. 3047 * @return {@code true} if {@code c} is a valid first character for a 3048 * Unicode identifier; {@code false} otherwise. 
3049 */ 3050 public static boolean isUnicodeIdentifierStart(char c) { 3051 return isUnicodeIdentifierStartImpl(c); 3052 } 3053 3054 /** 3055 * Indicates whether the specified code point is a valid initial character 3056 * for a Unicode identifier. 3057 * 3058 * @param codePoint 3059 * the code point to check. 3060 * @return {@code true} if {@code codePoint} is a valid first character for 3061 * a Unicode identifier; {@code false} otherwise. 3062 */ 3063 public static boolean isUnicodeIdentifierStart(int codePoint) { 3064 return isUnicodeIdentifierStartImpl(codePoint); 3065 } 3066 3067 private static native boolean isUnicodeIdentifierStartImpl(int codePoint); 3068 3069 /** 3070 * Indicates whether the specified character is an upper case letter. 3071 * 3072 * @param c 3073 * the character to check. 3074 * @return {@code true} if {@code c} is a upper case letter; {@code false} 3075 * otherwise. 3076 */ 3077 public static boolean isUpperCase(char c) { 3078 return isUpperCase((int) c); 3079 } 3080 3081 /** 3082 * Indicates whether the specified code point is an upper case letter. 3083 * 3084 * @param codePoint 3085 * the code point to check. 3086 * @return {@code true} if {@code codePoint} is a upper case letter; 3087 * {@code false} otherwise. 3088 */ 3089 public static boolean isUpperCase(int codePoint) { 3090 // Optimized case for ASCII 3091 if ('A' <= codePoint && codePoint <= 'Z') { 3092 return true; 3093 } 3094 if (codePoint < 128) { 3095 return false; 3096 } 3097 return isUpperCaseImpl(codePoint); 3098 } 3099 3100 private static native boolean isUpperCaseImpl(int codePoint); 3101 3102 /** 3103 * See {@link #isWhitespace(int)}. 3104 */ 3105 public static boolean isWhitespace(char c) { 3106 return isWhitespace((int) c); 3107 } 3108 3109 /** 3110 * Returns true if the given code point is a Unicode whitespace character. 3111 * The exact set of characters considered as whitespace varies with Unicode version. 
3112 * Note that non-breaking spaces are not considered whitespace. 3113 * Note also that line separators are considered whitespace; see {@link #isSpaceChar} 3114 * for an alternative. 3115 */ 3116 public static boolean isWhitespace(int codePoint) { 3117 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 3118 // Any ASCII whitespace character? 3119 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { 3120 return true; 3121 } 3122 if (codePoint < 0x1000) { 3123 return false; 3124 } 3125 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 3126 if (codePoint == 0x1680 || codePoint == 0x180e) { 3127 return true; 3128 } 3129 if (codePoint < 0x2000) { 3130 return false; 3131 } 3132 // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). 3133 if (codePoint == 0x2007 || codePoint == 0x202f) { 3134 return false; 3135 } 3136 if (codePoint <= 0xffff) { 3137 // Other whitespace from General Punctuation... 3138 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || 3139 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 3140 } 3141 // Let icu4c worry about non-BMP code points. 3142 return isWhitespaceImpl(codePoint); 3143 } 3144 3145 private static native boolean isWhitespaceImpl(int codePoint); 3146 3147 /** 3148 * Reverses the order of the first and second byte in the specified 3149 * character. 3150 * 3151 * @param c 3152 * the character to reverse. 3153 * @return the character with reordered bytes. 3154 */ 3155 public static char reverseBytes(char c) { 3156 return (char)((c<<8) | (c>>8)); 3157 } 3158 3159 /** 3160 * Returns the lower case equivalent for the specified character if the 3161 * character is an upper case letter. Otherwise, the specified character is 3162 * returned unchanged. 
3163 * 3164 * @param c 3165 * the character 3166 * @return if {@code c} is an upper case character then its lower case 3167 * counterpart, otherwise just {@code c}. 3168 */ 3169 public static char toLowerCase(char c) { 3170 return (char) toLowerCase((int) c); 3171 } 3172 3173 /** 3174 * Returns the lower case equivalent for the specified code point if it is 3175 * an upper case letter. Otherwise, the specified code point is returned 3176 * unchanged. 3177 * 3178 * @param codePoint 3179 * the code point to check. 3180 * @return if {@code codePoint} is an upper case character then its lower 3181 * case counterpart, otherwise just {@code codePoint}. 3182 */ 3183 public static int toLowerCase(int codePoint) { 3184 // Optimized case for ASCII 3185 if ('A' <= codePoint && codePoint <= 'Z') { 3186 return (char) (codePoint + ('a' - 'A')); 3187 } 3188 if (codePoint < 192) { 3189 return codePoint; 3190 } 3191 return toLowerCaseImpl(codePoint); 3192 } 3193 3194 private static native int toLowerCaseImpl(int codePoint); 3195 3196 @Override 3197 public String toString() { 3198 return String.valueOf(value); 3199 } 3200 3201 /** 3202 * Converts the specified character to its string representation. 3203 * 3204 * @param value 3205 * the character to convert. 3206 * @return the character converted to a string. 3207 */ 3208 public static String toString(char value) { 3209 return String.valueOf(value); 3210 } 3211 3212 /** 3213 * Returns the title case equivalent for the specified character if it 3214 * exists. Otherwise, the specified character is returned unchanged. 3215 * 3216 * @param c 3217 * the character to convert. 3218 * @return the title case equivalent of {@code c} if it exists, otherwise 3219 * {@code c}. 3220 */ 3221 public static char toTitleCase(char c) { 3222 return (char) toTitleCaseImpl(c); 3223 } 3224 3225 /** 3226 * Returns the title case equivalent for the specified code point if it 3227 * exists. Otherwise, the specified code point is returned unchanged. 
3228 * 3229 * @param codePoint 3230 * the code point to convert. 3231 * @return the title case equivalent of {@code codePoint} if it exists, 3232 * otherwise {@code codePoint}. 3233 */ 3234 public static int toTitleCase(int codePoint) { 3235 return toTitleCaseImpl(codePoint); 3236 } 3237 3238 private static native int toTitleCaseImpl(int codePoint); 3239 3240 /** 3241 * Returns the upper case equivalent for the specified character if the 3242 * character is a lower case letter. Otherwise, the specified character is 3243 * returned unchanged. 3244 * 3245 * @param c 3246 * the character to convert. 3247 * @return if {@code c} is a lower case character then its upper case 3248 * counterpart, otherwise just {@code c}. 3249 */ 3250 public static char toUpperCase(char c) { 3251 return (char) toUpperCase((int) c); 3252 } 3253 3254 /** 3255 * Returns the upper case equivalent for the specified code point if the 3256 * code point is a lower case letter. Otherwise, the specified code point is 3257 * returned unchanged. 3258 * 3259 * @param codePoint 3260 * the code point to convert. 3261 * @return if {@code codePoint} is a lower case character then its upper 3262 * case counterpart, otherwise just {@code codePoint}. 3263 */ 3264 public static int toUpperCase(int codePoint) { 3265 // Optimized case for ASCII 3266 if ('a' <= codePoint && codePoint <= 'z') { 3267 return (char) (codePoint - ('a' - 'A')); 3268 } 3269 if (codePoint < 181) { 3270 return codePoint; 3271 } 3272 return toUpperCaseImpl(codePoint); 3273 } 3274 3275 private static native int toUpperCaseImpl(int codePoint); 3276} 3277