1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10package com.ibm.icu.lang; 11 12import java.lang.ref.SoftReference; 13import java.util.HashMap; 14import java.util.Iterator; 15import java.util.Locale; 16import java.util.Map; 17 18import com.ibm.icu.impl.CaseMapImpl; 19import com.ibm.icu.impl.IllegalIcuArgumentException; 20import com.ibm.icu.impl.Trie2; 21import com.ibm.icu.impl.UBiDiProps; 22import com.ibm.icu.impl.UCaseProps; 23import com.ibm.icu.impl.UCharacterName; 24import com.ibm.icu.impl.UCharacterNameChoice; 25import com.ibm.icu.impl.UCharacterProperty; 26import com.ibm.icu.impl.UCharacterUtility; 27import com.ibm.icu.impl.UPropertyAliases; 28import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 29import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 30import com.ibm.icu.text.BreakIterator; 31import com.ibm.icu.text.Normalizer2; 32import com.ibm.icu.util.RangeValueIterator; 33import com.ibm.icu.util.ULocale; 34import com.ibm.icu.util.ValueIterator; 35import com.ibm.icu.util.VersionInfo; 36 37/** 38 * {@icuenhanced java.lang.Character}.{@icu _usage_} 39 * 40 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 41 * These extensions provide support for more Unicode properties. 42 * Each ICU release supports the latest version of Unicode available at that time. 43 * 44 * <p>For some time before Java 5 added support for supplementary Unicode code points, 45 * The ICU UCharacter class and many other ICU classes already supported them. 
46 * Some UCharacter methods and constants were widened slightly differently than 47 * how the Character class methods and constants were widened later. 48 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 49 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 50 * 51 * <p>Code points are represented in these APIs using ints. While it would be 52 * more convenient in Java to have a separate primitive datatype for them, 53 * ints suffice in the meantime. 54 * 55 * <p>To use this class please add the jar file named icu4j.jar to the 56 * class path, since it contains data files which supply the information used 57 * by this file.<br> 58 * E.g. in Windows <br> 59 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 60 * Otherwise, another method would be to copy the files uprops.dat and 61 * unames.icu from the icu4j source subdirectory 62 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 63 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 64 * 65 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 66 * properties, the main differences between UCharacter and Character are: 67 * <ul> 68 * <li> UCharacter is not designed to be a char wrapper and does not have 69 * APIs that involve management of that single char.<br> 70 * These include: 71 * <ul> 72 * <li> char charValue(), 73 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 74 * </ul> 75 * <li> UCharacter does not include Character APIs that are deprecated, nor 76 * does it include the Java-specific character information, such as 77 * boolean isJavaIdentifierPart(char ch). 78 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 79 * values '10' - '35'. UCharacter also does this in digit and 80 * getNumericValue, to adhere to the Java semantics of these 81 * methods.
New methods unicodeDigit and 82 * getUnicodeNumericValue do not treat the above code points 83 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 84 * </ul> 85 * <p> 86 * Further detail on differences can be determined using the program 87 * <a href= 88 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 89 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 90 * <p> 91 * In addition to Java compatibility functions, which calculate derived properties, 92 * this API provides low-level access to the Unicode Character Database. 93 * <p> 94 * Unicode assigns each code point (not just assigned characters) values for 95 * many properties. 96 * Most of them are simple boolean flags, or constants from a small enumerated list. 97 * For some properties, values are strings or other relatively more complex types. 98 * <p> 99 * For more information see 100 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 101 * (http://www.unicode.org/ucd/) 102 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 103 * User Guide chapter on Properties</a> 104 * (http://www.icu-project.org/userguide/properties.html). 105 * <p> 106 * There are also functions that provide easy migration from C/POSIX functions 107 * like isblank(). Their use is generally discouraged because the C/POSIX 108 * standards do not define their semantics beyond the ASCII range, which means 109 * that different implementations exhibit very different behavior. 110 * Instead, Unicode properties should be used directly. 111 * <p> 112 * There are also only a few, broad C/POSIX character classes, and they tend 113 * to be used for conflicting purposes. For example, the "isalpha()" class 114 * is sometimes used to determine word boundaries, while a more sophisticated 115 * approach would at least distinguish initial letters from continuation 116 * characters (the latter including combining marks).
117 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 118 * Another example: There is no "istitle()" class for titlecase characters. 119 * <p> 120 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 121 * ICU implements them according to the Standard Recommendations in 122 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 123 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 124 * <p> 125 * API access for C/POSIX character classes is as follows: 126 * <pre>{@code 127 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 128 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 129 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 130 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 131 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 132 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 133 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 134 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 135 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 136 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 137 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 138 * - cntrl: getType(c)==CONTROL 139 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 140 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 141 * <p> 142 * The C/POSIX character classes are also available in UnicodeSet patterns, 143 * using patterns like [:graph:] or \p{graph}. 144 * 145 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
146 * Comparison:<ul> 147 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 148 * most of general categories "Z" (separators) + most whitespace ISO controls 149 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 150 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 151 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 152 * 153 * <p> 154 * This class is not subclassable. 155 * 156 * @author Syn Wee Quek 157 * @stable ICU 2.1 158 * @see com.ibm.icu.lang.UCharacterEnums 159 */ 160 161public final class UCharacter implements ECharacterCategory, ECharacterDirection 162{ 163 // public inner classes ---------------------------------------------- 164 165 /** 166 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 167 * 168 * A family of character subsets representing the character blocks in the 169 * Unicode specification, generated from Unicode Data file Blocks.txt. 170 * Character blocks generally define characters used for a specific script 171 * or purpose. A character is contained by at most one Unicode block. 172 * 173 * {@icunote} All fields named XXX_ID are specific to ICU. 
174 * 175 * @stable ICU 2.4 176 */ 177 public static final class UnicodeBlock extends Character.Subset 178 { 179 // block id corresponding to icu4c ----------------------------------- 180 181 /** 182 * @stable ICU 2.4 183 */ 184 public static final int INVALID_CODE_ID = -1; 185 /** 186 * @stable ICU 2.4 187 */ 188 public static final int BASIC_LATIN_ID = 1; 189 /** 190 * @stable ICU 2.4 191 */ 192 public static final int LATIN_1_SUPPLEMENT_ID = 2; 193 /** 194 * @stable ICU 2.4 195 */ 196 public static final int LATIN_EXTENDED_A_ID = 3; 197 /** 198 * @stable ICU 2.4 199 */ 200 public static final int LATIN_EXTENDED_B_ID = 4; 201 /** 202 * @stable ICU 2.4 203 */ 204 public static final int IPA_EXTENSIONS_ID = 5; 205 /** 206 * @stable ICU 2.4 207 */ 208 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 209 /** 210 * @stable ICU 2.4 211 */ 212 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 213 /** 214 * Unicode 3.2 renames this block to "Greek and Coptic". 215 * @stable ICU 2.4 216 */ 217 public static final int GREEK_ID = 8; 218 /** 219 * @stable ICU 2.4 220 */ 221 public static final int CYRILLIC_ID = 9; 222 /** 223 * @stable ICU 2.4 224 */ 225 public static final int ARMENIAN_ID = 10; 226 /** 227 * @stable ICU 2.4 228 */ 229 public static final int HEBREW_ID = 11; 230 /** 231 * @stable ICU 2.4 232 */ 233 public static final int ARABIC_ID = 12; 234 /** 235 * @stable ICU 2.4 236 */ 237 public static final int SYRIAC_ID = 13; 238 /** 239 * @stable ICU 2.4 240 */ 241 public static final int THAANA_ID = 14; 242 /** 243 * @stable ICU 2.4 244 */ 245 public static final int DEVANAGARI_ID = 15; 246 /** 247 * @stable ICU 2.4 248 */ 249 public static final int BENGALI_ID = 16; 250 /** 251 * @stable ICU 2.4 252 */ 253 public static final int GURMUKHI_ID = 17; 254 /** 255 * @stable ICU 2.4 256 */ 257 public static final int GUJARATI_ID = 18; 258 /** 259 * @stable ICU 2.4 260 */ 261 public static final int ORIYA_ID = 19; 262 /** 263 * @stable ICU 2.4 264 */ 
265 public static final int TAMIL_ID = 20; 266 /** 267 * @stable ICU 2.4 268 */ 269 public static final int TELUGU_ID = 21; 270 /** 271 * @stable ICU 2.4 272 */ 273 public static final int KANNADA_ID = 22; 274 /** 275 * @stable ICU 2.4 276 */ 277 public static final int MALAYALAM_ID = 23; 278 /** 279 * @stable ICU 2.4 280 */ 281 public static final int SINHALA_ID = 24; 282 /** 283 * @stable ICU 2.4 284 */ 285 public static final int THAI_ID = 25; 286 /** 287 * @stable ICU 2.4 288 */ 289 public static final int LAO_ID = 26; 290 /** 291 * @stable ICU 2.4 292 */ 293 public static final int TIBETAN_ID = 27; 294 /** 295 * @stable ICU 2.4 296 */ 297 public static final int MYANMAR_ID = 28; 298 /** 299 * @stable ICU 2.4 300 */ 301 public static final int GEORGIAN_ID = 29; 302 /** 303 * @stable ICU 2.4 304 */ 305 public static final int HANGUL_JAMO_ID = 30; 306 /** 307 * @stable ICU 2.4 308 */ 309 public static final int ETHIOPIC_ID = 31; 310 /** 311 * @stable ICU 2.4 312 */ 313 public static final int CHEROKEE_ID = 32; 314 /** 315 * @stable ICU 2.4 316 */ 317 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 318 /** 319 * @stable ICU 2.4 320 */ 321 public static final int OGHAM_ID = 34; 322 /** 323 * @stable ICU 2.4 324 */ 325 public static final int RUNIC_ID = 35; 326 /** 327 * @stable ICU 2.4 328 */ 329 public static final int KHMER_ID = 36; 330 /** 331 * @stable ICU 2.4 332 */ 333 public static final int MONGOLIAN_ID = 37; 334 /** 335 * @stable ICU 2.4 336 */ 337 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 338 /** 339 * @stable ICU 2.4 340 */ 341 public static final int GREEK_EXTENDED_ID = 39; 342 /** 343 * @stable ICU 2.4 344 */ 345 public static final int GENERAL_PUNCTUATION_ID = 40; 346 /** 347 * @stable ICU 2.4 348 */ 349 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 350 /** 351 * @stable ICU 2.4 352 */ 353 public static final int CURRENCY_SYMBOLS_ID = 42; 354 /** 355 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 356 * Symbols". 357 * @stable ICU 2.4 358 */ 359 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 360 /** 361 * @stable ICU 2.4 362 */ 363 public static final int LETTERLIKE_SYMBOLS_ID = 44; 364 /** 365 * @stable ICU 2.4 366 */ 367 public static final int NUMBER_FORMS_ID = 45; 368 /** 369 * @stable ICU 2.4 370 */ 371 public static final int ARROWS_ID = 46; 372 /** 373 * @stable ICU 2.4 374 */ 375 public static final int MATHEMATICAL_OPERATORS_ID = 47; 376 /** 377 * @stable ICU 2.4 378 */ 379 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 380 /** 381 * @stable ICU 2.4 382 */ 383 public static final int CONTROL_PICTURES_ID = 49; 384 /** 385 * @stable ICU 2.4 386 */ 387 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 388 /** 389 * @stable ICU 2.4 390 */ 391 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 392 /** 393 * @stable ICU 2.4 394 */ 395 public static final int BOX_DRAWING_ID = 52; 396 /** 397 * @stable ICU 2.4 398 */ 399 public static final int BLOCK_ELEMENTS_ID = 53; 400 /** 401 * @stable ICU 2.4 402 */ 403 public static final int GEOMETRIC_SHAPES_ID = 54; 404 /** 405 * @stable ICU 2.4 406 */ 407 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 408 /** 409 * @stable ICU 2.4 410 */ 411 public static final int DINGBATS_ID = 56; 412 /** 413 * @stable ICU 2.4 414 */ 415 public static final int BRAILLE_PATTERNS_ID = 57; 416 /** 417 * @stable ICU 2.4 418 */ 419 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 420 /** 421 * @stable ICU 2.4 422 */ 423 public static final int KANGXI_RADICALS_ID = 59; 424 /** 425 * @stable ICU 2.4 426 */ 427 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 428 /** 429 * @stable ICU 2.4 430 */ 431 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 432 /** 433 * @stable ICU 2.4 434 */ 435 public static final int HIRAGANA_ID = 62; 436 /** 437 * @stable ICU 2.4 438 */ 439 public static final int KATAKANA_ID = 63; 440 /** 441 * 
@stable ICU 2.4 442 */ 443 public static final int BOPOMOFO_ID = 64; 444 /** 445 * @stable ICU 2.4 446 */ 447 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 448 /** 449 * @stable ICU 2.4 450 */ 451 public static final int KANBUN_ID = 66; 452 /** 453 * @stable ICU 2.4 454 */ 455 public static final int BOPOMOFO_EXTENDED_ID = 67; 456 /** 457 * @stable ICU 2.4 458 */ 459 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 460 /** 461 * @stable ICU 2.4 462 */ 463 public static final int CJK_COMPATIBILITY_ID = 69; 464 /** 465 * @stable ICU 2.4 466 */ 467 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 468 /** 469 * @stable ICU 2.4 470 */ 471 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 472 /** 473 * @stable ICU 2.4 474 */ 475 public static final int YI_SYLLABLES_ID = 72; 476 /** 477 * @stable ICU 2.4 478 */ 479 public static final int YI_RADICALS_ID = 73; 480 /** 481 * @stable ICU 2.4 482 */ 483 public static final int HANGUL_SYLLABLES_ID = 74; 484 /** 485 * @stable ICU 2.4 486 */ 487 public static final int HIGH_SURROGATES_ID = 75; 488 /** 489 * @stable ICU 2.4 490 */ 491 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 492 /** 493 * @stable ICU 2.4 494 */ 495 public static final int LOW_SURROGATES_ID = 77; 496 /** 497 * Same as public static final int PRIVATE_USE. 498 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 499 * and multiple code point ranges had this block. 500 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 501 * and adds separate blocks for the supplementary PUAs. 502 * @stable ICU 2.4 503 */ 504 public static final int PRIVATE_USE_AREA_ID = 78; 505 /** 506 * Same as public static final int PRIVATE_USE_AREA. 507 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 508 * and multiple code point ranges had this block. 
509 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 510 * and adds separate blocks for the supplementary PUAs. 511 * @stable ICU 2.4 512 */ 513 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 514 /** 515 * @stable ICU 2.4 516 */ 517 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 518 /** 519 * @stable ICU 2.4 520 */ 521 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 522 /** 523 * @stable ICU 2.4 524 */ 525 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 526 /** 527 * @stable ICU 2.4 528 */ 529 public static final int COMBINING_HALF_MARKS_ID = 82; 530 /** 531 * @stable ICU 2.4 532 */ 533 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 534 /** 535 * @stable ICU 2.4 536 */ 537 public static final int SMALL_FORM_VARIANTS_ID = 84; 538 /** 539 * @stable ICU 2.4 540 */ 541 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 542 /** 543 * @stable ICU 2.4 544 */ 545 public static final int SPECIALS_ID = 86; 546 /** 547 * @stable ICU 2.4 548 */ 549 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 550 /** 551 * @stable ICU 2.4 552 */ 553 public static final int OLD_ITALIC_ID = 88; 554 /** 555 * @stable ICU 2.4 556 */ 557 public static final int GOTHIC_ID = 89; 558 /** 559 * @stable ICU 2.4 560 */ 561 public static final int DESERET_ID = 90; 562 /** 563 * @stable ICU 2.4 564 */ 565 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 566 /** 567 * @stable ICU 2.4 568 */ 569 public static final int MUSICAL_SYMBOLS_ID = 92; 570 /** 571 * @stable ICU 2.4 572 */ 573 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 574 /** 575 * @stable ICU 2.4 576 */ 577 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 578 /** 579 * @stable ICU 2.4 580 */ 581 public static final int 582 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 583 /** 584 * @stable ICU 2.4 585 */ 586 public static final int TAGS_ID = 96; 587 588 // New blocks in Unicode 
3.2 589 590 /** 591 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 592 * @stable ICU 2.4 593 */ 594 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 595 /** 596 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 597 * @stable ICU 3.0 598 */ 599 600 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 601 /** 602 * @stable ICU 2.4 603 */ 604 public static final int TAGALOG_ID = 98; 605 /** 606 * @stable ICU 2.4 607 */ 608 public static final int HANUNOO_ID = 99; 609 /** 610 * @stable ICU 2.4 611 */ 612 public static final int BUHID_ID = 100; 613 /** 614 * @stable ICU 2.4 615 */ 616 public static final int TAGBANWA_ID = 101; 617 /** 618 * @stable ICU 2.4 619 */ 620 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 621 /** 622 * @stable ICU 2.4 623 */ 624 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 625 /** 626 * @stable ICU 2.4 627 */ 628 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 629 /** 630 * @stable ICU 2.4 631 */ 632 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 633 /** 634 * @stable ICU 2.4 635 */ 636 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 637 /** 638 * @stable ICU 2.4 639 */ 640 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 641 /** 642 * @stable ICU 2.4 643 */ 644 public static final int VARIATION_SELECTORS_ID = 108; 645 /** 646 * @stable ICU 2.4 647 */ 648 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 649 /** 650 * @stable ICU 2.4 651 */ 652 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 653 654 /** 655 * @stable ICU 2.6 656 */ 657 public static final int LIMBU_ID = 111; /*[1900]*/ 658 /** 659 * @stable ICU 2.6 660 */ 661 public static final int TAI_LE_ID = 112; /*[1950]*/ 662 /** 663 * @stable ICU 2.6 664 */ 665 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 666 /** 667 * @stable ICU 2.6 668 */ 669 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 670 /** 671 * @stable ICU 2.6 672 */ 673 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 674 /** 675 * @stable ICU 2.6 676 */ 677 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 678 /** 679 * @stable ICU 2.6 680 */ 681 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 682 /** 683 * @stable ICU 2.6 684 */ 685 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 686 /** 687 * @stable ICU 2.6 688 */ 689 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 690 /** 691 * @stable ICU 2.6 692 */ 693 public static final int UGARITIC_ID = 120; /*[10380]*/ 694 /** 695 * @stable ICU 2.6 696 */ 697 public static final int SHAVIAN_ID = 121; /*[10450]*/ 698 /** 699 * @stable ICU 2.6 700 */ 701 public static final int OSMANYA_ID = 122; /*[10480]*/ 702 /** 703 * @stable ICU 2.6 704 */ 705 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 706 /** 707 * @stable ICU 2.6 708 */ 709 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 710 /** 711 * @stable ICU 2.6 712 */ 713 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 714 715 /* New blocks in Unicode 4.1 */ 716 717 /** 718 * @stable ICU 3.4 719 */ 720 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 721 722 /** 723 * @stable ICU 3.4 724 */ 725 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 726 727 /** 728 * @stable ICU 3.4 729 */ 730 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 731 732 /** 733 * @stable ICU 3.4 734 */ 735 public static final int BUGINESE_ID = 129; /*[1A00]*/ 736 737 /** 738 * @stable ICU 3.4 739 */ 740 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 741 742 /** 743 * @stable ICU 3.4 744 */ 745 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 746 747 /** 748 * @stable ICU 3.4 749 */ 750 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 751 752 /** 753 * @stable ICU 3.4 754 */ 755 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 756 757 /** 758 * @stable ICU 3.4 759 */ 760 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 761 762 /** 763 * @stable ICU 3.4 764 */ 765 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 766 767 /** 768 * @stable ICU 3.4 769 */ 770 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 771 772 /** 773 * @stable ICU 3.4 774 */ 775 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 776 777 /** 778 * @stable ICU 3.4 779 */ 780 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 781 782 /** 783 * @stable ICU 3.4 784 */ 785 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 786 787 /** 788 * @stable ICU 3.4 789 */ 790 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 791 792 /** 793 * @stable ICU 3.4 794 */ 795 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 796 797 /** 798 * @stable ICU 3.4 799 */ 800 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 801 802 /** 803 * @stable ICU 3.4 804 */ 805 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 806 807 /** 808 * @stable ICU 3.4 809 */ 810 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 811 812 /** 813 * @stable ICU 3.4 814 */ 815 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 816 817 /* New blocks in Unicode 5.0 */ 818 819 /** 820 * @stable ICU 3.6 821 */ 822 public static final int NKO_ID = 146; /*[07C0]*/ 823 /** 824 * @stable ICU 3.6 825 */ 826 public static final int BALINESE_ID = 147; /*[1B00]*/ 827 /** 828 * @stable ICU 3.6 829 */ 830 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 831 /** 832 * @stable ICU 3.6 833 */ 834 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 835 /** 836 * @stable ICU 3.6 837 */ 838 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 839 /** 840 * @stable ICU 3.6 841 */ 842 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 843 /** 844 * @stable ICU 3.6 845 */ 846 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 847 /** 848 * @stable ICU 3.6 849 */ 850 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 851 /** 852 * @stable ICU 3.6 853 */ 854 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 855 856 /** 857 * @stable ICU 4.0 858 */ 859 public static final int SUNDANESE_ID = 155; /* [1B80] */ 860 861 /** 862 * @stable ICU 4.0 863 */ 864 public static final int LEPCHA_ID = 156; /* [1C00] */ 865 866 /** 867 * @stable ICU 4.0 868 */ 869 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 870 871 /** 872 * @stable ICU 4.0 873 */ 874 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 875 876 /** 877 * @stable ICU 4.0 878 */ 879 public static final int VAI_ID = 159; /* [A500] */ 880 881 /** 882 * @stable ICU 4.0 883 */ 884 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 885 886 /** 887 * @stable ICU 4.0 888 */ 889 public static final int SAURASHTRA_ID = 161; /* [A880] */ 890 891 /** 892 * @stable ICU 4.0 893 */ 894 public static final int KAYAH_LI_ID = 162; /* [A900] */ 895 896 /** 897 * @stable ICU 4.0 898 */ 899 public static final int REJANG_ID = 163; /* [A930] */ 900 901 /** 902 * @stable ICU 4.0 903 */ 904 public static final int CHAM_ID = 164; /* [AA00] */ 905 906 /** 907 * @stable ICU 4.0 908 */ 909 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 910 911 /** 912 * @stable ICU 4.0 913 */ 914 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 915 916 /** 917 * @stable ICU 4.0 918 */ 919 public static final int LYCIAN_ID = 167; /* [10280] */ 920 921 /** 922 * @stable ICU 4.0 923 */ 924 public static final int CARIAN_ID = 168; /* [102A0] */ 925 926 /** 927 * @stable ICU 4.0 928 */ 929 public static final int LYDIAN_ID = 169; /* [10920] */ 930 931 /** 932 * @stable ICU 4.0 933 */ 934 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 935 936 /** 937 * @stable ICU 4.0 938 */ 939 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 940 941 /* New blocks in Unicode 5.2 */ 942 943 /** @stable ICU 4.4 */ 944 public static final int SAMARITAN_ID = 172; /*[0800]*/ 945 /** @stable ICU 4.4 */ 946 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 947 /** @stable ICU 4.4 */ 948 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 949 /** @stable ICU 4.4 */ 950 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 951 /** @stable ICU 4.4 */ 952 public static final int LISU_ID = 176; /*[A4D0]*/ 953 /** @stable ICU 4.4 */ 954 public static final int BAMUM_ID = 177; /*[A6A0]*/ 955 /** @stable ICU 4.4 */ 956 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 957 /** @stable ICU 4.4 */ 958 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 959 /** @stable ICU 4.4 */ 960 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 961 /** @stable ICU 4.4 */ 962 public static final int JAVANESE_ID = 181; /*[A980]*/ 963 /** @stable ICU 4.4 */ 964 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 965 /** @stable ICU 4.4 */ 966 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 967 /** @stable ICU 4.4 */ 968 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 969 /** @stable ICU 4.4 */ 970 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 971 /** @stable ICU 4.4 */ 972 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 973 /** @stable ICU 4.4 */ 974 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 975 /** @stable ICU 4.4 */ 976 public static final int AVESTAN_ID = 188; /*[10B00]*/ 977 /** @stable ICU 4.4 */ 978 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 979 /** @stable ICU 4.4 */ 980 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 981 /** @stable ICU 4.4 */ 982 public 
static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 983 /** @stable ICU 4.4 */ 984 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 985 /** @stable ICU 4.4 */ 986 public static final int KAITHI_ID = 193; /*[11080]*/ 987 /** @stable ICU 4.4 */ 988 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 989 /** @stable ICU 4.4 */ 990 public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 991 /** @stable ICU 4.4 */ 992 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 993 /** @stable ICU 4.4 */ 994 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 995 996 /* New blocks in Unicode 6.0 */ 997 998 /** @stable ICU 4.6 */ 999 public static final int MANDAIC_ID = 198; /*[0840]*/ 1000 /** @stable ICU 4.6 */ 1001 public static final int BATAK_ID = 199; /*[1BC0]*/ 1002 /** @stable ICU 4.6 */ 1003 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 1004 /** @stable ICU 4.6 */ 1005 public static final int BRAHMI_ID = 201; /*[11000]*/ 1006 /** @stable ICU 4.6 */ 1007 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 1008 /** @stable ICU 4.6 */ 1009 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 1010 /** @stable ICU 4.6 */ 1011 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 1012 /** @stable ICU 4.6 */ 1013 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 1014 /** @stable ICU 4.6 */ 1015 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 1016 /** @stable ICU 4.6 */ 1017 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 1018 /** @stable ICU 4.6 */ 1019 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 1020 /** @stable ICU 4.6 */ 1021 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 1022 1023 /* New blocks in Unicode 6.1 */ 1024 1025 /** @stable ICU 49 */ 1026 public static final int ARABIC_EXTENDED_A_ID = 210; 
/*[08A0]*/ 1027 /** @stable ICU 49 */ 1028 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 1029 /** @stable ICU 49 */ 1030 public static final int CHAKMA_ID = 212; /*[11100]*/ 1031 /** @stable ICU 49 */ 1032 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 1033 /** @stable ICU 49 */ 1034 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 1035 /** @stable ICU 49 */ 1036 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 1037 /** @stable ICU 49 */ 1038 public static final int MIAO_ID = 216; /*[16F00]*/ 1039 /** @stable ICU 49 */ 1040 public static final int SHARADA_ID = 217; /*[11180]*/ 1041 /** @stable ICU 49 */ 1042 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 1043 /** @stable ICU 49 */ 1044 public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 1045 /** @stable ICU 49 */ 1046 public static final int TAKRI_ID = 220; /*[11680]*/ 1047 1048 /* New blocks in Unicode 7.0 */ 1049 1050 /** @stable ICU 54 */ 1051 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 1052 /** @stable ICU 54 */ 1053 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 1054 /** @stable ICU 54 */ 1055 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 1056 /** @stable ICU 54 */ 1057 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 1058 /** @stable ICU 54 */ 1059 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 1060 /** @stable ICU 54 */ 1061 public static final int ELBASAN_ID = 226; /*[10500]*/ 1062 /** @stable ICU 54 */ 1063 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 1064 /** @stable ICU 54 */ 1065 public static final int GRANTHA_ID = 228; /*[11300]*/ 1066 /** @stable ICU 54 */ 1067 public static final int KHOJKI_ID = 229; /*[11200]*/ 1068 /** @stable ICU 54 */ 1069 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 1070 /** @stable ICU 54 */ 1071 public static final int 
LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 1072 /** @stable ICU 54 */ 1073 public static final int LINEAR_A_ID = 232; /*[10600]*/ 1074 /** @stable ICU 54 */ 1075 public static final int MAHAJANI_ID = 233; /*[11150]*/ 1076 /** @stable ICU 54 */ 1077 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 1078 /** @stable ICU 54 */ 1079 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 1080 /** @stable ICU 54 */ 1081 public static final int MODI_ID = 236; /*[11600]*/ 1082 /** @stable ICU 54 */ 1083 public static final int MRO_ID = 237; /*[16A40]*/ 1084 /** @stable ICU 54 */ 1085 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 1086 /** @stable ICU 54 */ 1087 public static final int NABATAEAN_ID = 239; /*[10880]*/ 1088 /** @stable ICU 54 */ 1089 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 1090 /** @stable ICU 54 */ 1091 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 1092 /** @stable ICU 54 */ 1093 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 1094 /** @stable ICU 54 */ 1095 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 1096 /** @stable ICU 54 */ 1097 public static final int PALMYRENE_ID = 244; /*[10860]*/ 1098 /** @stable ICU 54 */ 1099 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 1100 /** @stable ICU 54 */ 1101 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 1102 /** @stable ICU 54 */ 1103 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 1104 /** @stable ICU 54 */ 1105 public static final int SIDDHAM_ID = 248; /*[11580]*/ 1106 /** @stable ICU 54 */ 1107 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 1108 /** @stable ICU 54 */ 1109 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 1110 /** @stable ICU 54 */ 1111 public static final int TIRHUTA_ID = 251; /*[11480]*/ 1112 /** @stable ICU 54 */ 1113 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 1114 1115 /* New blocks in Unicode 8.0 */ 
1116 1117 /** @stable ICU 56 */ 1118 public static final int AHOM_ID = 253; /*[11700]*/ 1119 /** @stable ICU 56 */ 1120 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 1121 /** @stable ICU 56 */ 1122 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 1123 /** @stable ICU 56 */ 1124 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 1125 /** @stable ICU 56 */ 1126 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 1127 /** @stable ICU 56 */ 1128 public static final int HATRAN_ID = 258; /*[108E0]*/ 1129 /** @stable ICU 56 */ 1130 public static final int MULTANI_ID = 259; /*[11280]*/ 1131 /** @stable ICU 56 */ 1132 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 1133 /** @stable ICU 56 */ 1134 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 1135 /** @stable ICU 56 */ 1136 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 1137 1138 /* New blocks in Unicode 9.0 */ 1139 1140 /** @stable ICU 58 */ 1141 public static final int ADLAM_ID = 263; /*[1E900]*/ 1142 /** @stable ICU 58 */ 1143 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 1144 /** @stable ICU 58 */ 1145 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 1146 /** @stable ICU 58 */ 1147 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 1148 /** @stable ICU 58 */ 1149 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 1150 /** @stable ICU 58 */ 1151 public static final int MARCHEN_ID = 268; /*[11C70]*/ 1152 /** @stable ICU 58 */ 1153 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 1154 /** @stable ICU 58 */ 1155 public static final int NEWA_ID = 270; /*[11400]*/ 1156 /** @stable ICU 58 */ 1157 public static final int OSAGE_ID = 271; /*[104B0]*/ 1158 /** @stable ICU 58 */ 1159 public static final int TANGUT_ID = 272; /*[17000]*/ 1160 /** @stable ICU 58 */ 1161 public static final int 
TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 1162 1163 // New blocks in Unicode 10.0 1164 1165 /** @stable ICU 60 */ 1166 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 1167 /** @stable ICU 60 */ 1168 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 1169 /** @stable ICU 60 */ 1170 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 1171 /** @stable ICU 60 */ 1172 public static final int NUSHU_ID = 277; /*[1B170]*/ 1173 /** @stable ICU 60 */ 1174 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 1175 /** @stable ICU 60 */ 1176 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1177 /** @stable ICU 60 */ 1178 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1179 1180 /** 1181 * One more than the highest normal UnicodeBlock value. 1182 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1183 * 1184 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1185 */ 1186 @Deprecated 1187 public static final int COUNT = 281; 1188 1189 // blocks objects --------------------------------------------------- 1190 1191 /** 1192 * Array of UnicodeBlocks, for easy access in getInstance(int) 1193 */ 1194 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1195 1196 /** 1197 * @stable ICU 2.6 1198 */ 1199 public static final UnicodeBlock NO_BLOCK 1200 = new UnicodeBlock("NO_BLOCK", 0); 1201 1202 /** 1203 * @stable ICU 2.4 1204 */ 1205 public static final UnicodeBlock BASIC_LATIN 1206 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1207 /** 1208 * @stable ICU 2.4 1209 */ 1210 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1211 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1212 /** 1213 * @stable ICU 2.4 1214 */ 1215 public static final UnicodeBlock LATIN_EXTENDED_A 1216 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1217 /** 1218 * @stable ICU 2.4 1219 */ 1220 public static final UnicodeBlock LATIN_EXTENDED_B 1221 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1222 /** 1223 * @stable ICU 2.4 1224 */ 1225 public static final UnicodeBlock IPA_EXTENSIONS 1226 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1227 /** 1228 * @stable ICU 2.4 1229 */ 1230 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1231 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1232 /** 1233 * @stable ICU 2.4 1234 */ 1235 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1236 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1237 /** 1238 * Unicode 3.2 renames this block to "Greek and Coptic". 
1239 * @stable ICU 2.4 1240 */ 1241 public static final UnicodeBlock GREEK 1242 = new UnicodeBlock("GREEK", GREEK_ID); 1243 /** 1244 * @stable ICU 2.4 1245 */ 1246 public static final UnicodeBlock CYRILLIC 1247 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1248 /** 1249 * @stable ICU 2.4 1250 */ 1251 public static final UnicodeBlock ARMENIAN 1252 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1253 /** 1254 * @stable ICU 2.4 1255 */ 1256 public static final UnicodeBlock HEBREW 1257 = new UnicodeBlock("HEBREW", HEBREW_ID); 1258 /** 1259 * @stable ICU 2.4 1260 */ 1261 public static final UnicodeBlock ARABIC 1262 = new UnicodeBlock("ARABIC", ARABIC_ID); 1263 /** 1264 * @stable ICU 2.4 1265 */ 1266 public static final UnicodeBlock SYRIAC 1267 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1268 /** 1269 * @stable ICU 2.4 1270 */ 1271 public static final UnicodeBlock THAANA 1272 = new UnicodeBlock("THAANA", THAANA_ID); 1273 /** 1274 * @stable ICU 2.4 1275 */ 1276 public static final UnicodeBlock DEVANAGARI 1277 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1278 /** 1279 * @stable ICU 2.4 1280 */ 1281 public static final UnicodeBlock BENGALI 1282 = new UnicodeBlock("BENGALI", BENGALI_ID); 1283 /** 1284 * @stable ICU 2.4 1285 */ 1286 public static final UnicodeBlock GURMUKHI 1287 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1288 /** 1289 * @stable ICU 2.4 1290 */ 1291 public static final UnicodeBlock GUJARATI 1292 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1293 /** 1294 * @stable ICU 2.4 1295 */ 1296 public static final UnicodeBlock ORIYA 1297 = new UnicodeBlock("ORIYA", ORIYA_ID); 1298 /** 1299 * @stable ICU 2.4 1300 */ 1301 public static final UnicodeBlock TAMIL 1302 = new UnicodeBlock("TAMIL", TAMIL_ID); 1303 /** 1304 * @stable ICU 2.4 1305 */ 1306 public static final UnicodeBlock TELUGU 1307 = new UnicodeBlock("TELUGU", TELUGU_ID); 1308 /** 1309 * @stable ICU 2.4 1310 */ 1311 public static final UnicodeBlock KANNADA 1312 = new UnicodeBlock("KANNADA", KANNADA_ID); 1313 /** 
1314 * @stable ICU 2.4 1315 */ 1316 public static final UnicodeBlock MALAYALAM 1317 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1318 /** 1319 * @stable ICU 2.4 1320 */ 1321 public static final UnicodeBlock SINHALA 1322 = new UnicodeBlock("SINHALA", SINHALA_ID); 1323 /** 1324 * @stable ICU 2.4 1325 */ 1326 public static final UnicodeBlock THAI 1327 = new UnicodeBlock("THAI", THAI_ID); 1328 /** 1329 * @stable ICU 2.4 1330 */ 1331 public static final UnicodeBlock LAO 1332 = new UnicodeBlock("LAO", LAO_ID); 1333 /** 1334 * @stable ICU 2.4 1335 */ 1336 public static final UnicodeBlock TIBETAN 1337 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1338 /** 1339 * @stable ICU 2.4 1340 */ 1341 public static final UnicodeBlock MYANMAR 1342 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1343 /** 1344 * @stable ICU 2.4 1345 */ 1346 public static final UnicodeBlock GEORGIAN 1347 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1348 /** 1349 * @stable ICU 2.4 1350 */ 1351 public static final UnicodeBlock HANGUL_JAMO 1352 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1353 /** 1354 * @stable ICU 2.4 1355 */ 1356 public static final UnicodeBlock ETHIOPIC 1357 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1358 /** 1359 * @stable ICU 2.4 1360 */ 1361 public static final UnicodeBlock CHEROKEE 1362 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1363 /** 1364 * @stable ICU 2.4 1365 */ 1366 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1367 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1368 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1369 /** 1370 * @stable ICU 2.4 1371 */ 1372 public static final UnicodeBlock OGHAM 1373 = new UnicodeBlock("OGHAM", OGHAM_ID); 1374 /** 1375 * @stable ICU 2.4 1376 */ 1377 public static final UnicodeBlock RUNIC 1378 = new UnicodeBlock("RUNIC", RUNIC_ID); 1379 /** 1380 * @stable ICU 2.4 1381 */ 1382 public static final UnicodeBlock KHMER 1383 = new UnicodeBlock("KHMER", KHMER_ID); 1384 /** 1385 * @stable ICU 2.4 1386 */ 1387 
public static final UnicodeBlock MONGOLIAN 1388 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1389 /** 1390 * @stable ICU 2.4 1391 */ 1392 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1393 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1394 /** 1395 * @stable ICU 2.4 1396 */ 1397 public static final UnicodeBlock GREEK_EXTENDED 1398 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1399 /** 1400 * @stable ICU 2.4 1401 */ 1402 public static final UnicodeBlock GENERAL_PUNCTUATION 1403 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1404 /** 1405 * @stable ICU 2.4 1406 */ 1407 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1408 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1409 /** 1410 * @stable ICU 2.4 1411 */ 1412 public static final UnicodeBlock CURRENCY_SYMBOLS 1413 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1414 /** 1415 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1416 * Symbols". 
1417 * @stable ICU 2.4 1418 */ 1419 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1420 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1421 /** 1422 * @stable ICU 2.4 1423 */ 1424 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1425 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1426 /** 1427 * @stable ICU 2.4 1428 */ 1429 public static final UnicodeBlock NUMBER_FORMS 1430 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1431 /** 1432 * @stable ICU 2.4 1433 */ 1434 public static final UnicodeBlock ARROWS 1435 = new UnicodeBlock("ARROWS", ARROWS_ID); 1436 /** 1437 * @stable ICU 2.4 1438 */ 1439 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1440 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1441 /** 1442 * @stable ICU 2.4 1443 */ 1444 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1445 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1446 /** 1447 * @stable ICU 2.4 1448 */ 1449 public static final UnicodeBlock CONTROL_PICTURES 1450 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1451 /** 1452 * @stable ICU 2.4 1453 */ 1454 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1455 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1456 /** 1457 * @stable ICU 2.4 1458 */ 1459 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1460 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1461 /** 1462 * @stable ICU 2.4 1463 */ 1464 public static final UnicodeBlock BOX_DRAWING 1465 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1466 /** 1467 * @stable ICU 2.4 1468 */ 1469 public static final UnicodeBlock BLOCK_ELEMENTS 1470 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1471 /** 1472 * @stable ICU 2.4 1473 */ 1474 public static final UnicodeBlock GEOMETRIC_SHAPES 1475 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1476 
/** 1477 * @stable ICU 2.4 1478 */ 1479 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1480 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1481 /** 1482 * @stable ICU 2.4 1483 */ 1484 public static final UnicodeBlock DINGBATS 1485 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1486 /** 1487 * @stable ICU 2.4 1488 */ 1489 public static final UnicodeBlock BRAILLE_PATTERNS 1490 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1491 /** 1492 * @stable ICU 2.4 1493 */ 1494 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1495 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1496 /** 1497 * @stable ICU 2.4 1498 */ 1499 public static final UnicodeBlock KANGXI_RADICALS 1500 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1501 /** 1502 * @stable ICU 2.4 1503 */ 1504 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1505 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1506 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1507 /** 1508 * @stable ICU 2.4 1509 */ 1510 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1511 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1512 /** 1513 * @stable ICU 2.4 1514 */ 1515 public static final UnicodeBlock HIRAGANA 1516 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1517 /** 1518 * @stable ICU 2.4 1519 */ 1520 public static final UnicodeBlock KATAKANA 1521 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1522 /** 1523 * @stable ICU 2.4 1524 */ 1525 public static final UnicodeBlock BOPOMOFO 1526 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1527 /** 1528 * @stable ICU 2.4 1529 */ 1530 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1531 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1532 /** 1533 * @stable ICU 2.4 1534 */ 1535 public static final UnicodeBlock KANBUN 1536 = new UnicodeBlock("KANBUN", KANBUN_ID); 1537 /** 1538 * @stable ICU 2.4 1539 */ 1540 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1541 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1542 /** 1543 * @stable ICU 2.4 1544 */ 1545 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1546 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1547 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1548 /** 1549 * @stable ICU 2.4 1550 */ 1551 public static final UnicodeBlock CJK_COMPATIBILITY 1552 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1553 /** 1554 * @stable ICU 2.4 1555 */ 1556 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1557 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1558 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1559 /** 1560 * @stable ICU 2.4 1561 */ 1562 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1563 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1564 /** 1565 * @stable ICU 2.4 1566 */ 1567 public static final UnicodeBlock YI_SYLLABLES 1568 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1569 /** 1570 * @stable ICU 2.4 1571 */ 1572 public static final UnicodeBlock YI_RADICALS 1573 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1574 /** 1575 * @stable ICU 2.4 1576 */ 1577 public static final UnicodeBlock HANGUL_SYLLABLES 1578 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1579 /** 1580 * @stable ICU 2.4 1581 */ 1582 public static final UnicodeBlock HIGH_SURROGATES 1583 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1584 /** 1585 * @stable ICU 2.4 1586 */ 1587 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1588 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1589 /** 1590 * @stable ICU 2.4 1591 */ 1592 public static final UnicodeBlock LOW_SURROGATES 1593 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1594 /** 1595 * Same as public static final int PRIVATE_USE. 
1596 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1597 * and multiple code point ranges had this block. 1598 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1599 * and adds separate blocks for the supplementary PUAs. 1600 * @stable ICU 2.4 1601 */ 1602 public static final UnicodeBlock PRIVATE_USE_AREA 1603 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1604 /** 1605 * Same as public static final int PRIVATE_USE_AREA. 1606 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1607 * and multiple code point ranges had this block. 1608 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1609 * and adds separate blocks for the supplementary PUAs. 1610 * @stable ICU 2.4 1611 */ 1612 public static final UnicodeBlock PRIVATE_USE 1613 = PRIVATE_USE_AREA; 1614 /** 1615 * @stable ICU 2.4 1616 */ 1617 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1618 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1619 /** 1620 * @stable ICU 2.4 1621 */ 1622 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1623 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1624 /** 1625 * @stable ICU 2.4 1626 */ 1627 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1628 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1629 /** 1630 * @stable ICU 2.4 1631 */ 1632 public static final UnicodeBlock COMBINING_HALF_MARKS 1633 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1634 /** 1635 * @stable ICU 2.4 1636 */ 1637 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1638 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1639 /** 1640 * @stable ICU 2.4 1641 */ 1642 public static final UnicodeBlock SMALL_FORM_VARIANTS 1643 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1644 /** 1645 * @stable ICU 2.4 1646 */ 1647 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1648 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1649 /** 1650 * @stable ICU 2.4 1651 */ 1652 public static final UnicodeBlock SPECIALS 1653 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1654 /** 1655 * @stable ICU 2.4 1656 */ 1657 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1658 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1659 /** 1660 * @stable ICU 2.4 1661 */ 1662 public static final UnicodeBlock OLD_ITALIC 1663 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1664 /** 1665 * @stable ICU 2.4 1666 */ 1667 public static final UnicodeBlock GOTHIC 1668 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1669 /** 1670 * @stable ICU 2.4 1671 */ 1672 public static final UnicodeBlock DESERET 1673 = new UnicodeBlock("DESERET", DESERET_ID); 1674 /** 1675 * @stable ICU 2.4 1676 */ 1677 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1678 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1679 /** 1680 * @stable ICU 2.4 1681 */ 1682 public static final UnicodeBlock MUSICAL_SYMBOLS 1683 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1684 /** 1685 * @stable ICU 2.4 1686 */ 1687 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1688 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1689 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1690 /** 1691 * @stable ICU 2.4 1692 */ 1693 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1694 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1695 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1696 /** 1697 * @stable ICU 2.4 1698 */ 1699 public static final UnicodeBlock 1700 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1701 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1702 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1703 /** 1704 * @stable ICU 2.4 1705 */ 1706 public static final UnicodeBlock TAGS 
1707 = new UnicodeBlock("TAGS", TAGS_ID); 1708 1709 // New blocks in Unicode 3.2 1710 1711 /** 1712 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1713 * @stable ICU 2.4 1714 */ 1715 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1716 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1717 /** 1718 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1719 * @stable ICU 3.0 1720 */ 1721 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1722 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1723 /** 1724 * @stable ICU 2.4 1725 */ 1726 public static final UnicodeBlock TAGALOG 1727 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1728 /** 1729 * @stable ICU 2.4 1730 */ 1731 public static final UnicodeBlock HANUNOO 1732 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1733 /** 1734 * @stable ICU 2.4 1735 */ 1736 public static final UnicodeBlock BUHID 1737 = new UnicodeBlock("BUHID", BUHID_ID); 1738 /** 1739 * @stable ICU 2.4 1740 */ 1741 public static final UnicodeBlock TAGBANWA 1742 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1743 /** 1744 * @stable ICU 2.4 1745 */ 1746 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1747 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1748 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1749 /** 1750 * @stable ICU 2.4 1751 */ 1752 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1753 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1754 /** 1755 * @stable ICU 2.4 1756 */ 1757 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1758 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1759 /** 1760 * @stable ICU 2.4 1761 */ 1762 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1763 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1764 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1765 /** 1766 * @stable ICU 2.4 1767 */ 1768 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1769 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1770 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1771 /** 1772 * @stable ICU 2.4 1773 */ 1774 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1775 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1776 /** 1777 * @stable ICU 2.4 1778 */ 1779 public static final UnicodeBlock VARIATION_SELECTORS 1780 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1781 /** 1782 * @stable ICU 2.4 1783 */ 1784 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1785 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1786 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1787 /** 1788 * @stable ICU 2.4 1789 */ 1790 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1791 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1792 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1793 1794 /** 1795 * @stable ICU 2.6 1796 */ 1797 public static final UnicodeBlock LIMBU 1798 = new UnicodeBlock("LIMBU", LIMBU_ID); 1799 /** 1800 * @stable ICU 2.6 1801 */ 1802 public static final UnicodeBlock TAI_LE 1803 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1804 /** 1805 * @stable ICU 2.6 1806 */ 1807 public static final UnicodeBlock KHMER_SYMBOLS 1808 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1809 1810 /** 1811 * @stable ICU 2.6 1812 */ 1813 public static final UnicodeBlock PHONETIC_EXTENSIONS 1814 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1815 1816 /** 1817 * @stable ICU 2.6 1818 */ 1819 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1820 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1821 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1822 /** 1823 * @stable ICU 2.6 1824 */ 1825 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1826 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1827 /** 1828 * @stable ICU 2.6 
1829 */ 1830 public static final UnicodeBlock LINEAR_B_SYLLABARY 1831 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1832 /** 1833 * @stable ICU 2.6 1834 */ 1835 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1836 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1837 /** 1838 * @stable ICU 2.6 1839 */ 1840 public static final UnicodeBlock AEGEAN_NUMBERS 1841 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1842 /** 1843 * @stable ICU 2.6 1844 */ 1845 public static final UnicodeBlock UGARITIC 1846 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1847 /** 1848 * @stable ICU 2.6 1849 */ 1850 public static final UnicodeBlock SHAVIAN 1851 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1852 /** 1853 * @stable ICU 2.6 1854 */ 1855 public static final UnicodeBlock OSMANYA 1856 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1857 /** 1858 * @stable ICU 2.6 1859 */ 1860 public static final UnicodeBlock CYPRIOT_SYLLABARY 1861 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1862 /** 1863 * @stable ICU 2.6 1864 */ 1865 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1866 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1867 1868 /** 1869 * @stable ICU 2.6 1870 */ 1871 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1872 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1873 1874 /* New blocks in Unicode 4.1 */ 1875 1876 /** 1877 * @stable ICU 3.4 1878 */ 1879 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1880 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1881 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1882 1883 /** 1884 * @stable ICU 3.4 1885 */ 1886 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1887 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1888 1889 /** 1890 * @stable ICU 3.4 1891 */ 1892 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1893 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1894 1895 /** 1896 * @stable ICU 3.4 1897 */ 1898 public static final UnicodeBlock BUGINESE = 1899 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1900 1901 /** 1902 * @stable ICU 3.4 1903 */ 1904 public static final UnicodeBlock CJK_STROKES = 1905 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1906 1907 /** 1908 * @stable ICU 3.4 1909 */ 1910 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1911 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1912 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1913 1914 /** 1915 * @stable ICU 3.4 1916 */ 1917 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1918 1919 /** 1920 * @stable ICU 3.4 1921 */ 1922 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1923 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1924 1925 /** 1926 * @stable ICU 3.4 1927 */ 1928 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1929 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1930 1931 /** 1932 * @stable ICU 3.4 1933 */ 1934 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1935 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1936 1937 /** 1938 * @stable ICU 3.4 1939 */ 1940 public static final UnicodeBlock GLAGOLITIC = 1941 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1942 1943 /** 1944 * @stable ICU 3.4 1945 */ 1946 public static final UnicodeBlock KHAROSHTHI = 1947 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1948 1949 /** 1950 * @stable ICU 3.4 1951 */ 1952 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1953 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1954 1955 /** 1956 * @stable ICU 3.4 1957 */ 1958 public static final UnicodeBlock NEW_TAI_LUE = 1959 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
1960 1961 /** 1962 * @stable ICU 3.4 1963 */ 1964 public static final UnicodeBlock OLD_PERSIAN = 1965 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1966 1967 /** 1968 * @stable ICU 3.4 1969 */ 1970 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1971 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1972 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1973 1974 /** 1975 * @stable ICU 3.4 1976 */ 1977 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1978 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1979 1980 /** 1981 * @stable ICU 3.4 1982 */ 1983 public static final UnicodeBlock SYLOTI_NAGRI = 1984 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1985 1986 /** 1987 * @stable ICU 3.4 1988 */ 1989 public static final UnicodeBlock TIFINAGH = 1990 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1991 1992 /** 1993 * @stable ICU 3.4 1994 */ 1995 public static final UnicodeBlock VERTICAL_FORMS = 1996 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1997 1998 /** 1999 * @stable ICU 3.6 2000 */ 2001 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 2002 /** 2003 * @stable ICU 3.6 2004 */ 2005 public static final UnicodeBlock BALINESE = 2006 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 2007 /** 2008 * @stable ICU 3.6 2009 */ 2010 public static final UnicodeBlock LATIN_EXTENDED_C = 2011 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 2012 /** 2013 * @stable ICU 3.6 2014 */ 2015 public static final UnicodeBlock LATIN_EXTENDED_D = 2016 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2017 /** 2018 * @stable ICU 3.6 2019 */ 2020 public static final UnicodeBlock PHAGS_PA = 2021 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2022 /** 2023 * @stable ICU 3.6 2024 */ 2025 public static final UnicodeBlock PHOENICIAN = 2026 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2027 /** 2028 * @stable ICU 3.6 2029 */ 2030 public static final UnicodeBlock CUNEIFORM = 2031 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2032 /** 2033 * @stable ICU 3.6 2034 */ 2035 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2036 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2037 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2038 /** 2039 * @stable ICU 3.6 2040 */ 2041 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2042 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2043 2044 /** 2045 * @stable ICU 4.0 2046 */ 2047 public static final UnicodeBlock SUNDANESE = 2048 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2049 2050 /** 2051 * @stable ICU 4.0 2052 */ 2053 public static final UnicodeBlock LEPCHA = 2054 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2055 2056 /** 2057 * @stable ICU 4.0 2058 */ 2059 public static final UnicodeBlock OL_CHIKI = 2060 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2061 2062 /** 2063 * @stable ICU 4.0 2064 */ 2065 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2066 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2067 2068 /** 2069 * @stable ICU 4.0 2070 */ 2071 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2072 2073 /** 2074 * @stable ICU 4.0 2075 */ 2076 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2077 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2078 2079 /** 2080 * @stable ICU 4.0 2081 */ 2082 public static final UnicodeBlock SAURASHTRA = 2083 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2084 2085 /** 2086 * @stable ICU 4.0 2087 */ 2088 public static final UnicodeBlock KAYAH_LI = 2089 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2090 2091 /** 2092 * @stable ICU 4.0 2093 */ 2094 public static final UnicodeBlock REJANG = 2095 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2096 2097 /** 2098 * @stable ICU 4.0 2099 */ 2100 public static final UnicodeBlock CHAM = 2101 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2102 2103 /** 2104 * @stable ICU 4.0 2105 */ 2106 public static final UnicodeBlock ANCIENT_SYMBOLS = 2107 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2108 2109 /** 2110 * @stable ICU 4.0 2111 */ 2112 public static final UnicodeBlock PHAISTOS_DISC = 2113 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2114 2115 /** 2116 * @stable ICU 4.0 2117 */ 2118 public static final UnicodeBlock LYCIAN = 2119 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2120 2121 /** 2122 * @stable ICU 4.0 2123 */ 2124 public static final UnicodeBlock CARIAN = 2125 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2126 2127 /** 2128 * @stable ICU 4.0 2129 */ 2130 public static final UnicodeBlock LYDIAN = 2131 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2132 2133 /** 2134 * @stable ICU 4.0 2135 */ 2136 public static final UnicodeBlock MAHJONG_TILES = 2137 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2138 2139 /** 2140 * @stable ICU 4.0 2141 */ 2142 public static final UnicodeBlock DOMINO_TILES = 2143 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2144 2145 /* New blocks in Unicode 5.2 */ 2146 2147 /** @stable ICU 4.4 */ 2148 public static final UnicodeBlock SAMARITAN = 2149 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2150 /** @stable ICU 4.4 */ 2151 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2152 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2153 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2154 /** @stable ICU 4.4 */ 2155 public static final UnicodeBlock TAI_THAM = 2156 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2157 /** @stable ICU 4.4 */ 2158 public static final UnicodeBlock VEDIC_EXTENSIONS = 2159 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2160 /** @stable ICU 4.4 */ 2161 public static final UnicodeBlock LISU = 2162 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2163 /** @stable ICU 4.4 */ 2164 public static final UnicodeBlock BAMUM = 2165 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2166 /** @stable ICU 4.4 */ 2167 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2168 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2169 /** @stable ICU 4.4 */ 2170 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2171 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2172 /** @stable ICU 4.4 */ 2173 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2174 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2175 /** @stable ICU 4.4 */ 2176 public static final UnicodeBlock JAVANESE = 2177 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2178 /** @stable ICU 4.4 */ 2179 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2180 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2181 /** @stable ICU 4.4 */ 2182 public static final UnicodeBlock TAI_VIET = 2183 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2184 /** @stable ICU 4.4 */ 2185 public static final UnicodeBlock MEETEI_MAYEK = 2186 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2187 /** @stable ICU 4.4 */ 2188 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2189 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2190 /** @stable ICU 4.4 */ 2191 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2192 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2193 /** @stable ICU 4.4 */ 2194 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2195 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2196 /** @stable ICU 4.4 */ 2197 public static final 
UnicodeBlock AVESTAN = 2198 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2199 /** @stable ICU 4.4 */ 2200 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2201 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2202 /** @stable ICU 4.4 */ 2203 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2204 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2205 /** @stable ICU 4.4 */ 2206 public static final UnicodeBlock OLD_TURKIC = 2207 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2208 /** @stable ICU 4.4 */ 2209 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2210 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2211 /** @stable ICU 4.4 */ 2212 public static final UnicodeBlock KAITHI = 2213 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2214 /** @stable ICU 4.4 */ 2215 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2216 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2217 /** @stable ICU 4.4 */ 2218 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2219 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2220 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2221 /** @stable ICU 4.4 */ 2222 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2223 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2224 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2225 /** @stable ICU 4.4 */ 2226 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2227 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2228 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2229 2230 /* New blocks in Unicode 6.0 */ 2231 2232 /** @stable ICU 4.6 */ 2233 public static final UnicodeBlock MANDAIC = 2234 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2235 /** @stable ICU 4.6 */ 2236 public static final UnicodeBlock BATAK = 2237 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2238 /** @stable ICU 4.6 */ 2239 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2240 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2241 /** @stable ICU 4.6 */ 2242 public static final UnicodeBlock BRAHMI = 2243 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2244 /** @stable ICU 4.6 */ 2245 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2246 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2247 /** @stable ICU 4.6 */ 2248 public static final UnicodeBlock KANA_SUPPLEMENT = 2249 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2250 /** @stable ICU 4.6 */ 2251 public static final UnicodeBlock PLAYING_CARDS = 2252 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2253 /** @stable ICU 4.6 */ 2254 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2255 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2256 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2257 /** @stable ICU 4.6 */ 2258 public static final UnicodeBlock EMOTICONS = 2259 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2260 /** @stable ICU 4.6 */ 2261 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2262 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2263 /** @stable ICU 4.6 */ 2264 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2265 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2266 /** @stable ICU 4.6 */ 2267 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2268 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2269 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2270 2271 /* New blocks in Unicode 6.1 */ 2272 2273 /** @stable ICU 49 */ 2274 public static final UnicodeBlock ARABIC_EXTENDED_A = 2275 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2276 /** @stable ICU 49 */ 2277 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2278 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2279 /** @stable ICU 49 */ 2280 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2281 /** @stable ICU 49 */ 2282 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2283 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2284 /** @stable ICU 49 */ 2285 public static final UnicodeBlock MEROITIC_CURSIVE = 2286 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2287 /** @stable ICU 49 */ 2288 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2289 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2290 /** @stable ICU 49 */ 2291 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2292 /** @stable ICU 49 */ 2293 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2294 /** @stable ICU 49 */ 2295 public static final UnicodeBlock SORA_SOMPENG = 2296 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2297 /** @stable ICU 49 */ 2298 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2299 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2300 /** @stable ICU 49 */ 2301 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2302 2303 /* New blocks in Unicode 7.0 */ 2304 2305 /** @stable ICU 54 */ 2306 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2307 /** @stable ICU 54 */ 2308 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2309 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2310 /** @stable ICU 54 */ 2311 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2312 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2313 /** @stable ICU 54 */ 2314 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2315 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2316 /** @stable ICU 54 */ 2317 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2318 /** @stable ICU 54 */ 2319 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2320 /** @stable ICU 54 */ 2321 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2322 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2323 /** @stable ICU 54 */ 2324 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2325 /** @stable ICU 54 */ 2326 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2327 /** @stable ICU 54 */ 2328 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2329 /** @stable ICU 54 */ 2330 public static final UnicodeBlock LATIN_EXTENDED_E = 2331 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2332 /** @stable ICU 54 */ 2333 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2334 /** @stable ICU 54 */ 2335 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2336 /** @stable ICU 54 */ 2337 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2338 /** @stable ICU 54 */ 2339 public static final UnicodeBlock MENDE_KIKAKUI = 2340 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2341 /** @stable ICU 54 */ 2342 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2343 /** @stable ICU 54 */ 2344 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2345 /** @stable ICU 54 */ 2346 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2347 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2348 /** @stable ICU 54 */ 2349 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2350 /** @stable ICU 54 */ 2351 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2352 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2353 /** @stable ICU 54 */ 2354 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2355 /** @stable ICU 54 */ 2356 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2357 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2358 /** @stable ICU 54 */ 2359 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2360 /** @stable ICU 54 */ 2361 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2362 /** @stable ICU 54 */ 2363 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2364 /** @stable ICU 54 */ 2365 public static final UnicodeBlock PSALTER_PAHLAVI = 2366 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2367 /** @stable ICU 54 */ 2368 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2369 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2370 /** @stable ICU 54 */ 2371 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2372 /** @stable ICU 54 */ 2373 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2374 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2375 /** @stable ICU 54 */ 2376 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2377 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2378 /** @stable ICU 54 */ 2379 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2380 /** @stable ICU 54 */ 2381 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2382 2383 /* New blocks in Unicode 8.0 */ 2384 2385 /** @stable ICU 56 */ 2386 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2387 /** @stable ICU 56 */ 2388 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2389 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2390 /** @stable ICU 56 */ 2391 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2392 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2393 /** @stable ICU 56 */ 2394 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2395 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2396 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2397 /** @stable ICU 56 */ 2398 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2399 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2400 /** @stable ICU 56 */ 2401 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2402 /** @stable ICU 56 */ 2403 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2404 /** @stable ICU 56 */ 2405 public static final UnicodeBlock OLD_HUNGARIAN = 2406 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2407 /** @stable ICU 56 */ 2408 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2409 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2410 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2411 /** @stable ICU 56 */ 2412 public static final UnicodeBlock SUTTON_SIGNWRITING = 2413 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2414 2415 /* New blocks in Unicode 9.0 */ 2416 2417 /** @stable ICU 58 */ 2418 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2419 /** @stable ICU 58 */ 2420 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2421 /** @stable ICU 58 */ 2422 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2423 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2424 /** @stable ICU 58 */ 2425 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2426 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2427 /** @stable ICU 58 */ 2428 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2429 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2430 /** @stable ICU 58 */ 2431 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2432 /** @stable ICU 58 */ 2433 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2434 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2435 /** @stable ICU 58 */ 2436 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2437 /** @stable ICU 58 */ 2438 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2439 /** @stable ICU 58 */ 2440 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2441 /** @stable ICU 58 */ 2442 public static final UnicodeBlock TANGUT_COMPONENTS = 2443 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2444 2445 // New blocks in Unicode 10.0 2446 2447 /** @stable ICU 60 */ 2448 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 2449 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2450 /** 
@stable ICU 60 */
        public static final UnicodeBlock KANA_EXTENDED_A =
                new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock MASARAM_GONDI =
                new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock SYRIAC_SUPPLEMENT =
                new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock ZANABAZAR_SQUARE =
                new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/

        /**
         * Placeholder block handed out instead of a real one: {@link #getInstance(int)}
         * returns it for an ID outside the block table, and {@link #of(int)} returns it
         * for a code point above {@link UCharacter#MAX_VALUE}.
         * @stable ICU 2.4
         */
        // NOTE(review): INVALID_CODE_ID is presumably negative, so the constructor
        // does not register this instance in BLOCKS_ -- confirm at its definition.
        public static final UnicodeBlock INVALID_CODE
        = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);

        // Sanity check, run once during class initialization after all constants
        // above have been constructed: every ID in [0, COUNT) must have been
        // registered in BLOCKS_ by a constructor call, otherwise a constant
        // declaration is missing and initialization fails fast.
        static {
            for (int blockId = 0; blockId < COUNT; ++blockId) {
                if (BLOCKS_[blockId] == null) {
                    throw new java.lang.IllegalStateException(
                            "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
                }
            }
        }

        // public methods --------------------------------------------------

        /**
         * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
         * If no such ID exists, an INVALID_CODE UnicodeBlock will be returned.
         * @param id UnicodeBlock ID
         * @return the only instance of the UnicodeBlock with the argument ID
         * if it exists, otherwise an INVALID_CODE UnicodeBlock will be
         * returned.
2491 * @stable ICU 2.4 2492 */ 2493 public static UnicodeBlock getInstance(int id) 2494 { 2495 if (id >= 0 && id < BLOCKS_.length) { 2496 return BLOCKS_[id]; 2497 } 2498 return INVALID_CODE; 2499 } 2500 2501 /** 2502 * Returns the Unicode allocation block that contains the code point, 2503 * or null if the code point is not a member of a defined block. 2504 * @param ch code point to be tested 2505 * @return the Unicode allocation block that contains the code point 2506 * @stable ICU 2.4 2507 */ 2508 public static UnicodeBlock of(int ch) 2509 { 2510 if (ch > MAX_VALUE) { 2511 return INVALID_CODE; 2512 } 2513 2514 return UnicodeBlock.getInstance( 2515 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2516 } 2517 2518 /** 2519 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2520 * Returns the Unicode block with the given name. {@icunote} Unlike 2521 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2522 * against the official UCD name and the Java block name 2523 * (ignoring case). 
2524 * @param blockName the name of the block to match 2525 * @return the UnicodeBlock with that name 2526 * @throws IllegalArgumentException if the blockName could not be matched 2527 * @stable ICU 3.0 2528 */ 2529 public static final UnicodeBlock forName(String blockName) { 2530 Map<String, UnicodeBlock> m = null; 2531 if (mref != null) { 2532 m = mref.get(); 2533 } 2534 if (m == null) { 2535 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2536 for (int i = 0; i < BLOCKS_.length; ++i) { 2537 UnicodeBlock b = BLOCKS_[i]; 2538 String name = trimBlockName( 2539 getPropertyValueName(UProperty.BLOCK, b.getID(), 2540 UProperty.NameChoice.LONG)); 2541 m.put(name, b); 2542 } 2543 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2544 } 2545 UnicodeBlock b = m.get(trimBlockName(blockName)); 2546 if (b == null) { 2547 throw new IllegalArgumentException(); 2548 } 2549 return b; 2550 } 2551 private static SoftReference<Map<String, UnicodeBlock>> mref; 2552 2553 private static String trimBlockName(String name) { 2554 String upper = name.toUpperCase(Locale.ENGLISH); 2555 StringBuilder result = new StringBuilder(upper.length()); 2556 for (int i = 0; i < upper.length(); i++) { 2557 char c = upper.charAt(i); 2558 if (c != ' ' && c != '_' && c != '-') { 2559 result.append(c); 2560 } 2561 } 2562 return result.toString(); 2563 } 2564 2565 /** 2566 * {icu} Returns the type ID of this Unicode block 2567 * @return integer type ID of this Unicode block 2568 * @stable ICU 2.4 2569 */ 2570 public int getID() 2571 { 2572 return m_id_; 2573 } 2574 2575 // private data members --------------------------------------------- 2576 2577 /** 2578 * Identification code for this UnicodeBlock 2579 */ 2580 private int m_id_; 2581 2582 // private constructor ---------------------------------------------- 2583 2584 /** 2585 * UnicodeBlock constructor 2586 * @param name name of this UnicodeBlock 2587 * @param id unique id of this UnicodeBlock 2588 * @exception NullPointerException if name is 
<code>null</code> 2589 */ 2590 private UnicodeBlock(String name, int id) 2591 { 2592 super(name); 2593 m_id_ = id; 2594 if (id >= 0) { 2595 BLOCKS_[id] = this; 2596 } 2597 } 2598 } 2599 2600 /** 2601 * East Asian Width constants. 2602 * @see UProperty#EAST_ASIAN_WIDTH 2603 * @see UCharacter#getIntPropertyValue 2604 * @stable ICU 2.4 2605 */ 2606 public static interface EastAsianWidth 2607 { 2608 /** 2609 * @stable ICU 2.4 2610 */ 2611 public static final int NEUTRAL = 0; 2612 /** 2613 * @stable ICU 2.4 2614 */ 2615 public static final int AMBIGUOUS = 1; 2616 /** 2617 * @stable ICU 2.4 2618 */ 2619 public static final int HALFWIDTH = 2; 2620 /** 2621 * @stable ICU 2.4 2622 */ 2623 public static final int FULLWIDTH = 3; 2624 /** 2625 * @stable ICU 2.4 2626 */ 2627 public static final int NARROW = 4; 2628 /** 2629 * @stable ICU 2.4 2630 */ 2631 public static final int WIDE = 5; 2632 /** 2633 * One more than the highest normal EastAsianWidth value. 2634 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2635 * 2636 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2637 */ 2638 @Deprecated 2639 public static final int COUNT = 6; 2640 } 2641 2642 /** 2643 * Decomposition Type constants. 
2644 * @see UProperty#DECOMPOSITION_TYPE 2645 * @stable ICU 2.4 2646 */ 2647 public static interface DecompositionType 2648 { 2649 /** 2650 * @stable ICU 2.4 2651 */ 2652 public static final int NONE = 0; 2653 /** 2654 * @stable ICU 2.4 2655 */ 2656 public static final int CANONICAL = 1; 2657 /** 2658 * @stable ICU 2.4 2659 */ 2660 public static final int COMPAT = 2; 2661 /** 2662 * @stable ICU 2.4 2663 */ 2664 public static final int CIRCLE = 3; 2665 /** 2666 * @stable ICU 2.4 2667 */ 2668 public static final int FINAL = 4; 2669 /** 2670 * @stable ICU 2.4 2671 */ 2672 public static final int FONT = 5; 2673 /** 2674 * @stable ICU 2.4 2675 */ 2676 public static final int FRACTION = 6; 2677 /** 2678 * @stable ICU 2.4 2679 */ 2680 public static final int INITIAL = 7; 2681 /** 2682 * @stable ICU 2.4 2683 */ 2684 public static final int ISOLATED = 8; 2685 /** 2686 * @stable ICU 2.4 2687 */ 2688 public static final int MEDIAL = 9; 2689 /** 2690 * @stable ICU 2.4 2691 */ 2692 public static final int NARROW = 10; 2693 /** 2694 * @stable ICU 2.4 2695 */ 2696 public static final int NOBREAK = 11; 2697 /** 2698 * @stable ICU 2.4 2699 */ 2700 public static final int SMALL = 12; 2701 /** 2702 * @stable ICU 2.4 2703 */ 2704 public static final int SQUARE = 13; 2705 /** 2706 * @stable ICU 2.4 2707 */ 2708 public static final int SUB = 14; 2709 /** 2710 * @stable ICU 2.4 2711 */ 2712 public static final int SUPER = 15; 2713 /** 2714 * @stable ICU 2.4 2715 */ 2716 public static final int VERTICAL = 16; 2717 /** 2718 * @stable ICU 2.4 2719 */ 2720 public static final int WIDE = 17; 2721 /** 2722 * One more than the highest normal DecompositionType value. 2723 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 2724 * 2725 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2726 */ 2727 @Deprecated 2728 public static final int COUNT = 18; 2729 } 2730 2731 /** 2732 * Joining Type constants. 
2733 * @see UProperty#JOINING_TYPE 2734 * @stable ICU 2.4 2735 */ 2736 public static interface JoiningType 2737 { 2738 /** 2739 * @stable ICU 2.4 2740 */ 2741 public static final int NON_JOINING = 0; 2742 /** 2743 * @stable ICU 2.4 2744 */ 2745 public static final int JOIN_CAUSING = 1; 2746 /** 2747 * @stable ICU 2.4 2748 */ 2749 public static final int DUAL_JOINING = 2; 2750 /** 2751 * @stable ICU 2.4 2752 */ 2753 public static final int LEFT_JOINING = 3; 2754 /** 2755 * @stable ICU 2.4 2756 */ 2757 public static final int RIGHT_JOINING = 4; 2758 /** 2759 * @stable ICU 2.4 2760 */ 2761 public static final int TRANSPARENT = 5; 2762 /** 2763 * One more than the highest normal JoiningType value. 2764 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 2765 * 2766 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2767 */ 2768 @Deprecated 2769 public static final int COUNT = 6; 2770 } 2771 2772 /** 2773 * Joining Group constants. 
2774 * @see UProperty#JOINING_GROUP 2775 * @stable ICU 2.4 2776 */ 2777 public static interface JoiningGroup 2778 { 2779 /** 2780 * @stable ICU 2.4 2781 */ 2782 public static final int NO_JOINING_GROUP = 0; 2783 /** 2784 * @stable ICU 2.4 2785 */ 2786 public static final int AIN = 1; 2787 /** 2788 * @stable ICU 2.4 2789 */ 2790 public static final int ALAPH = 2; 2791 /** 2792 * @stable ICU 2.4 2793 */ 2794 public static final int ALEF = 3; 2795 /** 2796 * @stable ICU 2.4 2797 */ 2798 public static final int BEH = 4; 2799 /** 2800 * @stable ICU 2.4 2801 */ 2802 public static final int BETH = 5; 2803 /** 2804 * @stable ICU 2.4 2805 */ 2806 public static final int DAL = 6; 2807 /** 2808 * @stable ICU 2.4 2809 */ 2810 public static final int DALATH_RISH = 7; 2811 /** 2812 * @stable ICU 2.4 2813 */ 2814 public static final int E = 8; 2815 /** 2816 * @stable ICU 2.4 2817 */ 2818 public static final int FEH = 9; 2819 /** 2820 * @stable ICU 2.4 2821 */ 2822 public static final int FINAL_SEMKATH = 10; 2823 /** 2824 * @stable ICU 2.4 2825 */ 2826 public static final int GAF = 11; 2827 /** 2828 * @stable ICU 2.4 2829 */ 2830 public static final int GAMAL = 12; 2831 /** 2832 * @stable ICU 2.4 2833 */ 2834 public static final int HAH = 13; 2835 /** @stable ICU 4.6 */ 2836 public static final int TEH_MARBUTA_GOAL = 14; 2837 /** 2838 * @stable ICU 2.4 2839 */ 2840 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 2841 /** 2842 * @stable ICU 2.4 2843 */ 2844 public static final int HE = 15; 2845 /** 2846 * @stable ICU 2.4 2847 */ 2848 public static final int HEH = 16; 2849 /** 2850 * @stable ICU 2.4 2851 */ 2852 public static final int HEH_GOAL = 17; 2853 /** 2854 * @stable ICU 2.4 2855 */ 2856 public static final int HETH = 18; 2857 /** 2858 * @stable ICU 2.4 2859 */ 2860 public static final int KAF = 19; 2861 /** 2862 * @stable ICU 2.4 2863 */ 2864 public static final int KAPH = 20; 2865 /** 2866 * @stable ICU 2.4 2867 */ 2868 public static final int KNOTTED_HEH = 
21; 2869 /** 2870 * @stable ICU 2.4 2871 */ 2872 public static final int LAM = 22; 2873 /** 2874 * @stable ICU 2.4 2875 */ 2876 public static final int LAMADH = 23; 2877 /** 2878 * @stable ICU 2.4 2879 */ 2880 public static final int MEEM = 24; 2881 /** 2882 * @stable ICU 2.4 2883 */ 2884 public static final int MIM = 25; 2885 /** 2886 * @stable ICU 2.4 2887 */ 2888 public static final int NOON = 26; 2889 /** 2890 * @stable ICU 2.4 2891 */ 2892 public static final int NUN = 27; 2893 /** 2894 * @stable ICU 2.4 2895 */ 2896 public static final int PE = 28; 2897 /** 2898 * @stable ICU 2.4 2899 */ 2900 public static final int QAF = 29; 2901 /** 2902 * @stable ICU 2.4 2903 */ 2904 public static final int QAPH = 30; 2905 /** 2906 * @stable ICU 2.4 2907 */ 2908 public static final int REH = 31; 2909 /** 2910 * @stable ICU 2.4 2911 */ 2912 public static final int REVERSED_PE = 32; 2913 /** 2914 * @stable ICU 2.4 2915 */ 2916 public static final int SAD = 33; 2917 /** 2918 * @stable ICU 2.4 2919 */ 2920 public static final int SADHE = 34; 2921 /** 2922 * @stable ICU 2.4 2923 */ 2924 public static final int SEEN = 35; 2925 /** 2926 * @stable ICU 2.4 2927 */ 2928 public static final int SEMKATH = 36; 2929 /** 2930 * @stable ICU 2.4 2931 */ 2932 public static final int SHIN = 37; 2933 /** 2934 * @stable ICU 2.4 2935 */ 2936 public static final int SWASH_KAF = 38; 2937 /** 2938 * @stable ICU 2.4 2939 */ 2940 public static final int SYRIAC_WAW = 39; 2941 /** 2942 * @stable ICU 2.4 2943 */ 2944 public static final int TAH = 40; 2945 /** 2946 * @stable ICU 2.4 2947 */ 2948 public static final int TAW = 41; 2949 /** 2950 * @stable ICU 2.4 2951 */ 2952 public static final int TEH_MARBUTA = 42; 2953 /** 2954 * @stable ICU 2.4 2955 */ 2956 public static final int TETH = 43; 2957 /** 2958 * @stable ICU 2.4 2959 */ 2960 public static final int WAW = 44; 2961 /** 2962 * @stable ICU 2.4 2963 */ 2964 public static final int YEH = 45; 2965 /** 2966 * @stable ICU 2.4 2967 */ 2968 public 
static final int YEH_BARREE = 46; 2969 /** 2970 * @stable ICU 2.4 2971 */ 2972 public static final int YEH_WITH_TAIL = 47; 2973 /** 2974 * @stable ICU 2.4 2975 */ 2976 public static final int YUDH = 48; 2977 /** 2978 * @stable ICU 2.4 2979 */ 2980 public static final int YUDH_HE = 49; 2981 /** 2982 * @stable ICU 2.4 2983 */ 2984 public static final int ZAIN = 50; 2985 /** 2986 * @stable ICU 2.6 2987 */ 2988 public static final int FE = 51; 2989 /** 2990 * @stable ICU 2.6 2991 */ 2992 public static final int KHAPH = 52; 2993 /** 2994 * @stable ICU 2.6 2995 */ 2996 public static final int ZHAIN = 53; 2997 /** 2998 * @stable ICU 4.0 2999 */ 3000 public static final int BURUSHASKI_YEH_BARREE = 54; 3001 /** @stable ICU 4.4 */ 3002 public static final int FARSI_YEH = 55; 3003 /** @stable ICU 4.4 */ 3004 public static final int NYA = 56; 3005 /** @stable ICU 49 */ 3006 public static final int ROHINGYA_YEH = 57; 3007 3008 /** @stable ICU 54 */ 3009 public static final int MANICHAEAN_ALEPH = 58; 3010 /** @stable ICU 54 */ 3011 public static final int MANICHAEAN_AYIN = 59; 3012 /** @stable ICU 54 */ 3013 public static final int MANICHAEAN_BETH = 60; 3014 /** @stable ICU 54 */ 3015 public static final int MANICHAEAN_DALETH = 61; 3016 /** @stable ICU 54 */ 3017 public static final int MANICHAEAN_DHAMEDH = 62; 3018 /** @stable ICU 54 */ 3019 public static final int MANICHAEAN_FIVE = 63; 3020 /** @stable ICU 54 */ 3021 public static final int MANICHAEAN_GIMEL = 64; 3022 /** @stable ICU 54 */ 3023 public static final int MANICHAEAN_HETH = 65; 3024 /** @stable ICU 54 */ 3025 public static final int MANICHAEAN_HUNDRED = 66; 3026 /** @stable ICU 54 */ 3027 public static final int MANICHAEAN_KAPH = 67; 3028 /** @stable ICU 54 */ 3029 public static final int MANICHAEAN_LAMEDH = 68; 3030 /** @stable ICU 54 */ 3031 public static final int MANICHAEAN_MEM = 69; 3032 /** @stable ICU 54 */ 3033 public static final int MANICHAEAN_NUN = 70; 3034 /** @stable ICU 54 */ 3035 public static final 
int MANICHAEAN_ONE = 71; 3036 /** @stable ICU 54 */ 3037 public static final int MANICHAEAN_PE = 72; 3038 /** @stable ICU 54 */ 3039 public static final int MANICHAEAN_QOPH = 73; 3040 /** @stable ICU 54 */ 3041 public static final int MANICHAEAN_RESH = 74; 3042 /** @stable ICU 54 */ 3043 public static final int MANICHAEAN_SADHE = 75; 3044 /** @stable ICU 54 */ 3045 public static final int MANICHAEAN_SAMEKH = 76; 3046 /** @stable ICU 54 */ 3047 public static final int MANICHAEAN_TAW = 77; 3048 /** @stable ICU 54 */ 3049 public static final int MANICHAEAN_TEN = 78; 3050 /** @stable ICU 54 */ 3051 public static final int MANICHAEAN_TETH = 79; 3052 /** @stable ICU 54 */ 3053 public static final int MANICHAEAN_THAMEDH = 80; 3054 /** @stable ICU 54 */ 3055 public static final int MANICHAEAN_TWENTY = 81; 3056 /** @stable ICU 54 */ 3057 public static final int MANICHAEAN_WAW = 82; 3058 /** @stable ICU 54 */ 3059 public static final int MANICHAEAN_YODH = 83; 3060 /** @stable ICU 54 */ 3061 public static final int MANICHAEAN_ZAYIN = 84; 3062 /** @stable ICU 54 */ 3063 public static final int STRAIGHT_WAW = 85; 3064 3065 /** @stable ICU 58 */ 3066 public static final int AFRICAN_FEH = 86; 3067 /** @stable ICU 58 */ 3068 public static final int AFRICAN_NOON = 87; 3069 /** @stable ICU 58 */ 3070 public static final int AFRICAN_QAF = 88; 3071 3072 /** @stable ICU 60 */ 3073 public static final int MALAYALAM_BHA = 89; 3074 /** @stable ICU 60 */ 3075 public static final int MALAYALAM_JA = 90; 3076 /** @stable ICU 60 */ 3077 public static final int MALAYALAM_LLA = 91; 3078 /** @stable ICU 60 */ 3079 public static final int MALAYALAM_LLLA = 92; 3080 /** @stable ICU 60 */ 3081 public static final int MALAYALAM_NGA = 93; 3082 /** @stable ICU 60 */ 3083 public static final int MALAYALAM_NNA = 94; 3084 /** @stable ICU 60 */ 3085 public static final int MALAYALAM_NNNA = 95; 3086 /** @stable ICU 60 */ 3087 public static final int MALAYALAM_NYA = 96; 3088 /** @stable ICU 60 */ 3089 public 
static final int MALAYALAM_RA = 97; 3090 /** @stable ICU 60 */ 3091 public static final int MALAYALAM_SSA = 98; 3092 /** @stable ICU 60 */ 3093 public static final int MALAYALAM_TTA = 99; 3094 3095 /** 3096 * One more than the highest normal JoiningGroup value. 3097 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 3098 * 3099 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3100 */ 3101 @Deprecated 3102 public static final int COUNT = 100; 3103 } 3104 3105 /** 3106 * Grapheme Cluster Break constants. 3107 * @see UProperty#GRAPHEME_CLUSTER_BREAK 3108 * @stable ICU 3.4 3109 */ 3110 public static interface GraphemeClusterBreak { 3111 /** 3112 * @stable ICU 3.4 3113 */ 3114 public static final int OTHER = 0; 3115 /** 3116 * @stable ICU 3.4 3117 */ 3118 public static final int CONTROL = 1; 3119 /** 3120 * @stable ICU 3.4 3121 */ 3122 public static final int CR = 2; 3123 /** 3124 * @stable ICU 3.4 3125 */ 3126 public static final int EXTEND = 3; 3127 /** 3128 * @stable ICU 3.4 3129 */ 3130 public static final int L = 4; 3131 /** 3132 * @stable ICU 3.4 3133 */ 3134 public static final int LF = 5; 3135 /** 3136 * @stable ICU 3.4 3137 */ 3138 public static final int LV = 6; 3139 /** 3140 * @stable ICU 3.4 3141 */ 3142 public static final int LVT = 7; 3143 /** 3144 * @stable ICU 3.4 3145 */ 3146 public static final int T = 8; 3147 /** 3148 * @stable ICU 3.4 3149 */ 3150 public static final int V = 9; 3151 /** 3152 * @stable ICU 4.0 3153 */ 3154 public static final int SPACING_MARK = 10; 3155 /** 3156 * @stable ICU 4.0 3157 */ 3158 public static final int PREPEND = 11; 3159 /** @stable ICU 50 */ 3160 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3161 /** @stable ICU 58 */ 3162 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3163 /** @stable ICU 58 */ 3164 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 
3165 /** @stable ICU 58 */ 3166 public static final int E_MODIFIER = 15; /*[EM]*/ 3167 /** @stable ICU 58 */ 3168 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 3169 /** @stable ICU 58 */ 3170 public static final int ZWJ = 17; /*[ZWJ]*/ 3171 /** 3172 * One more than the highest normal GraphemeClusterBreak value. 3173 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 3174 * 3175 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3176 */ 3177 @Deprecated 3178 public static final int COUNT = 18; 3179 } 3180 3181 /** 3182 * Word Break constants. 3183 * @see UProperty#WORD_BREAK 3184 * @stable ICU 3.4 3185 */ 3186 public static interface WordBreak { 3187 /** 3188 * @stable ICU 3.8 3189 */ 3190 public static final int OTHER = 0; 3191 /** 3192 * @stable ICU 3.8 3193 */ 3194 public static final int ALETTER = 1; 3195 /** 3196 * @stable ICU 3.8 3197 */ 3198 public static final int FORMAT = 2; 3199 /** 3200 * @stable ICU 3.8 3201 */ 3202 public static final int KATAKANA = 3; 3203 /** 3204 * @stable ICU 3.8 3205 */ 3206 public static final int MIDLETTER = 4; 3207 /** 3208 * @stable ICU 3.8 3209 */ 3210 public static final int MIDNUM = 5; 3211 /** 3212 * @stable ICU 3.8 3213 */ 3214 public static final int NUMERIC = 6; 3215 /** 3216 * @stable ICU 3.8 3217 */ 3218 public static final int EXTENDNUMLET = 7; 3219 /** 3220 * @stable ICU 4.0 3221 */ 3222 public static final int CR = 8; 3223 /** 3224 * @stable ICU 4.0 3225 */ 3226 public static final int EXTEND = 9; 3227 /** 3228 * @stable ICU 4.0 3229 */ 3230 public static final int LF = 10; 3231 /** 3232 * @stable ICU 4.0 3233 */ 3234 public static final int MIDNUMLET = 11; 3235 /** 3236 * @stable ICU 4.0 3237 */ 3238 public static final int NEWLINE = 12; 3239 /** @stable ICU 50 */ 3240 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3241 /** @stable ICU 52 */ 3242 public static final int 
HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3243 /** @stable ICU 52 */ 3244 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3245 /** @stable ICU 52 */ 3246 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3247 /** @stable ICU 58 */ 3248 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3249 /** @stable ICU 58 */ 3250 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 3251 /** @stable ICU 58 */ 3252 public static final int E_MODIFIER = 19; /*[EM]*/ 3253 /** @stable ICU 58 */ 3254 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 3255 /** @stable ICU 58 */ 3256 public static final int ZWJ = 21; /*[ZWJ]*/ 3257 /** 3258 * One more than the highest normal WordBreak value. 3259 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 3260 * 3261 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3262 */ 3263 @Deprecated 3264 public static final int COUNT = 22; 3265 } 3266 3267 /** 3268 * Sentence Break constants. 
3269 * @see UProperty#SENTENCE_BREAK 3270 * @stable ICU 3.4 3271 */ 3272 public static interface SentenceBreak { 3273 /** 3274 * @stable ICU 3.8 3275 */ 3276 public static final int OTHER = 0; 3277 /** 3278 * @stable ICU 3.8 3279 */ 3280 public static final int ATERM = 1; 3281 /** 3282 * @stable ICU 3.8 3283 */ 3284 public static final int CLOSE = 2; 3285 /** 3286 * @stable ICU 3.8 3287 */ 3288 public static final int FORMAT = 3; 3289 /** 3290 * @stable ICU 3.8 3291 */ 3292 public static final int LOWER = 4; 3293 /** 3294 * @stable ICU 3.8 3295 */ 3296 public static final int NUMERIC = 5; 3297 /** 3298 * @stable ICU 3.8 3299 */ 3300 public static final int OLETTER = 6; 3301 /** 3302 * @stable ICU 3.8 3303 */ 3304 public static final int SEP = 7; 3305 /** 3306 * @stable ICU 3.8 3307 */ 3308 public static final int SP = 8; 3309 /** 3310 * @stable ICU 3.8 3311 */ 3312 public static final int STERM = 9; 3313 /** 3314 * @stable ICU 3.8 3315 */ 3316 public static final int UPPER = 10; 3317 /** 3318 * @stable ICU 4.0 3319 */ 3320 public static final int CR = 11; 3321 /** 3322 * @stable ICU 4.0 3323 */ 3324 public static final int EXTEND = 12; 3325 /** 3326 * @stable ICU 4.0 3327 */ 3328 public static final int LF = 13; 3329 /** 3330 * @stable ICU 4.0 3331 */ 3332 public static final int SCONTINUE = 14; 3333 /** 3334 * One more than the highest normal SentenceBreak value. 3335 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3336 * 3337 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3338 */ 3339 @Deprecated 3340 public static final int COUNT = 15; 3341 } 3342 3343 /** 3344 * Line Break constants. 
3345 * @see UProperty#LINE_BREAK 3346 * @stable ICU 2.4 3347 */ 3348 public static interface LineBreak 3349 { 3350 /** 3351 * @stable ICU 2.4 3352 */ 3353 public static final int UNKNOWN = 0; 3354 /** 3355 * @stable ICU 2.4 3356 */ 3357 public static final int AMBIGUOUS = 1; 3358 /** 3359 * @stable ICU 2.4 3360 */ 3361 public static final int ALPHABETIC = 2; 3362 /** 3363 * @stable ICU 2.4 3364 */ 3365 public static final int BREAK_BOTH = 3; 3366 /** 3367 * @stable ICU 2.4 3368 */ 3369 public static final int BREAK_AFTER = 4; 3370 /** 3371 * @stable ICU 2.4 3372 */ 3373 public static final int BREAK_BEFORE = 5; 3374 /** 3375 * @stable ICU 2.4 3376 */ 3377 public static final int MANDATORY_BREAK = 6; 3378 /** 3379 * @stable ICU 2.4 3380 */ 3381 public static final int CONTINGENT_BREAK = 7; 3382 /** 3383 * @stable ICU 2.4 3384 */ 3385 public static final int CLOSE_PUNCTUATION = 8; 3386 /** 3387 * @stable ICU 2.4 3388 */ 3389 public static final int COMBINING_MARK = 9; 3390 /** 3391 * @stable ICU 2.4 3392 */ 3393 public static final int CARRIAGE_RETURN = 10; 3394 /** 3395 * @stable ICU 2.4 3396 */ 3397 public static final int EXCLAMATION = 11; 3398 /** 3399 * @stable ICU 2.4 3400 */ 3401 public static final int GLUE = 12; 3402 /** 3403 * @stable ICU 2.4 3404 */ 3405 public static final int HYPHEN = 13; 3406 /** 3407 * @stable ICU 2.4 3408 */ 3409 public static final int IDEOGRAPHIC = 14; 3410 /** 3411 * @see #INSEPARABLE 3412 * @stable ICU 2.4 3413 */ 3414 public static final int INSEPERABLE = 15; 3415 /** 3416 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3417 * @stable ICU 3.0 3418 */ 3419 public static final int INSEPARABLE = 15; 3420 /** 3421 * @stable ICU 2.4 3422 */ 3423 public static final int INFIX_NUMERIC = 16; 3424 /** 3425 * @stable ICU 2.4 3426 */ 3427 public static final int LINE_FEED = 17; 3428 /** 3429 * @stable ICU 2.4 3430 */ 3431 public static final int NONSTARTER = 18; 3432 /** 3433 * @stable ICU 2.4 3434 */ 3435 public static final int NUMERIC = 19; 3436 /** 3437 * @stable ICU 2.4 3438 */ 3439 public static final int OPEN_PUNCTUATION = 20; 3440 /** 3441 * @stable ICU 2.4 3442 */ 3443 public static final int POSTFIX_NUMERIC = 21; 3444 /** 3445 * @stable ICU 2.4 3446 */ 3447 public static final int PREFIX_NUMERIC = 22; 3448 /** 3449 * @stable ICU 2.4 3450 */ 3451 public static final int QUOTATION = 23; 3452 /** 3453 * @stable ICU 2.4 3454 */ 3455 public static final int COMPLEX_CONTEXT = 24; 3456 /** 3457 * @stable ICU 2.4 3458 */ 3459 public static final int SURROGATE = 25; 3460 /** 3461 * @stable ICU 2.4 3462 */ 3463 public static final int SPACE = 26; 3464 /** 3465 * @stable ICU 2.4 3466 */ 3467 public static final int BREAK_SYMBOLS = 27; 3468 /** 3469 * @stable ICU 2.4 3470 */ 3471 public static final int ZWSPACE = 28; 3472 /** 3473 * @stable ICU 2.6 3474 */ 3475 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3476 /** 3477 * @stable ICU 2.6 3478 */ 3479 public static final int WORD_JOINER = 30; /*[WJ]*/ 3480 /** 3481 * @stable ICU 3.4 3482 */ 3483 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3484 /** 3485 * @stable ICU 3.4 3486 */ 3487 public static final int H3 = 32; 3488 /** 3489 * @stable ICU 3.4 3490 */ 3491 public static final int JL = 33; 3492 /** 3493 * @stable ICU 3.4 3494 */ 3495 public static final int JT = 34; 3496 /** 3497 * @stable ICU 3.4 3498 */ 3499 public static final int JV = 35; 3500 /** @stable ICU 4.4 */ 3501 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/ 3502 /** @stable ICU 49 */ 3503 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3504 /** @stable ICU 49 */ 3505 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3506 /** @stable ICU 50 */ 3507 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3508 /** @stable ICU 58 */ 3509 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3510 /** @stable ICU 58 */ 3511 public static final int E_MODIFIER = 41; /*[EM]*/ 3512 /** @stable ICU 58 */ 3513 public static final int ZWJ = 42; /*[ZWJ]*/ 3514 /** 3515 * One more than the highest normal LineBreak value. 3516 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3517 * 3518 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3519 */ 3520 @Deprecated 3521 public static final int COUNT = 43; 3522 } 3523 3524 /** 3525 * Numeric Type constants. 3526 * @see UProperty#NUMERIC_TYPE 3527 * @stable ICU 2.4 3528 */ 3529 public static interface NumericType 3530 { 3531 /** 3532 * @stable ICU 2.4 3533 */ 3534 public static final int NONE = 0; 3535 /** 3536 * @stable ICU 2.4 3537 */ 3538 public static final int DECIMAL = 1; 3539 /** 3540 * @stable ICU 2.4 3541 */ 3542 public static final int DIGIT = 2; 3543 /** 3544 * @stable ICU 2.4 3545 */ 3546 public static final int NUMERIC = 3; 3547 /** 3548 * One more than the highest normal NumericType value. 3549 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3550 * 3551 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3552 */ 3553 @Deprecated 3554 public static final int COUNT = 4; 3555 } 3556 3557 /** 3558 * Hangul Syllable Type constants. 
3559 * 3560 * @see UProperty#HANGUL_SYLLABLE_TYPE 3561 * @stable ICU 2.6 3562 */ 3563 public static interface HangulSyllableType 3564 { 3565 /** 3566 * @stable ICU 2.6 3567 */ 3568 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3569 /** 3570 * @stable ICU 2.6 3571 */ 3572 public static final int LEADING_JAMO = 1; /*[L]*/ 3573 /** 3574 * @stable ICU 2.6 3575 */ 3576 public static final int VOWEL_JAMO = 2; /*[V]*/ 3577 /** 3578 * @stable ICU 2.6 3579 */ 3580 public static final int TRAILING_JAMO = 3; /*[T]*/ 3581 /** 3582 * @stable ICU 2.6 3583 */ 3584 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3585 /** 3586 * @stable ICU 2.6 3587 */ 3588 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3589 /** 3590 * One more than the highest normal HangulSyllableType value. 3591 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 3592 * 3593 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3594 */ 3595 @Deprecated 3596 public static final int COUNT = 6; 3597 } 3598 3599 /** 3600 * Bidi Paired Bracket Type constants. 3601 * 3602 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3603 * @stable ICU 52 3604 */ 3605 public static interface BidiPairedBracketType { 3606 /** 3607 * Not a paired bracket. 3608 * @stable ICU 52 3609 */ 3610 public static final int NONE = 0; 3611 /** 3612 * Open paired bracket. 3613 * @stable ICU 52 3614 */ 3615 public static final int OPEN = 1; 3616 /** 3617 * Close paired bracket. 3618 * @stable ICU 52 3619 */ 3620 public static final int CLOSE = 2; 3621 /** 3622 * One more than the highest normal BidiPairedBracketType value. 3623 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3624 * 3625 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/
        @Deprecated
        public static final int COUNT = 3;
    }

    // public data members -----------------------------------------------

    /**
     * The lowest Unicode code point value, constant 0.
     * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
     *
     * @stable ICU 2.1
     */
    public static final int MIN_VALUE = Character.MIN_CODE_POINT;

    /**
     * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
     * Same as {@link Character#MAX_CODE_POINT}.
     *
     * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
     * which is still a char with the value U+FFFF.
     *
     * @stable ICU 2.1
     */
    public static final int MAX_VALUE = Character.MAX_CODE_POINT;

    /**
     * The minimum value for Supplementary code points, constant U+10000.
     * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
     *
     * @stable ICU 2.1
     */
    public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Unicode value used when translating into Unicode encoding form and there
     * is no existing character; constant U+FFFD.
     * @stable ICU 2.1
     */
    public static final int REPLACEMENT_CHAR = '\uFFFD';

    /**
     * Special value that is returned by getUnicodeNumericValue(int) when no
     * numeric value is defined for a code point.
     * @stable ICU 2.4
     * @see #getUnicodeNumericValue
     */
    public static final double NO_NUMERIC_VALUE = -123456789;

    /**
     * Compatibility constant for Java Character's MIN_RADIX.
     * Same as {@link Character#MIN_RADIX}.
     * @stable ICU 3.4
     */
    public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;

    /**
     * Compatibility constant for Java Character's MAX_RADIX.
* @stable ICU 3.4
 */
public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;

/**
 * Do not lowercase non-initial parts of words when titlecasing.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * <p>By default, titlecasing will titlecase the first cased character
 * of a word and lowercase all other characters.
 * With this option, the other characters will not be modified.
 *
 * @see #toTitleCase
 * @stable ICU 3.8
 */
public static final int TITLECASE_NO_LOWERCASE = 0x100;

/**
 * Do not adjust the titlecasing indexes from BreakIterator.next() indexes;
 * titlecase exactly the characters at breaks from the iterator.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * <p>By default, titlecasing will take each break iterator index,
 * adjust it by looking for the next cased character, and titlecase that one.
 * Other characters are lowercased.
 *
 * <p>This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
 *
 * <p>R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
 * cased character F. If F exists, map F to default_title(F); then map each
 * subsequent character C to default_lower(C).
 *
 * @see #toTitleCase
 * @see #TITLECASE_NO_LOWERCASE
 * @stable ICU 3.8
 */
public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;

// public methods ----------------------------------------------------

/**
 * Returns the numeric value of a decimal digit code point.
 * <br>This method observes the semantics of
 * <code>java.lang.Character.digit()</code>. Note that this
 * will return positive values for code points for which isDigit
 * returns false, just like java.lang.Character.
3730 * <br><em>Semantic Change:</em> In release 1.3.1 and 3731 * prior, this did not treat the European letters as having a 3732 * digit value, and also treated numeric letters and other numbers as 3733 * digits. 3734 * This has been changed to conform to the java semantics. 3735 * <br>A code point is a valid digit if and only if: 3736 * <ul> 3737 * <li>ch is a decimal digit or one of the european letters, and 3738 * <li>the value of ch is less than the specified radix. 3739 * </ul> 3740 * @param ch the code point to query 3741 * @param radix the radix 3742 * @return the numeric value represented by the code point in the 3743 * specified radix, or -1 if the code point is not a decimal digit 3744 * or if its value is too large for the radix 3745 * @stable ICU 2.1 3746 */ 3747 public static int digit(int ch, int radix) 3748 { 3749 if (2 <= radix && radix <= 36) { 3750 int value = digit(ch); 3751 if (value < 0) { 3752 // ch is not a decimal digit, try latin letters 3753 value = UCharacterProperty.getEuropeanDigit(ch); 3754 } 3755 return (value < radix) ? value : -1; 3756 } else { 3757 return -1; // invalid radix 3758 } 3759 } 3760 3761 /** 3762 * Returnss the numeric value of a decimal digit code point. 3763 * <br>This is a convenience overload of <code>digit(int, int)</code> 3764 * that provides a decimal radix. 3765 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3766 * treated numeric letters and other numbers as digits. This has 3767 * been changed to conform to the java semantics. 3768 * @param ch the code point to query 3769 * @return the numeric value represented by the code point, 3770 * or -1 if the code point is not a decimal digit or if its 3771 * value is too large for a decimal radix 3772 * @stable ICU 2.1 3773 */ 3774 public static int digit(int ch) 3775 { 3776 return UCharacterProperty.INSTANCE.digit(ch); 3777 } 3778 3779 /** 3780 * Returns the numeric value of the code point as a nonnegative 3781 * integer. 
3782 * <br>If the code point does not have a numeric value, then -1 is returned. 3783 * <br> 3784 * If the code point has a numeric value that cannot be represented as a 3785 * nonnegative integer (for example, a fractional value), then -2 is 3786 * returned. 3787 * @param ch the code point to query 3788 * @return the numeric value of the code point, or -1 if it has no numeric 3789 * value, or -2 if it has a numeric value that cannot be represented as a 3790 * nonnegative integer 3791 * @stable ICU 2.1 3792 */ 3793 public static int getNumericValue(int ch) 3794 { 3795 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3796 } 3797 3798 /** 3799 * {@icu} Returns the numeric value for a Unicode code point as defined in the 3800 * Unicode Character Database. 3801 * <p>A "double" return type is necessary because some numeric values are 3802 * fractions, negative, or too large for int. 3803 * <p>For characters without any numeric values in the Unicode Character 3804 * Database, this function will return NO_NUMERIC_VALUE. 3805 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 3806 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3807 * return type int and returns -1 when the argument ch does not have a 3808 * corresponding numeric value. This has been changed to synch with ICU4C 3809 * 3810 * This corresponds to the ICU4C function u_getNumericValue. 3811 * @param ch Code point to get the numeric value for. 3812 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3813 * @stable ICU 2.4 3814 */ 3815 public static double getUnicodeNumericValue(int ch) 3816 { 3817 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3818 } 3819 3820 /** 3821 * Compatibility override of Java deprecated method. This 3822 * method will always remain deprecated. 3823 * Same as java.lang.Character.isSpace(). 
3824 * @param ch the code point 3825 * @return true if the code point is a space character as 3826 * defined by java.lang.Character.isSpace. 3827 * @deprecated ICU 3.4 (Java) 3828 */ 3829 @Deprecated 3830 public static boolean isSpace(int ch) { 3831 return ch <= 0x20 && 3832 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3833 } 3834 3835 /** 3836 * Returns a value indicating a code point's Unicode category. 3837 * Up-to-date Unicode implementation of java.lang.Character.getType() 3838 * except for the above mentioned code points that had their category 3839 * changed.<br> 3840 * Return results are constants from the interface 3841 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3842 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3843 * those returned by java.lang.Character.getType. UCharacterCategory values 3844 * match the ones used in ICU4C, while java.lang.Character type 3845 * values, though similar, skip the value 17. 3846 * @param ch code point whose type is to be determined 3847 * @return category which is a value of UCharacterCategory 3848 * @stable ICU 2.1 3849 */ 3850 public static int getType(int ch) 3851 { 3852 return UCharacterProperty.INSTANCE.getType(ch); 3853 } 3854 3855 /** 3856 * Determines if a code point has a defined meaning in the up-to-date 3857 * Unicode standard. 3858 * E.g. supplementary code points though allocated space are not defined in 3859 * Unicode yet.<br> 3860 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3861 * @param ch code point to be determined if it is defined in the most 3862 * current version of Unicode 3863 * @return true if this code point is defined in unicode 3864 * @stable ICU 2.1 3865 */ 3866 public static boolean isDefined(int ch) 3867 { 3868 return getType(ch) != 0; 3869 } 3870 3871 /** 3872 * Determines if a code point is a Java digit. 
3873 * <br>This method observes the semantics of 3874 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3875 * digits only. 3876 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3877 * numeric letters and other numbers as digits. 3878 * This has been changed to conform to the java semantics. 3879 * @param ch code point to query 3880 * @return true if this code point is a digit 3881 * @stable ICU 2.1 3882 */ 3883 public static boolean isDigit(int ch) 3884 { 3885 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3886 } 3887 3888 /** 3889 * Determines if the specified code point is an ISO control character. 3890 * A code point is considered to be an ISO control character if it is in 3891 * the range \u0000 through \u001F or in the range \u007F through 3892 * \u009F.<br> 3893 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3894 * @param ch code point to determine if it is an ISO control character 3895 * @return true if code point is a ISO control character 3896 * @stable ICU 2.1 3897 */ 3898 public static boolean isISOControl(int ch) 3899 { 3900 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3901 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3902 } 3903 3904 /** 3905 * Determines if the specified code point is a letter. 
3906 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3907 * @param ch code point to determine if it is a letter 3908 * @return true if code point is a letter 3909 * @stable ICU 2.1 3910 */ 3911 public static boolean isLetter(int ch) 3912 { 3913 // if props == 0, it will just fall through and return false 3914 return ((1 << getType(ch)) 3915 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3916 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3917 | (1 << UCharacterCategory.TITLECASE_LETTER) 3918 | (1 << UCharacterCategory.MODIFIER_LETTER) 3919 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3920 } 3921 3922 /** 3923 * Determines if the specified code point is a letter or digit. 3924 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 3925 * characters 'A' - 'Z' and 'a' - 'z' as digits. 3926 * @param ch code point to determine if it is a letter or a digit 3927 * @return true if code point is a letter or a digit 3928 * @stable ICU 2.1 3929 */ 3930 public static boolean isLetterOrDigit(int ch) 3931 { 3932 return ((1 << getType(ch)) 3933 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3934 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3935 | (1 << UCharacterCategory.TITLECASE_LETTER) 3936 | (1 << UCharacterCategory.MODIFIER_LETTER) 3937 | (1 << UCharacterCategory.OTHER_LETTER) 3938 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3939 } 3940 3941 /** 3942 * Compatibility override of Java deprecated method. This 3943 * method will always remain deprecated. Delegates to 3944 * java.lang.Character.isJavaIdentifierStart. 3945 * @param cp the code point 3946 * @return true if the code point can start a java identifier. 3947 * @deprecated ICU 3.4 (Java) 3948 */ 3949 @Deprecated 3950 public static boolean isJavaLetter(int cp) { 3951 return isJavaIdentifierStart(cp); 3952 } 3953 3954 /** 3955 * Compatibility override of Java deprecated method. This 3956 * method will always remain deprecated. 
Delegates to 3957 * java.lang.Character.isJavaIdentifierPart. 3958 * @param cp the code point 3959 * @return true if the code point can continue a java identifier. 3960 * @deprecated ICU 3.4 (Java) 3961 */ 3962 @Deprecated 3963 public static boolean isJavaLetterOrDigit(int cp) { 3964 return isJavaIdentifierPart(cp); 3965 } 3966 3967 /** 3968 * Compatibility override of Java method, delegates to 3969 * java.lang.Character.isJavaIdentifierStart. 3970 * @param cp the code point 3971 * @return true if the code point can start a java identifier. 3972 * @stable ICU 3.4 3973 */ 3974 public static boolean isJavaIdentifierStart(int cp) { 3975 // note, downcast to char for jdk 1.4 compatibility 3976 return java.lang.Character.isJavaIdentifierStart((char)cp); 3977 } 3978 3979 /** 3980 * Compatibility override of Java method, delegates to 3981 * java.lang.Character.isJavaIdentifierPart. 3982 * @param cp the code point 3983 * @return true if the code point can continue a java identifier. 3984 * @stable ICU 3.4 3985 */ 3986 public static boolean isJavaIdentifierPart(int cp) { 3987 // note, downcast to char for jdk 1.4 compatibility 3988 return java.lang.Character.isJavaIdentifierPart((char)cp); 3989 } 3990 3991 /** 3992 * Determines if the specified code point is a lowercase character. 
3993 * UnicodeData only contains case mappings for code points where they are 3994 * one-to-one mappings; it also omits information about context-sensitive 3995 * case mappings.<br> For more information about Unicode case mapping 3996 * please refer to the 3997 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3998 * #21</a>.<br> 3999 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 4000 * @param ch code point to determine if it is in lowercase 4001 * @return true if code point is a lowercase character 4002 * @stable ICU 2.1 4003 */ 4004 public static boolean isLowerCase(int ch) 4005 { 4006 // if props == 0, it will just fall through and return false 4007 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 4008 } 4009 4010 /** 4011 * Determines if the specified code point is a white space character. 4012 * A code point is considered to be an whitespace character if and only 4013 * if it satisfies one of the following criteria: 4014 * <ul> 4015 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 4016 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 4017 * <li> It is \u0009, HORIZONTAL TABULATION. 4018 * <li> It is \u000A, LINE FEED. 4019 * <li> It is \u000B, VERTICAL TABULATION. 4020 * <li> It is \u000C, FORM FEED. 4021 * <li> It is \u000D, CARRIAGE RETURN. 4022 * <li> It is \u001C, FILE SEPARATOR. 4023 * <li> It is \u001D, GROUP SEPARATOR. 4024 * <li> It is \u001E, RECORD SEPARATOR. 4025 * <li> It is \u001F, UNIT SEPARATOR. 4026 * </ul> 4027 * 4028 * This API tries to sync with the semantics of Java's 4029 * java.lang.Character.isWhitespace(), but it may not return 4030 * the exact same results because of the Unicode version 4031 * difference. 4032 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 4033 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 
4034 * See http://www.unicode.org/versions/Unicode4.0.1/ 4035 * @param ch code point to determine if it is a white space 4036 * @return true if the specified code point is a white space character 4037 * @stable ICU 2.1 4038 */ 4039 public static boolean isWhitespace(int ch) 4040 { 4041 // exclude no-break spaces 4042 // if props == 0, it will just fall through and return false 4043 return ((1 << getType(ch)) & 4044 ((1 << UCharacterCategory.SPACE_SEPARATOR) 4045 | (1 << UCharacterCategory.LINE_SEPARATOR) 4046 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 4047 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 4048 // TAB VT LF FF CR FS GS RS US NL are all control characters 4049 // that are white spaces. 4050 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 4051 } 4052 4053 /** 4054 * Determines if the specified code point is a Unicode specified space 4055 * character, i.e. if code point is in the category Zs, Zl and Zp. 4056 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 4057 * @param ch code point to determine if it is a space 4058 * @return true if the specified code point is a space character 4059 * @stable ICU 2.1 4060 */ 4061 public static boolean isSpaceChar(int ch) 4062 { 4063 // if props == 0, it will just fall through and return false 4064 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4065 | (1 << UCharacterCategory.LINE_SEPARATOR) 4066 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4067 != 0; 4068 } 4069 4070 /** 4071 * Determines if the specified code point is a titlecase character. 
4072 * UnicodeData only contains case mappings for code points where they are 4073 * one-to-one mappings; it also omits information about context-sensitive 4074 * case mappings.<br> 4075 * For more information about Unicode case mapping please refer to the 4076 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4077 * Technical report #21</a>.<br> 4078 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4079 * @param ch code point to determine if it is in title case 4080 * @return true if the specified code point is a titlecase character 4081 * @stable ICU 2.1 4082 */ 4083 public static boolean isTitleCase(int ch) 4084 { 4085 // if props == 0, it will just fall through and return false 4086 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4087 } 4088 4089 /** 4090 * Determines if the specified code point may be any part of a Unicode 4091 * identifier other than the starting character. 4092 * A code point may be part of a Unicode identifier if and only if it is 4093 * one of the following: 4094 * <ul> 4095 * <li> Lu Uppercase letter 4096 * <li> Ll Lowercase letter 4097 * <li> Lt Titlecase letter 4098 * <li> Lm Modifier letter 4099 * <li> Lo Other letter 4100 * <li> Nl Letter number 4101 * <li> Pc Connecting punctuation character 4102 * <li> Nd decimal number 4103 * <li> Mc Spacing combining mark 4104 * <li> Mn Non-spacing mark 4105 * <li> Cf formatting code 4106 * </ul> 4107 * Up-to-date Unicode implementation of 4108 * java.lang.Character.isUnicodeIdentifierPart().<br> 4109 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
4110 * @param ch code point to determine if is can be part of a Unicode 4111 * identifier 4112 * @return true if code point is any character belonging a unicode 4113 * identifier suffix after the first character 4114 * @stable ICU 2.1 4115 */ 4116 public static boolean isUnicodeIdentifierPart(int ch) 4117 { 4118 // if props == 0, it will just fall through and return false 4119 // cat == format 4120 return ((1 << getType(ch)) 4121 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4122 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4123 | (1 << UCharacterCategory.TITLECASE_LETTER) 4124 | (1 << UCharacterCategory.MODIFIER_LETTER) 4125 | (1 << UCharacterCategory.OTHER_LETTER) 4126 | (1 << UCharacterCategory.LETTER_NUMBER) 4127 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 4128 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4129 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 4130 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 4131 || isIdentifierIgnorable(ch); 4132 } 4133 4134 /** 4135 * Determines if the specified code point is permissible as the first 4136 * character in a Unicode identifier. 4137 * A code point may start a Unicode identifier if it is of type either 4138 * <ul> 4139 * <li> Lu Uppercase letter 4140 * <li> Ll Lowercase letter 4141 * <li> Lt Titlecase letter 4142 * <li> Lm Modifier letter 4143 * <li> Lo Other letter 4144 * <li> Nl Letter number 4145 * </ul> 4146 * Up-to-date Unicode implementation of 4147 * java.lang.Character.isUnicodeIdentifierStart().<br> 4148 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
4149 * @param ch code point to determine if it can start a Unicode identifier 4150 * @return true if code point is the first character belonging a unicode 4151 * identifier 4152 * @stable ICU 2.1 4153 */ 4154 public static boolean isUnicodeIdentifierStart(int ch) 4155 { 4156 /*int cat = getType(ch);*/ 4157 // if props == 0, it will just fall through and return false 4158 return ((1 << getType(ch)) 4159 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4160 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4161 | (1 << UCharacterCategory.TITLECASE_LETTER) 4162 | (1 << UCharacterCategory.MODIFIER_LETTER) 4163 | (1 << UCharacterCategory.OTHER_LETTER) 4164 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 4165 } 4166 4167 /** 4168 * Determines if the specified code point should be regarded as an 4169 * ignorable character in a Java identifier. 4170 * A character is Java-identifier-ignorable if it has the general category 4171 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4172 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4173 * Up-to-date Unicode implementation of 4174 * java.lang.Character.isIdentifierIgnorable().<br> 4175 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4176 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4177 * @param ch code point to be determined if it can be ignored in a Unicode 4178 * identifier. 4179 * @return true if the code point is ignorable 4180 * @stable ICU 2.1 4181 */ 4182 public static boolean isIdentifierIgnorable(int ch) 4183 { 4184 // see java.lang.Character.isIdentifierIgnorable() on range of 4185 // ignorable characters. 4186 if (ch <= 0x9f) { 4187 return isISOControl(ch) 4188 && !((ch >= 0x9 && ch <= 0xd) 4189 || (ch >= 0x1c && ch <= 0x1f)); 4190 } 4191 return getType(ch) == UCharacterCategory.FORMAT; 4192 } 4193 4194 /** 4195 * Determines if the specified code point is an uppercase character. 
4196 * UnicodeData only contains case mappings for code point where they are 4197 * one-to-one mappings; it also omits information about context-sensitive 4198 * case mappings.<br> 4199 * For language specific case conversion behavior, use 4200 * toUpperCase(locale, str). <br> 4201 * For example, the case conversion for dot-less i and dotted I in Turkish, 4202 * or for final sigma in Greek. 4203 * For more information about Unicode case mapping please refer to the 4204 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4205 * Technical report #21</a>.<br> 4206 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4207 * @param ch code point to determine if it is in uppercase 4208 * @return true if the code point is an uppercase character 4209 * @stable ICU 2.1 4210 */ 4211 public static boolean isUpperCase(int ch) 4212 { 4213 // if props == 0, it will just fall through and return false 4214 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4215 } 4216 4217 /** 4218 * The given code point is mapped to its lowercase equivalent; if the code 4219 * point has no lowercase equivalent, the code point itself is returned. 4220 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4221 * 4222 * <p>This function only returns the simple, single-code point case mapping. 4223 * Full case mappings should be used whenever possible because they produce 4224 * better results by working on whole strings. 4225 * They take into account the string context and the language and can map 4226 * to a result string with a different length as appropriate. 4227 * Full case mappings are applied by the case mapping functions 4228 * that take String parameters rather than code points (int). 
4229 * See also the User Guide chapter on C/POSIX migration: 4230 * http://www.icu-project.org/userguide/posix.html#case_mappings 4231 * 4232 * @param ch code point whose lowercase equivalent is to be retrieved 4233 * @return the lowercase equivalent code point 4234 * @stable ICU 2.1 4235 */ 4236 public static int toLowerCase(int ch) { 4237 return UCaseProps.INSTANCE.tolower(ch); 4238 } 4239 4240 /** 4241 * Converts argument code point and returns a String object representing 4242 * the code point's value in UTF-16 format. 4243 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4244 * 4245 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4246 * 4247 * @param ch code point 4248 * @return string representation of the code point, null if code point is not 4249 * defined in unicode 4250 * @stable ICU 2.1 4251 */ 4252 public static String toString(int ch) 4253 { 4254 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4255 return null; 4256 } 4257 4258 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4259 return String.valueOf((char)ch); 4260 } 4261 4262 return new String(Character.toChars(ch)); 4263 } 4264 4265 /** 4266 * Converts the code point argument to titlecase. 4267 * If no titlecase is available, the uppercase is returned. If no uppercase 4268 * is available, the code point itself is returned. 4269 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4270 * 4271 * <p>This function only returns the simple, single-code point case mapping. 4272 * Full case mappings should be used whenever possible because they produce 4273 * better results by working on whole strings. 4274 * They take into account the string context and the language and can map 4275 * to a result string with a different length as appropriate. 4276 * Full case mappings are applied by the case mapping functions 4277 * that take String parameters rather than code points (int). 
4278 * See also the User Guide chapter on C/POSIX migration: 4279 * http://www.icu-project.org/userguide/posix.html#case_mappings 4280 * 4281 * @param ch code point whose title case is to be retrieved 4282 * @return titlecase code point 4283 * @stable ICU 2.1 4284 */ 4285 public static int toTitleCase(int ch) { 4286 return UCaseProps.INSTANCE.totitle(ch); 4287 } 4288 4289 /** 4290 * Converts the character argument to uppercase. 4291 * If no uppercase is available, the character itself is returned. 4292 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4293 * 4294 * <p>This function only returns the simple, single-code point case mapping. 4295 * Full case mappings should be used whenever possible because they produce 4296 * better results by working on whole strings. 4297 * They take into account the string context and the language and can map 4298 * to a result string with a different length as appropriate. 4299 * Full case mappings are applied by the case mapping functions 4300 * that take String parameters rather than code points (int). 4301 * See also the User Guide chapter on C/POSIX migration: 4302 * http://www.icu-project.org/userguide/posix.html#case_mappings 4303 * 4304 * @param ch code point whose uppercase is to be retrieved 4305 * @return uppercase code point 4306 * @stable ICU 2.1 4307 */ 4308 public static int toUpperCase(int ch) { 4309 return UCaseProps.INSTANCE.toupper(ch); 4310 } 4311 4312 // extra methods not in java.lang.Character -------------------------- 4313 4314 /** 4315 * {@icu} Determines if the code point is a supplementary character. 
4316 * A code point is a supplementary character if and only if it is greater 4317 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4318 * @param ch code point to be determined if it is in the supplementary 4319 * plane 4320 * @return true if code point is a supplementary character 4321 * @stable ICU 2.1 4322 */ 4323 public static boolean isSupplementary(int ch) 4324 { 4325 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4326 ch <= UCharacter.MAX_VALUE; 4327 } 4328 4329 /** 4330 * {@icu} Determines if the code point is in the BMP plane. 4331 * @param ch code point to be determined if it is not a supplementary 4332 * character 4333 * @return true if code point is not a supplementary character 4334 * @stable ICU 2.1 4335 */ 4336 public static boolean isBMP(int ch) 4337 { 4338 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4339 } 4340 4341 /** 4342 * {@icu} Determines whether the specified code point is a printable character 4343 * according to the Unicode standard. 4344 * @param ch code point to be determined if it is printable 4345 * @return true if the code point is a printable character 4346 * @stable ICU 2.1 4347 */ 4348 public static boolean isPrintable(int ch) 4349 { 4350 int cat = getType(ch); 4351 // if props == 0, it will just fall through and return false 4352 return (cat != UCharacterCategory.UNASSIGNED && 4353 cat != UCharacterCategory.CONTROL && 4354 cat != UCharacterCategory.FORMAT && 4355 cat != UCharacterCategory.PRIVATE_USE && 4356 cat != UCharacterCategory.SURROGATE && 4357 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4358 } 4359 4360 /** 4361 * {@icu} Determines whether the specified code point is of base form. 4362 * A code point of base form does not graphically combine with preceding 4363 * characters, and is neither a control nor a format character. 
4364 * @param ch code point to be determined if it is of base form 4365 * @return true if the code point is of base form 4366 * @stable ICU 2.1 4367 */ 4368 public static boolean isBaseForm(int ch) 4369 { 4370 int cat = getType(ch); 4371 // if props == 0, it will just fall through and return false 4372 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4373 cat == UCharacterCategory.OTHER_NUMBER || 4374 cat == UCharacterCategory.LETTER_NUMBER || 4375 cat == UCharacterCategory.UPPERCASE_LETTER || 4376 cat == UCharacterCategory.LOWERCASE_LETTER || 4377 cat == UCharacterCategory.TITLECASE_LETTER || 4378 cat == UCharacterCategory.MODIFIER_LETTER || 4379 cat == UCharacterCategory.OTHER_LETTER || 4380 cat == UCharacterCategory.NON_SPACING_MARK || 4381 cat == UCharacterCategory.ENCLOSING_MARK || 4382 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4383 } 4384 4385 /** 4386 * {@icu} Returns the Bidirection property of a code point. 4387 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4388 * property.<br> 4389 * Result returned belongs to the interface 4390 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4391 * @param ch the code point to be determined its direction 4392 * @return direction constant from UCharacterDirection. 4393 * @stable ICU 2.1 4394 */ 4395 public static int getDirection(int ch) 4396 { 4397 return UBiDiProps.INSTANCE.getClass(ch); 4398 } 4399 4400 /** 4401 * Determines whether the code point has the "mirrored" property. 4402 * This property is set for characters that are commonly used in 4403 * Right-To-Left contexts and need to be displayed with a "mirrored" 4404 * glyph. 
4405 * @param ch code point whose mirror is to be determined 4406 * @return true if the code point has the "mirrored" property 4407 * @stable ICU 2.1 4408 */ 4409 public static boolean isMirrored(int ch) 4410 { 4411 return UBiDiProps.INSTANCE.isMirrored(ch); 4412 } 4413 4414 /** 4415 * {@icu} Maps the specified code point to a "mirror-image" code point. 4416 * For code points with the "mirrored" property, implementations sometimes 4417 * need a "poor man's" mapping to another code point such that the default 4418 * glyph may serve as the mirror-image of the default glyph of the 4419 * specified code point.<br> 4420 * This is useful for text conversion to and from codepages with visual 4421 * order, and for displays without glyph selection capabilities. 4422 * @param ch code point whose mirror is to be retrieved 4423 * @return another code point that may serve as a mirror-image substitute, 4424 * or ch itself if there is no such mapping or ch does not have the 4425 * "mirrored" property 4426 * @stable ICU 2.1 4427 */ 4428 public static int getMirror(int ch) 4429 { 4430 return UBiDiProps.INSTANCE.getMirror(ch); 4431 } 4432 4433 /** 4434 * {@icu} Maps the specified character to its paired bracket character. 4435 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4436 * Otherwise c itself is returned. 
4437 * See http://www.unicode.org/reports/tr9/ 4438 * 4439 * @param c the code point to be mapped 4440 * @return the paired bracket code point, 4441 * or c itself if there is no such mapping 4442 * (Bidi_Paired_Bracket_Type=None) 4443 * 4444 * @see UProperty#BIDI_PAIRED_BRACKET 4445 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4446 * @see #getMirror(int) 4447 * @stable ICU 52 4448 */ 4449 public static int getBidiPairedBracket(int c) { 4450 return UBiDiProps.INSTANCE.getPairedBracket(c); 4451 } 4452 4453 /** 4454 * {@icu} Returns the combining class of the argument codepoint 4455 * @param ch code point whose combining is to be retrieved 4456 * @return the combining class of the codepoint 4457 * @stable ICU 2.1 4458 */ 4459 public static int getCombiningClass(int ch) 4460 { 4461 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4462 } 4463 4464 /** 4465 * {@icu} A code point is illegal if and only if 4466 * <ul> 4467 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4468 * <li> A surrogate value, 0xD800 to 0xDFFF 4469 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4470 * </ul> 4471 * Note: legal does not mean that it is assigned in this version of Unicode. 4472 * @param ch code point to determine if it is a legal code point by itself 4473 * @return true if and only if legal. 4474 * @stable ICU 2.1 4475 */ 4476 public static boolean isLegal(int ch) 4477 { 4478 if (ch < MIN_VALUE) { 4479 return false; 4480 } 4481 if (ch < Character.MIN_SURROGATE) { 4482 return true; 4483 } 4484 if (ch <= Character.MAX_SURROGATE) { 4485 return false; 4486 } 4487 if (UCharacterUtility.isNonCharacter(ch)) { 4488 return false; 4489 } 4490 return (ch <= MAX_VALUE); 4491 } 4492 4493 /** 4494 * {@icu} A string is legal iff all its code points are legal. 
4495 * A code point is illegal if and only if 4496 * <ul> 4497 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4498 * <li> A surrogate value, 0xD800 to 0xDFFF 4499 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4500 * </ul> 4501 * Note: legal does not mean that it is assigned in this version of Unicode. 4502 * @param str containing code points to examin 4503 * @return true if and only if legal. 4504 * @stable ICU 2.1 4505 */ 4506 public static boolean isLegal(String str) 4507 { 4508 int size = str.length(); 4509 int codepoint; 4510 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4511 { 4512 codepoint = str.codePointAt(i); 4513 if (!isLegal(codepoint)) { 4514 return false; 4515 } 4516 } 4517 return true; 4518 } 4519 4520 /** 4521 * {@icu} Returns the version of Unicode data used. 4522 * @return the unicode version number used 4523 * @stable ICU 2.1 4524 */ 4525 public static VersionInfo getUnicodeVersion() 4526 { 4527 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4528 } 4529 4530 /** 4531 * {@icu} Returns the most current Unicode name of the argument code point, or 4532 * null if the character is unassigned or outside the range 4533 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4534 * <br> 4535 * Note calling any methods related to code point names, e.g. get*Name*() 4536 * incurs a one-time initialisation cost to construct the name tables. 
4537 * @param ch the code point for which to get the name 4538 * @return most current Unicode name 4539 * @stable ICU 2.1 4540 */ 4541 public static String getName(int ch) 4542 { 4543 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4544 } 4545 4546 /** 4547 * {@icu} Returns the names for each of the characters in a string 4548 * @param s string to format 4549 * @param separator string to go between names 4550 * @return string of names 4551 * @stable ICU 3.8 4552 */ 4553 public static String getName(String s, String separator) { 4554 if (s.length() == 1) { // handle common case 4555 return getName(s.charAt(0)); 4556 } 4557 int cp; 4558 StringBuilder sb = new StringBuilder(); 4559 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4560 cp = s.codePointAt(i); 4561 if (i != 0) sb.append(separator); 4562 sb.append(UCharacter.getName(cp)); 4563 } 4564 return sb.toString(); 4565 } 4566 4567 /** 4568 * {@icu} Returns null. 4569 * Used to return the Unicode_1_Name property value which was of little practical value. 4570 * @param ch the code point for which to get the name 4571 * @return null 4572 * @deprecated ICU 49 4573 */ 4574 @Deprecated 4575 public static String getName1_0(int ch) 4576 { 4577 return null; 4578 } 4579 4580 /** 4581 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4582 * getName1_0(int), this method will return a name even for codepoints that 4583 * are not assigned a name in UnicodeData.txt. 4584 * 4585 * <p>The names are returned in the following order. 4586 * <ul> 4587 * <li> Most current Unicode name if there is any 4588 * <li> Unicode 1.0 name if there is any 4589 * <li> Extended name in the form of 4590 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4591 * </ul> 4592 * Note calling any methods related to code point names, e.g. get*Name*() 4593 * incurs a one-time initialisation cost to construct the name tables. 
4594 * @param ch the code point for which to get the name 4595 * @return a name for the argument codepoint 4596 * @stable ICU 2.6 4597 */ 4598 public static String getExtendedName(int ch) { 4599 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4600 } 4601 4602 /** 4603 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4604 * Returns null if the character is unassigned or outside the range 4605 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4606 * <br> 4607 * Note calling any methods related to code point names, e.g. get*Name*() 4608 * incurs a one-time initialisation cost to construct the name tables. 4609 * @param ch the code point for which to get the name alias 4610 * @return Unicode name alias, or null 4611 * @stable ICU 4.4 4612 */ 4613 public static String getNameAlias(int ch) 4614 { 4615 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4616 } 4617 4618 /** 4619 * {@icu} Returns null. 4620 * Used to return the ISO 10646 comment for a character. 4621 * The Unicode ISO_Comment property is deprecated and has no values. 4622 * 4623 * @param ch The code point for which to get the ISO comment. 4624 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4625 * @return null 4626 * @deprecated ICU 49 4627 */ 4628 @Deprecated 4629 public static String getISOComment(int ch) 4630 { 4631 return null; 4632 } 4633 4634 /** 4635 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 4636 * return its code point value. All Unicode names are in uppercase. 4637 * Note calling any methods related to code point names, e.g. get*Name*() 4638 * incurs a one-time initialisation cost to construct the name tables. 
4639 * @param name most current Unicode character name whose code point is to 4640 * be returned 4641 * @return code point or -1 if name is not found 4642 * @stable ICU 2.1 4643 */ 4644 public static int getCharFromName(String name){ 4645 return UCharacterName.INSTANCE.getCharFromName( 4646 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4647 } 4648 4649 /** 4650 * {@icu} Returns -1. 4651 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4652 * its code point value. 4653 * @param name Unicode 1.0 code point name whose code point is to be 4654 * returned 4655 * @return -1 4656 * @deprecated ICU 49 4657 * @see #getName1_0(int) 4658 */ 4659 @Deprecated 4660 public static int getCharFromName1_0(String name){ 4661 return -1; 4662 } 4663 4664 /** 4665 * {@icu} <p>Find a Unicode character by either its name and return its code 4666 * point value. All Unicode names are in uppercase. 4667 * Extended names are all lowercase except for numbers and are contained 4668 * within angle brackets. 4669 * The names are searched in the following order 4670 * <ul> 4671 * <li> Most current Unicode name if there is any 4672 * <li> Unicode 1.0 name if there is any 4673 * <li> Extended name in the form of 4674 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4675 * </ul> 4676 * Note calling any methods related to code point names, e.g. get*Name*() 4677 * incurs a one-time initialisation cost to construct the name tables. 4678 * @param name codepoint name 4679 * @return code point associated with the name or -1 if the name is not 4680 * found. 4681 * @stable ICU 2.6 4682 */ 4683 public static int getCharFromExtendedName(String name){ 4684 return UCharacterName.INSTANCE.getCharFromName( 4685 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4686 } 4687 4688 /** 4689 * {@icu} <p>Find a Unicode character by its corrected name alias and return 4690 * its code point value. All Unicode names are in uppercase. 
4691 * Note calling any methods related to code point names, e.g. get*Name*() 4692 * incurs a one-time initialisation cost to construct the name tables. 4693 * @param name Unicode name alias whose code point is to be returned 4694 * @return code point or -1 if name is not found 4695 * @stable ICU 4.4 4696 */ 4697 public static int getCharFromNameAlias(String name){ 4698 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4699 } 4700 4701 /** 4702 * {@icu} Return the Unicode name for a given property, as given in the 4703 * Unicode database file PropertyAliases.txt. Most properties 4704 * have more than one name. The nameChoice determines which one 4705 * is returned. 4706 * 4707 * In addition, this function maps the property 4708 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4709 * "General_Category_Mask". These names are not in 4710 * PropertyAliases.txt. 4711 * 4712 * @param property UProperty selector. 4713 * 4714 * @param nameChoice UProperty.NameChoice selector for which name 4715 * to get. All properties have a long name. Most have a short 4716 * name, but some do not. Unicode allows for additional names; if 4717 * present these will be returned by UProperty.NameChoice.LONG + i, 4718 * where i=1, 2,... 4719 * 4720 * @return a name, or null if Unicode explicitly defines no name 4721 * ("n/a") for a given property/nameChoice. If a given nameChoice 4722 * throws an exception, then all larger values of nameChoice will 4723 * throw an exception. If null is returned for a given 4724 * nameChoice, then other nameChoice values may return non-null 4725 * results. 4726 * 4727 * @exception IllegalArgumentException thrown if property or 4728 * nameChoice are invalid. 
4729 * 4730 * @see UProperty 4731 * @see UProperty.NameChoice 4732 * @stable ICU 2.4 4733 */ 4734 public static String getPropertyName(int property, 4735 int nameChoice) { 4736 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4737 } 4738 4739 /** 4740 * {@icu} Return the UProperty selector for a given property name, as 4741 * specified in the Unicode database file PropertyAliases.txt. 4742 * Short, long, and any other variants are recognized. 4743 * 4744 * In addition, this function maps the synthetic names "gcm" / 4745 * "General_Category_Mask" to the property 4746 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4747 * PropertyAliases.txt. 4748 * 4749 * @param propertyAlias the property name to be matched. The name 4750 * is compared using "loose matching" as described in 4751 * PropertyAliases.txt. 4752 * 4753 * @return a UProperty enum. 4754 * 4755 * @exception IllegalArgumentException thrown if propertyAlias 4756 * is not recognized. 4757 * 4758 * @see UProperty 4759 * @stable ICU 2.4 4760 */ 4761 public static int getPropertyEnum(CharSequence propertyAlias) { 4762 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4763 if (propEnum == UProperty.UNDEFINED) { 4764 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4765 } 4766 return propEnum; 4767 } 4768 4769 /** 4770 * {@icu} Return the Unicode name for a given property value, as given in 4771 * the Unicode database file PropertyValueAliases.txt. Most 4772 * values have more than one name. The nameChoice determines 4773 * which one is returned. 4774 * 4775 * Note: Some of the names in PropertyValueAliases.txt can only be 4776 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4777 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4778 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4779 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 
*
     * @param property UProperty selector constant.
     * {@code UProperty.INT_START <= property < UProperty.INT_LIMIT} or
     * {@code UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT} or
     * {@code UProperty.MASK_START <= property < UProperty.MASK_LIMIT}.
     * If out of range, null is returned.
     *
     * @param value selector for a value for the given property. In
     * general, valid values range from 0 up to some maximum. There
     * are a few exceptions: (1.) UProperty.BLOCK values begin at the
     * non-zero value BASIC_LATIN.getID(). (2.)
     * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
     * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values
     * are mask values produced by left-shifting 1 by
     * UCharacter.getType(). This allows grouped categories such as
     * [:L:] to be represented. Mask values are non-contiguous.
     *
     * @param nameChoice UProperty.NameChoice selector for which name
     * to get. All values have a long name. Most have a short name,
     * but some do not. Unicode allows for additional names; if
     * present these will be returned by UProperty.NameChoice.LONG + i,
     * where i=1, 2,...
     *
     * @return a name, or null if Unicode explicitly defines no name
     * ("n/a") for a given property/value/nameChoice. If a given
     * nameChoice throws an exception, then all larger values of
     * nameChoice will throw an exception. If null is returned for a
     * given nameChoice, then other nameChoice values may return
     * non-null results.
     *
     * @exception IllegalArgumentException thrown if property, value,
     * or nameChoice are invalid.
4812 * 4813 * @see UProperty 4814 * @see UProperty.NameChoice 4815 * @stable ICU 2.4 4816 */ 4817 public static String getPropertyValueName(int property, 4818 int value, 4819 int nameChoice) 4820 { 4821 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4822 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4823 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4824 && value >= UCharacter.getIntPropertyMinValue( 4825 UProperty.CANONICAL_COMBINING_CLASS) 4826 && value <= UCharacter.getIntPropertyMaxValue( 4827 UProperty.CANONICAL_COMBINING_CLASS) 4828 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4829 // this is hard coded for the valid cc 4830 // because PropertyValueAliases.txt does not contain all of them 4831 try { 4832 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4833 nameChoice); 4834 } 4835 catch (IllegalArgumentException e) { 4836 return null; 4837 } 4838 } 4839 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4840 } 4841 4842 /** 4843 * {@icu} Return the property value integer for a given value name, as 4844 * specified in the Unicode database file PropertyValueAliases.txt. 4845 * Short, long, and any other variants are recognized. 4846 * 4847 * Note: Some of the names in PropertyValueAliases.txt will only be 4848 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4849 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4850 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4851 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4852 * 4853 * @param property UProperty selector constant. 4854 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4855 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4856 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4857 * Only these properties can be enumerated. 4858 * 4859 * @param valueAlias the value name to be matched. 
The name is 4860 * compared using "loose matching" as described in 4861 * PropertyValueAliases.txt. 4862 * 4863 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4864 * values are mask values produced by left-shifting 1 by 4865 * UCharacter.getType(). This allows grouped categories such as 4866 * [:L:] to be represented. 4867 * 4868 * @see UProperty 4869 * @throws IllegalArgumentException if property is not a valid UProperty 4870 * selector or valueAlias is not a value of this property 4871 * @stable ICU 2.4 4872 */ 4873 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4874 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4875 if (propEnum == UProperty.UNDEFINED) { 4876 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4877 } 4878 return propEnum; 4879 } 4880 4881 /** 4882 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4883 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4884 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4885 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4886 * @internal 4887 * @deprecated This API is ICU internal only. 4888 */ 4889 @Deprecated 4890 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4891 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4892 } 4893 4894 4895 /** 4896 * {@icu} Returns a code point corresponding to the two surrogate code units. 4897 * 4898 * @param lead the lead char 4899 * @param trail the trail char 4900 * @return code point if surrogate characters are valid. 
4901 * @exception IllegalArgumentException thrown when the code units do 4902 * not form a valid code point 4903 * @stable ICU 2.1 4904 */ 4905 public static int getCodePoint(char lead, char trail) 4906 { 4907 if (Character.isSurrogatePair(lead, trail)) { 4908 return Character.toCodePoint(lead, trail); 4909 } 4910 throw new IllegalArgumentException("Illegal surrogate characters"); 4911 } 4912 4913 /** 4914 * {@icu} Returns the code point corresponding to the BMP code point. 4915 * 4916 * @param char16 the BMP code point 4917 * @return code point if argument is a valid character. 4918 * @exception IllegalArgumentException thrown when char16 is not a valid 4919 * code point 4920 * @stable ICU 2.1 4921 */ 4922 public static int getCodePoint(char char16) 4923 { 4924 if (UCharacter.isLegal(char16)) { 4925 return char16; 4926 } 4927 throw new IllegalArgumentException("Illegal codepoint"); 4928 } 4929 4930 /** 4931 * Returns the uppercase version of the argument string. 4932 * Casing is dependent on the default locale and context-sensitive. 4933 * @param str source string to be performed on 4934 * @return uppercase version of the argument string 4935 * @stable ICU 2.1 4936 */ 4937 public static String toUpperCase(String str) 4938 { 4939 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 4940 } 4941 4942 /** 4943 * Returns the lowercase version of the argument string. 4944 * Casing is dependent on the default locale and context-sensitive 4945 * @param str source string to be performed on 4946 * @return lowercase version of the argument string 4947 * @stable ICU 2.1 4948 */ 4949 public static String toLowerCase(String str) 4950 { 4951 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 4952 } 4953 4954 /** 4955 * <p>Returns the titlecase version of the argument string. 4956 * <p>Position for titlecasing is determined by the argument break 4957 * iterator, hence the user can customize his break iterator for 4958 * a specialized titlecasing. 
In this case only the forward iteration 4959 * needs to be implemented. 4960 * If the break iterator passed in is null, the default Unicode algorithm 4961 * will be used to determine the titlecase positions. 4962 * 4963 * <p>Only positions returned by the break iterator will be title cased, 4964 * character in between the positions will all be in lower case. 4965 * <p>Casing is dependent on the default locale and context-sensitive 4966 * @param str source string to be performed on 4967 * @param breakiter break iterator to determine the positions in which 4968 * the character should be title cased. 4969 * @return titlecase version of the argument string 4970 * @stable ICU 2.6 4971 */ 4972 public static String toTitleCase(String str, BreakIterator breakiter) 4973 { 4974 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 4975 } 4976 4977 private static int getDefaultCaseLocale() { 4978 return UCaseProps.getCaseLocale(Locale.getDefault()); 4979 } 4980 4981 private static int getCaseLocale(Locale locale) { 4982 if (locale == null) { 4983 locale = Locale.getDefault(); 4984 } 4985 return UCaseProps.getCaseLocale(locale); 4986 } 4987 4988 private static int getCaseLocale(ULocale locale) { 4989 if (locale == null) { 4990 locale = ULocale.getDefault(); 4991 } 4992 return UCaseProps.getCaseLocale(locale); 4993 } 4994 4995 /** 4996 * Returns the uppercase version of the argument string. 4997 * Casing is dependent on the argument locale and context-sensitive. 4998 * @param locale which string is to be converted in 4999 * @param str source string to be performed on 5000 * @return uppercase version of the argument string 5001 * @stable ICU 2.1 5002 */ 5003 public static String toUpperCase(Locale locale, String str) 5004 { 5005 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5006 } 5007 5008 /** 5009 * Returns the uppercase version of the argument string. 5010 * Casing is dependent on the argument locale and context-sensitive. 
5011 * @param locale which string is to be converted in 5012 * @param str source string to be performed on 5013 * @return uppercase version of the argument string 5014 * @stable ICU 3.2 5015 */ 5016 public static String toUpperCase(ULocale locale, String str) { 5017 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5018 } 5019 5020 /** 5021 * Returns the lowercase version of the argument string. 5022 * Casing is dependent on the argument locale and context-sensitive 5023 * @param locale which string is to be converted in 5024 * @param str source string to be performed on 5025 * @return lowercase version of the argument string 5026 * @stable ICU 2.1 5027 */ 5028 public static String toLowerCase(Locale locale, String str) 5029 { 5030 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5031 } 5032 5033 /** 5034 * Returns the lowercase version of the argument string. 5035 * Casing is dependent on the argument locale and context-sensitive 5036 * @param locale which string is to be converted in 5037 * @param str source string to be performed on 5038 * @return lowercase version of the argument string 5039 * @stable ICU 3.2 5040 */ 5041 public static String toLowerCase(ULocale locale, String str) { 5042 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5043 } 5044 5045 /** 5046 * <p>Returns the titlecase version of the argument string. 5047 * <p>Position for titlecasing is determined by the argument break 5048 * iterator, hence the user can customize his break iterator for 5049 * a specialized titlecasing. In this case only the forward iteration 5050 * needs to be implemented. 5051 * If the break iterator passed in is null, the default Unicode algorithm 5052 * will be used to determine the titlecase positions. 5053 * 5054 * <p>Only positions returned by the break iterator will be title cased, 5055 * character in between the positions will all be in lower case. 
5056 * <p>Casing is dependent on the argument locale and context-sensitive 5057 * @param locale which string is to be converted in 5058 * @param str source string to be performed on 5059 * @param breakiter break iterator to determine the positions in which 5060 * the character should be title cased. 5061 * @return titlecase version of the argument string 5062 * @stable ICU 2.6 5063 */ 5064 public static String toTitleCase(Locale locale, String str, 5065 BreakIterator breakiter) 5066 { 5067 return toTitleCase(locale, str, breakiter, 0); 5068 } 5069 5070 /** 5071 * <p>Returns the titlecase version of the argument string. 5072 * <p>Position for titlecasing is determined by the argument break 5073 * iterator, hence the user can customize his break iterator for 5074 * a specialized titlecasing. In this case only the forward iteration 5075 * needs to be implemented. 5076 * If the break iterator passed in is null, the default Unicode algorithm 5077 * will be used to determine the titlecase positions. 5078 * 5079 * <p>Only positions returned by the break iterator will be title cased, 5080 * character in between the positions will all be in lower case. 5081 * <p>Casing is dependent on the argument locale and context-sensitive 5082 * @param locale which string is to be converted in 5083 * @param str source string to be performed on 5084 * @param titleIter break iterator to determine the positions in which 5085 * the character should be title cased. 5086 * @return titlecase version of the argument string 5087 * @stable ICU 3.2 5088 */ 5089 public static String toTitleCase(ULocale locale, String str, 5090 BreakIterator titleIter) { 5091 return toTitleCase(locale, str, titleIter, 0); 5092 } 5093 5094 /** 5095 * <p>Returns the titlecase version of the argument string. 5096 * <p>Position for titlecasing is determined by the argument break 5097 * iterator, hence the user can customize his break iterator for 5098 * a specialized titlecasing. 
In this case only the forward iteration 5099 * needs to be implemented. 5100 * If the break iterator passed in is null, the default Unicode algorithm 5101 * will be used to determine the titlecase positions. 5102 * 5103 * <p>Only positions returned by the break iterator will be title cased, 5104 * character in between the positions will all be in lower case. 5105 * <p>Casing is dependent on the argument locale and context-sensitive 5106 * @param locale which string is to be converted in 5107 * @param str source string to be performed on 5108 * @param titleIter break iterator to determine the positions in which 5109 * the character should be title cased. 5110 * @param options bit set to modify the titlecasing operation 5111 * @return titlecase version of the argument string 5112 * @stable ICU 3.8 5113 * @see #TITLECASE_NO_LOWERCASE 5114 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5115 */ 5116 public static String toTitleCase(ULocale locale, String str, 5117 BreakIterator titleIter, int options) { 5118 if (titleIter == null && locale == null) { 5119 locale = ULocale.getDefault(); 5120 } 5121 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5122 titleIter.setText(str); 5123 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5124 } 5125 5126 /** 5127 * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string, 5128 * and sometimes has no effect at all; the original string is returned whenever casing 5129 * would not be appropriate for the first word (such as for CJK characters or initial numbers). 5130 * Initial non-letters are skipped in order to find the character to change. 5131 * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE. 
5132 * <p>Examples: 5133 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 5134 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 5135 * <tr><td>“contact us”</td><td>“Contact us”</td></tr> 5136 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 5137 * <tr><td>丰(abc)</td><td>丰(abc)</td></tr> 5138 * <tr><td>«ijs»</td><td>«Ijs»</td></tr> 5139 * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr> 5140 * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr> 5141 * </table> 5142 * @param locale the locale for accessing exceptional behavior (eg for tr). 5143 * @param str the source string to change 5144 * @return the modified string, or the original if no modifications were necessary. 5145 * @internal 5146 * @deprecated ICU internal only 5147 */ 5148 @Deprecated 5149 public static String toTitleFirst(ULocale locale, String str) { 5150 // TODO: Remove this function. Inline it where it is called in CLDR. 5151 return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str); 5152 } 5153 5154 private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = 5155 com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase(); 5156 5157 /** 5158 * {@icu} <p>Returns the titlecase version of the argument string. 5159 * <p>Position for titlecasing is determined by the argument break 5160 * iterator, hence the user can customize his break iterator for 5161 * a specialized titlecasing. In this case only the forward iteration 5162 * needs to be implemented. 5163 * If the break iterator passed in is null, the default Unicode algorithm 5164 * will be used to determine the titlecase positions. 5165 * 5166 * <p>Only positions returned by the break iterator will be title cased, 5167 * character in between the positions will all be in lower case. 
5168 * <p>Casing is dependent on the argument locale and context-sensitive 5169 * @param locale which string is to be converted in 5170 * @param str source string to be performed on 5171 * @param titleIter break iterator to determine the positions in which 5172 * the character should be title cased. 5173 * @param options bit set to modify the titlecasing operation 5174 * @return titlecase version of the argument string 5175 * @see #TITLECASE_NO_LOWERCASE 5176 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5177 * @stable ICU 54 5178 */ 5179 public static String toTitleCase(Locale locale, String str, 5180 BreakIterator titleIter, 5181 int options) { 5182 if (titleIter == null && locale == null) { 5183 locale = Locale.getDefault(); 5184 } 5185 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5186 titleIter.setText(str); 5187 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5188 } 5189 5190 /** 5191 * {@icu} The given character is mapped to its case folding equivalent according 5192 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5193 * folding equivalent, the character itself is returned. 5194 * 5195 * <p>This function only returns the simple, single-code point case mapping. 5196 * Full case mappings should be used whenever possible because they produce 5197 * better results by working on whole strings. 5198 * They can map to a result string with a different length as appropriate. 5199 * Full case mappings are applied by the case mapping functions 5200 * that take String parameters rather than code points (int). 
5201 * See also the User Guide chapter on C/POSIX migration: 5202 * http://www.icu-project.org/userguide/posix.html#case_mappings 5203 * 5204 * @param ch the character to be converted 5205 * @param defaultmapping Indicates whether the default mappings defined in 5206 * CaseFolding.txt are to be used, otherwise the 5207 * mappings for dotted I and dotless i marked with 5208 * 'T' in CaseFolding.txt are included. 5209 * @return the case folding equivalent of the character, if 5210 * any; otherwise the character itself. 5211 * @see #foldCase(String, boolean) 5212 * @stable ICU 2.1 5213 */ 5214 public static int foldCase(int ch, boolean defaultmapping) { 5215 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5216 } 5217 5218 /** 5219 * {@icu} The given string is mapped to its case folding equivalent according to 5220 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5221 * folding equivalent, the character itself is returned. 5222 * "Full", multiple-code point case folding mappings are returned here. 5223 * For "simple" single-code point mappings use the API 5224 * foldCase(int ch, boolean defaultmapping). 5225 * @param str the String to be converted 5226 * @param defaultmapping Indicates whether the default mappings defined in 5227 * CaseFolding.txt are to be used, otherwise the 5228 * mappings for dotted I and dotless i marked with 5229 * 'T' in CaseFolding.txt are included. 5230 * @return the case folding equivalent of the character, if 5231 * any; otherwise the character itself. 5232 * @see #foldCase(int, boolean) 5233 * @stable ICU 2.1 5234 */ 5235 public static String foldCase(String str, boolean defaultmapping) { 5236 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5237 } 5238 5239 /** 5240 * {@icu} Option value for case folding: use default mappings defined in 5241 * CaseFolding.txt. 
5242 * @stable ICU 2.6 5243 */ 5244 public static final int FOLD_CASE_DEFAULT = 0x0000; 5245 /** 5246 * {@icu} Option value for case folding: 5247 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5248 * and dotless i appropriately for Turkic languages (tr, az). 5249 * 5250 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5251 * are to be included for default mappings and 5252 * excluded for the Turkic-specific mappings. 5253 * 5254 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5255 * are to be excluded for default mappings and 5256 * included for the Turkic-specific mappings. 5257 * 5258 * @stable ICU 2.6 5259 */ 5260 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5261 5262 /** 5263 * {@icu} The given character is mapped to its case folding equivalent according 5264 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5265 * folding equivalent, the character itself is returned. 5266 * 5267 * <p>This function only returns the simple, single-code point case mapping. 5268 * Full case mappings should be used whenever possible because they produce 5269 * better results by working on whole strings. 5270 * They can map to a result string with a different length as appropriate. 5271 * Full case mappings are applied by the case mapping functions 5272 * that take String parameters rather than code points (int). 5273 * See also the User Guide chapter on C/POSIX migration: 5274 * http://www.icu-project.org/userguide/posix.html#case_mappings 5275 * 5276 * @param ch the character to be converted 5277 * @param options A bit set for special processing. Currently the recognised options 5278 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5279 * @return the case folding equivalent of the character, if any; otherwise the 5280 * character itself. 
5281 * @see #foldCase(String, boolean) 5282 * @stable ICU 2.6 5283 */ 5284 public static int foldCase(int ch, int options) { 5285 return UCaseProps.INSTANCE.fold(ch, options); 5286 } 5287 5288 /** 5289 * {@icu} The given string is mapped to its case folding equivalent according to 5290 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5291 * folding equivalent, the character itself is returned. 5292 * "Full", multiple-code point case folding mappings are returned here. 5293 * For "simple" single-code point mappings use the API 5294 * foldCase(int ch, boolean defaultmapping). 5295 * @param str the String to be converted 5296 * @param options A bit set for special processing. Currently the recognised options 5297 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5298 * @return the case folding equivalent of the character, if any; otherwise the 5299 * character itself. 5300 * @see #foldCase(int, boolean) 5301 * @stable ICU 2.6 5302 */ 5303 public static final String foldCase(String str, int options) { 5304 return CaseMapImpl.fold(options, str); 5305 } 5306 5307 /** 5308 * {@icu} Returns the numeric value of a Han character. 5309 * 5310 * <p>This returns the value of Han 'numeric' code points, 5311 * including those for zero, ten, hundred, thousand, ten thousand, 5312 * and hundred million. 5313 * This includes both the standard and 'checkwriting' 5314 * characters, the 'big circle' zero character, and the standard 5315 * zero character. 5316 * 5317 * <p>Note: The Unicode Standard has numeric values for more 5318 * Han characters recognized by this method 5319 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5320 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5321 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5322 * 5323 * @param ch code point to query 5324 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5325 * @stable ICU 2.4 5326 */ 5327 public static int getHanNumericValue(int ch) 5328 { 5329 switch(ch) 5330 { 5331 case IDEOGRAPHIC_NUMBER_ZERO_ : 5332 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5333 return 0; // Han Zero 5334 case CJK_IDEOGRAPH_FIRST_ : 5335 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5336 return 1; // Han One 5337 case CJK_IDEOGRAPH_SECOND_ : 5338 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5339 return 2; // Han Two 5340 case CJK_IDEOGRAPH_THIRD_ : 5341 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5342 return 3; // Han Three 5343 case CJK_IDEOGRAPH_FOURTH_ : 5344 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5345 return 4; // Han Four 5346 case CJK_IDEOGRAPH_FIFTH_ : 5347 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5348 return 5; // Han Five 5349 case CJK_IDEOGRAPH_SIXTH_ : 5350 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5351 return 6; // Han Six 5352 case CJK_IDEOGRAPH_SEVENTH_ : 5353 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5354 return 7; // Han Seven 5355 case CJK_IDEOGRAPH_EIGHTH_ : 5356 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5357 return 8; // Han Eight 5358 case CJK_IDEOGRAPH_NINETH_ : 5359 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5360 return 9; // Han Nine 5361 case CJK_IDEOGRAPH_TEN_ : 5362 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5363 return 10; 5364 case CJK_IDEOGRAPH_HUNDRED_ : 5365 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5366 return 100; 5367 case CJK_IDEOGRAPH_THOUSAND_ : 5368 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5369 return 1000; 5370 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5371 return 10000; 5372 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5373 return 100000000; 5374 } 5375 return -1; // no value 5376 } 5377 5378 /** 5379 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5380 * <p>Example of use:<br> 5381 * <pre> 5382 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5383 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5384 * while (iterator.next(element)) { 5385 * System.out.println("Codepoint \\u" + 5386 * Integer.toHexString(element.start) + 5387 * " to codepoint \\u" + 5388 * Integer.toHexString(element.limit - 1) + 5389 * " has the character type " + 5390 * element.value); 5391 * } 5392 * </pre> 5393 * @return an iterator 5394 * @stable ICU 2.6 5395 */ 5396 public static RangeValueIterator getTypeIterator() 5397 { 5398 return new UCharacterTypeIterator(); 5399 } 5400 5401 private static final class UCharacterTypeIterator implements RangeValueIterator { 5402 UCharacterTypeIterator() { 5403 reset(); 5404 } 5405 5406 // implements RangeValueIterator 5407 @Override 5408 public boolean next(Element element) { 5409 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5410 element.start=range.startCodePoint; 5411 element.limit=range.endCodePoint+1; 5412 element.value=range.value; 5413 return true; 5414 } else { 5415 return false; 5416 } 5417 } 5418 5419 // implements RangeValueIterator 5420 @Override 5421 public void reset() { 5422 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5423 } 5424 5425 private Iterator<Trie2.Range> trieIterator; 5426 private Trie2.Range range; 5427 5428 private static final class MaskType implements Trie2.ValueMapper { 5429 // Extracts the general category ("character type") from the trie value. 5430 @Override 5431 public int map(int value) { 5432 return value & UCharacterProperty.TYPE_MASK; 5433 } 5434 } 5435 private static final MaskType MASK_TYPE=new MaskType(); 5436 } 5437 5438 /** 5439 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5440 * <p>This API only gets the iterator for the modern, most up-to-date 5441 * Unicode names. 
For older 1.0 Unicode names use get1_0NameIterator() or 5442 * for extended names use getExtendedNameIterator(). 5443 * <p>Example of use:<br> 5444 * <pre> 5445 * ValueIterator iterator = UCharacter.getNameIterator(); 5446 * ValueIterator.Element element = new ValueIterator.Element(); 5447 * while (iterator.next(element)) { 5448 * System.out.println("Codepoint \\u" + 5449 * Integer.toHexString(element.codepoint) + 5450 * " has the name " + (String)element.value); 5451 * } 5452 * </pre> 5453 * <p>The maximal range which the name iterator iterates is from 5454 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5455 * @return an iterator 5456 * @stable ICU 2.6 5457 */ 5458 public static ValueIterator getNameIterator(){ 5459 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5460 UCharacterNameChoice.UNICODE_CHAR_NAME); 5461 } 5462 5463 /** 5464 * {@icu} Returns an empty iterator. 5465 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5466 * @return an empty iterator 5467 * @deprecated ICU 49 5468 * @see #getName1_0(int) 5469 */ 5470 @Deprecated 5471 public static ValueIterator getName1_0Iterator(){ 5472 return new DummyValueIterator(); 5473 } 5474 5475 private static final class DummyValueIterator implements ValueIterator { 5476 @Override 5477 public boolean next(Element element) { return false; } 5478 @Override 5479 public void reset() {} 5480 @Override 5481 public void setRange(int start, int limit) {} 5482 } 5483 5484 /** 5485 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5486 * <p>This API only gets the iterator for the extended names. 5487 * For modern, most up-to-date Unicode names use getNameIterator() or 5488 * for older 1.0 Unicode names use get1_0NameIterator(). 
5489 * <p>Example of use:<br> 5490 * <pre> 5491 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5492 * ValueIterator.Element element = new ValueIterator.Element(); 5493 * while (iterator.next(element)) { 5494 * System.out.println("Codepoint \\u" + 5495 * Integer.toHexString(element.codepoint) + 5496 * " has the name " + (String)element.value); 5497 * } 5498 * </pre> 5499 * <p>The maximal range which the name iterator iterates is from 5500 * @return an iterator 5501 * @stable ICU 2.6 5502 */ 5503 public static ValueIterator getExtendedNameIterator(){ 5504 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5505 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5506 } 5507 5508 /** 5509 * {@icu} Returns the "age" of the code point. 5510 * <p>The "age" is the Unicode version when the code point was first 5511 * designated (as a non-character or for Private Use) or assigned a 5512 * character. 5513 * <p>This can be useful to avoid emitting code points to receiving 5514 * processes that do not accept newer characters. 5515 * <p>The data is from the UCD file DerivedAge.txt. 5516 * @param ch The code point. 5517 * @return the Unicode version number 5518 * @stable ICU 2.6 5519 */ 5520 public static VersionInfo getAge(int ch) 5521 { 5522 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5523 throw new IllegalArgumentException("Codepoint out of bounds"); 5524 } 5525 return UCharacterProperty.INSTANCE.getAge(ch); 5526 } 5527 5528 /** 5529 * {@icu} <p>Check a binary Unicode property for a code point. 5530 * <p>Unicode, especially in version 3.2, defines many more properties 5531 * than the original set in UnicodeData.txt. 5532 * <p>This API is intended to reflect Unicode properties as defined in 5533 * the Unicode Character Database (UCD) and Unicode Technical Reports 5534 * (UTR). 5535 * <p>For details about the properties see 5536 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5537 * <p>For names of Unicode properties see the UCD file 5538 * PropertyAliases.txt. 5539 * <p>This API does not check the validity of the codepoint. 5540 * <p>Important: If ICU is built with UCD files from Unicode versions 5541 * below 3.2, then properties marked with "new" are not or 5542 * not fully available. 5543 * @param ch code point to test. 5544 * @param property selector constant from com.ibm.icu.lang.UProperty, 5545 * identifies which binary property to check. 5546 * @return true or false according to the binary Unicode property value 5547 * for ch. Also false if property is out of bounds or if the 5548 * Unicode version does not have data for the property at all, or 5549 * not for this code point. 5550 * @see com.ibm.icu.lang.UProperty 5551 * @stable ICU 2.6 5552 */ 5553 public static boolean hasBinaryProperty(int ch, int property) 5554 { 5555 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5556 } 5557 5558 /** 5559 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 5560 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5561 * <p>Different from UCharacter.isLetter(ch)! 5562 * @stable ICU 2.6 5563 * @param ch codepoint to be tested 5564 */ 5565 public static boolean isUAlphabetic(int ch) 5566 { 5567 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5568 } 5569 5570 /** 5571 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 5572 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5573 * <p>This is different from UCharacter.isLowerCase(ch)! 5574 * @param ch codepoint to be tested 5575 * @stable ICU 2.6 5576 */ 5577 public static boolean isULowercase(int ch) 5578 { 5579 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5580 } 5581 5582 /** 5583 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 5584 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5585 * <p>This is different from UCharacter.isUpperCase(ch)! 
5586 * @param ch codepoint to be tested 5587 * @stable ICU 2.6 5588 */ 5589 public static boolean isUUppercase(int ch) 5590 { 5591 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5592 } 5593 5594 /** 5595 * {@icu} <p>Check if a code point has the White_Space Unicode property. 5596 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5597 * <p>This is different from both UCharacter.isSpace(ch) and 5598 * UCharacter.isWhitespace(ch)! 5599 * @param ch codepoint to be tested 5600 * @stable ICU 2.6 5601 */ 5602 public static boolean isUWhiteSpace(int ch) 5603 { 5604 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5605 } 5606 5607 /** 5608 * {@icu} <p>Returns the property value for an Unicode property type of a code point. 5609 * Also returns binary and mask property values. 5610 * <p>Unicode, especially in version 3.2, defines many more properties than 5611 * the original set in UnicodeData.txt. 5612 * <p>The properties APIs are intended to reflect Unicode properties as 5613 * defined in the Unicode Character Database (UCD) and Unicode Technical 5614 * Reports (UTR). For details about the properties see 5615 * http://www.unicode.org/. 5616 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5617 * 5618 * <pre> 5619 * Sample usage: 5620 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5621 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5622 * boolean b = (ideo == 1) ? true : false; 5623 * </pre> 5624 * @param ch code point to test. 5625 * @param type UProperty selector constant, identifies which binary 5626 * property to check. Must be 5627 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5628 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5629 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
5630 * @return numeric value that is directly the property value or, 5631 * for enumerated properties, corresponds to the numeric value of 5632 * the enumerated constant of the respective property value 5633 * enumeration type (cast to enum type if necessary). 5634 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5635 * Returns a bit-mask for mask properties. 5636 * Returns 0 if 'type' is out of bounds or if the Unicode version 5637 * does not have data for the property at all, or not for this code 5638 * point. 5639 * @see UProperty 5640 * @see #hasBinaryProperty 5641 * @see #getIntPropertyMinValue 5642 * @see #getIntPropertyMaxValue 5643 * @see #getUnicodeVersion 5644 * @stable ICU 2.4 5645 */ 5646 public static int getIntPropertyValue(int ch, int type) 5647 { 5648 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5649 } 5650 /** 5651 * {@icu} Returns a string version of the property value. 5652 * @param propertyEnum The property enum value. 5653 * @param codepoint The codepoint value. 5654 * @param nameChoice The choice of the name. 5655 * @return value as string 5656 * @internal 5657 * @deprecated This API is ICU internal only. 
5658 */ 5659 @Deprecated 5660 ///CLOVER:OFF 5661 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5662 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5663 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5664 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5665 nameChoice); 5666 } 5667 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5668 return String.valueOf(getUnicodeNumericValue(codepoint)); 5669 } 5670 // otherwise must be string property 5671 switch (propertyEnum) { 5672 case UProperty.AGE: return getAge(codepoint).toString(); 5673 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5674 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5675 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5676 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5677 case UProperty.NAME: return getName(codepoint); 5678 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5679 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5680 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5681 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5682 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5683 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5684 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5685 } 5686 throw new IllegalArgumentException("Illegal Property Enum"); 5687 } 5688 ///CLOVER:ON 5689 5690 /** 5691 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 5692 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5693 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 
5694 * @param type UProperty selector constant, identifies which binary 5695 * property to check. Must be 5696 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5697 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5698 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5699 * for a Unicode property. 0 if the property 5700 * selector 'type' is out of range. 5701 * @see UProperty 5702 * @see #hasBinaryProperty 5703 * @see #getUnicodeVersion 5704 * @see #getIntPropertyMaxValue 5705 * @see #getIntPropertyValue 5706 * @stable ICU 2.4 5707 */ 5708 public static int getIntPropertyMinValue(int type){ 5709 5710 return 0; // undefined; and: all other properties have a minimum value of 0 5711 } 5712 5713 5714 /** 5715 * {@icu} Returns the maximum value for an integer/binary Unicode property. 5716 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5717 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5718 * Examples for min/max values (for Unicode 3.2): 5719 * <ul> 5720 * <li> UProperty.BIDI_CLASS: 0/18 5721 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5722 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5723 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5724 * </ul> 5725 * For undefined UProperty constant values, min/max values will be 0/-1. 5726 * @param type UProperty selector constant, identifies which binary 5727 * property to check. Must be 5728 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5729 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5730 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5731 * property. <= 0 if the property selector 'type' is out of range. 
5732 * @see UProperty 5733 * @see #hasBinaryProperty 5734 * @see #getUnicodeVersion 5735 * @see #getIntPropertyMaxValue 5736 * @see #getIntPropertyValue 5737 * @stable ICU 2.4 5738 */ 5739 public static int getIntPropertyMaxValue(int type) 5740 { 5741 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5742 } 5743 5744 /** 5745 * Provide the java.lang.Character forDigit API, for convenience. 5746 * @stable ICU 3.0 5747 */ 5748 public static char forDigit(int digit, int radix) { 5749 return java.lang.Character.forDigit(digit, radix); 5750 } 5751 5752 // JDK 1.5 API coverage 5753 5754 /** 5755 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5756 * 5757 * @stable ICU 3.0 5758 */ 5759 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5760 5761 /** 5762 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5763 * 5764 * @stable ICU 3.0 5765 */ 5766 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5767 5768 /** 5769 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5770 * 5771 * @stable ICU 3.0 5772 */ 5773 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5774 5775 /** 5776 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5777 * 5778 * @stable ICU 3.0 5779 */ 5780 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5781 5782 /** 5783 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5784 * 5785 * @stable ICU 3.0 5786 */ 5787 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5788 5789 /** 5790 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5791 * 5792 * @stable ICU 3.0 5793 */ 5794 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5795 5796 /** 5797 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
5798 * 5799 * @stable ICU 3.0 5800 */ 5801 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5802 5803 /** 5804 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 5805 * 5806 * @stable ICU 3.0 5807 */ 5808 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5809 5810 /** 5811 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5812 * 5813 * @stable ICU 3.0 5814 */ 5815 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5816 5817 /** 5818 * Equivalent to {@link Character#isValidCodePoint}. 5819 * 5820 * @param cp the code point to check 5821 * @return true if cp is a valid code point 5822 * @stable ICU 3.0 5823 */ 5824 public static final boolean isValidCodePoint(int cp) { 5825 return cp >= 0 && cp <= MAX_CODE_POINT; 5826 } 5827 5828 /** 5829 * Same as {@link Character#isSupplementaryCodePoint}. 5830 * 5831 * @param cp the code point to check 5832 * @return true if cp is a supplementary code point 5833 * @stable ICU 3.0 5834 */ 5835 public static final boolean isSupplementaryCodePoint(int cp) { 5836 return Character.isSupplementaryCodePoint(cp); 5837 } 5838 5839 /** 5840 * Same as {@link Character#isHighSurrogate}. 5841 * 5842 * @param ch the char to check 5843 * @return true if ch is a high (lead) surrogate 5844 * @stable ICU 3.0 5845 */ 5846 public static boolean isHighSurrogate(char ch) { 5847 return Character.isHighSurrogate(ch); 5848 } 5849 5850 /** 5851 * Same as {@link Character#isLowSurrogate}. 5852 * 5853 * @param ch the char to check 5854 * @return true if ch is a low (trail) surrogate 5855 * @stable ICU 3.0 5856 */ 5857 public static boolean isLowSurrogate(char ch) { 5858 return Character.isLowSurrogate(ch); 5859 } 5860 5861 /** 5862 * Same as {@link Character#isSurrogatePair}. 
5863 * 5864 * @param high the high (lead) char 5865 * @param low the low (trail) char 5866 * @return true if high, low form a surrogate pair 5867 * @stable ICU 3.0 5868 */ 5869 public static final boolean isSurrogatePair(char high, char low) { 5870 return Character.isSurrogatePair(high, low); 5871 } 5872 5873 /** 5874 * Same as {@link Character#charCount}. 5875 * Returns the number of chars needed to represent the code point (1 or 2). 5876 * This does not check the code point for validity. 5877 * 5878 * @param cp the code point to check 5879 * @return the number of chars needed to represent the code point 5880 * @stable ICU 3.0 5881 */ 5882 public static int charCount(int cp) { 5883 return Character.charCount(cp); 5884 } 5885 5886 /** 5887 * Same as {@link Character#toCodePoint}. 5888 * Returns the code point represented by the two surrogate code units. 5889 * This does not check the surrogate pair for validity. 5890 * 5891 * @param high the high (lead) surrogate 5892 * @param low the low (trail) surrogate 5893 * @return the code point formed by the surrogate pair 5894 * @stable ICU 3.0 5895 */ 5896 public static final int toCodePoint(char high, char low) { 5897 return Character.toCodePoint(high, low); 5898 } 5899 5900 /** 5901 * Same as {@link Character#codePointAt(CharSequence, int)}. 5902 * Returns the code point at index. 5903 * This examines only the characters at index and index+1. 
5904 * 5905 * @param seq the characters to check 5906 * @param index the index of the first or only char forming the code point 5907 * @return the code point at the index 5908 * @stable ICU 3.0 5909 */ 5910 public static final int codePointAt(CharSequence seq, int index) { 5911 char c1 = seq.charAt(index++); 5912 if (isHighSurrogate(c1)) { 5913 if (index < seq.length()) { 5914 char c2 = seq.charAt(index); 5915 if (isLowSurrogate(c2)) { 5916 return toCodePoint(c1, c2); 5917 } 5918 } 5919 } 5920 return c1; 5921 } 5922 5923 /** 5924 * Same as {@link Character#codePointAt(char[], int)}. 5925 * Returns the code point at index. 5926 * This examines only the characters at index and index+1. 5927 * 5928 * @param text the characters to check 5929 * @param index the index of the first or only char forming the code point 5930 * @return the code point at the index 5931 * @stable ICU 3.0 5932 */ 5933 public static final int codePointAt(char[] text, int index) { 5934 char c1 = text[index++]; 5935 if (isHighSurrogate(c1)) { 5936 if (index < text.length) { 5937 char c2 = text[index]; 5938 if (isLowSurrogate(c2)) { 5939 return toCodePoint(c1, c2); 5940 } 5941 } 5942 } 5943 return c1; 5944 } 5945 5946 /** 5947 * Same as {@link Character#codePointAt(char[], int, int)}. 5948 * Returns the code point at index. 5949 * This examines only the characters at index and index+1. 
5950 * 5951 * @param text the characters to check 5952 * @param index the index of the first or only char forming the code point 5953 * @param limit the limit of the valid text 5954 * @return the code point at the index 5955 * @stable ICU 3.0 5956 */ 5957 public static final int codePointAt(char[] text, int index, int limit) { 5958 if (index >= limit || limit > text.length) { 5959 throw new IndexOutOfBoundsException(); 5960 } 5961 char c1 = text[index++]; 5962 if (isHighSurrogate(c1)) { 5963 if (index < limit) { 5964 char c2 = text[index]; 5965 if (isLowSurrogate(c2)) { 5966 return toCodePoint(c1, c2); 5967 } 5968 } 5969 } 5970 return c1; 5971 } 5972 5973 /** 5974 * Same as {@link Character#codePointBefore(CharSequence, int)}. 5975 * Return the code point before index. 5976 * This examines only the characters at index-1 and index-2. 5977 * 5978 * @param seq the characters to check 5979 * @param index the index after the last or only char forming the code point 5980 * @return the code point before the index 5981 * @stable ICU 3.0 5982 */ 5983 public static final int codePointBefore(CharSequence seq, int index) { 5984 char c2 = seq.charAt(--index); 5985 if (isLowSurrogate(c2)) { 5986 if (index > 0) { 5987 char c1 = seq.charAt(--index); 5988 if (isHighSurrogate(c1)) { 5989 return toCodePoint(c1, c2); 5990 } 5991 } 5992 } 5993 return c2; 5994 } 5995 5996 /** 5997 * Same as {@link Character#codePointBefore(char[], int)}. 5998 * Returns the code point before index. 5999 * This examines only the characters at index-1 and index-2. 
6000 * 6001 * @param text the characters to check 6002 * @param index the index after the last or only char forming the code point 6003 * @return the code point before the index 6004 * @stable ICU 3.0 6005 */ 6006 public static final int codePointBefore(char[] text, int index) { 6007 char c2 = text[--index]; 6008 if (isLowSurrogate(c2)) { 6009 if (index > 0) { 6010 char c1 = text[--index]; 6011 if (isHighSurrogate(c1)) { 6012 return toCodePoint(c1, c2); 6013 } 6014 } 6015 } 6016 return c2; 6017 } 6018 6019 /** 6020 * Same as {@link Character#codePointBefore(char[], int, int)}. 6021 * Return the code point before index. 6022 * This examines only the characters at index-1 and index-2. 6023 * 6024 * @param text the characters to check 6025 * @param index the index after the last or only char forming the code point 6026 * @param limit the start of the valid text 6027 * @return the code point before the index 6028 * @stable ICU 3.0 6029 */ 6030 public static final int codePointBefore(char[] text, int index, int limit) { 6031 if (index <= limit || limit < 0) { 6032 throw new IndexOutOfBoundsException(); 6033 } 6034 char c2 = text[--index]; 6035 if (isLowSurrogate(c2)) { 6036 if (index > limit) { 6037 char c1 = text[--index]; 6038 if (isHighSurrogate(c1)) { 6039 return toCodePoint(c1, c2); 6040 } 6041 } 6042 } 6043 return c2; 6044 } 6045 6046 /** 6047 * Same as {@link Character#toChars(int, char[], int)}. 6048 * Writes the chars representing the 6049 * code point into the destination at the given index. 
6050 * 6051 * @param cp the code point to convert 6052 * @param dst the destination array into which to put the char(s) representing the code point 6053 * @param dstIndex the index at which to put the first (or only) char 6054 * @return the count of the number of chars written (1 or 2) 6055 * @throws IllegalArgumentException if cp is not a valid code point 6056 * @stable ICU 3.0 6057 */ 6058 public static final int toChars(int cp, char[] dst, int dstIndex) { 6059 return Character.toChars(cp, dst, dstIndex); 6060 } 6061 6062 /** 6063 * Same as {@link Character#toChars(int)}. 6064 * Returns a char array representing the code point. 6065 * 6066 * @param cp the code point to convert 6067 * @return an array containing the char(s) representing the code point 6068 * @throws IllegalArgumentException if cp is not a valid code point 6069 * @stable ICU 3.0 6070 */ 6071 public static final char[] toChars(int cp) { 6072 return Character.toChars(cp); 6073 } 6074 6075 /** 6076 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6077 * convenience. Returns a byte representing the directionality of the 6078 * character. 6079 * 6080 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6081 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6082 * 6083 * {@icunote} The return value must be tested using the constants defined in {@link 6084 * UCharacterDirection} and its interface {@link 6085 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6086 * defined by <code>java.lang.Character</code>. 6087 * @param cp the code point to check 6088 * @return the directionality of the code point 6089 * @see #getDirection 6090 * @stable ICU 3.0 6091 */ 6092 public static byte getDirectionality(int cp) 6093 { 6094 return (byte)getDirection(cp); 6095 } 6096 6097 /** 6098 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6099 * method, for convenience. 
Counts the number of code points in the range 6100 * of text. 6101 * @param text the characters to check 6102 * @param start the start of the range 6103 * @param limit the limit of the range 6104 * @return the number of code points in the range 6105 * @stable ICU 3.0 6106 */ 6107 public static int codePointCount(CharSequence text, int start, int limit) { 6108 if (start < 0 || limit < start || limit > text.length()) { 6109 throw new IndexOutOfBoundsException("start (" + start + 6110 ") or limit (" + limit + 6111 ") invalid or out of range 0, " + text.length()); 6112 } 6113 6114 int len = limit - start; 6115 while (limit > start) { 6116 char ch = text.charAt(--limit); 6117 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6118 ch = text.charAt(--limit); 6119 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6120 --len; 6121 break; 6122 } 6123 } 6124 } 6125 return len; 6126 } 6127 6128 /** 6129 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6130 * convenience. Counts the number of code points in the range of text. 
6131 * @param text the characters to check 6132 * @param start the start of the range 6133 * @param limit the limit of the range 6134 * @return the number of code points in the range 6135 * @stable ICU 3.0 6136 */ 6137 public static int codePointCount(char[] text, int start, int limit) { 6138 if (start < 0 || limit < start || limit > text.length) { 6139 throw new IndexOutOfBoundsException("start (" + start + 6140 ") or limit (" + limit + 6141 ") invalid or out of range 0, " + text.length); 6142 } 6143 6144 int len = limit - start; 6145 while (limit > start) { 6146 char ch = text[--limit]; 6147 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6148 ch = text[--limit]; 6149 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6150 --len; 6151 break; 6152 } 6153 } 6154 } 6155 return len; 6156 } 6157 6158 /** 6159 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6160 * method, for convenience. Adjusts the char index by a code point offset. 
6161 * @param text the characters to check 6162 * @param index the index to adjust 6163 * @param codePointOffset the number of code points by which to offset the index 6164 * @return the adjusted index 6165 * @stable ICU 3.0 6166 */ 6167 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6168 if (index < 0 || index > text.length()) { 6169 throw new IndexOutOfBoundsException("index ( " + index + 6170 ") out of range 0, " + text.length()); 6171 } 6172 6173 if (codePointOffset < 0) { 6174 while (++codePointOffset <= 0) { 6175 char ch = text.charAt(--index); 6176 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6177 ch = text.charAt(--index); 6178 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6179 if (++codePointOffset > 0) { 6180 return index+1; 6181 } 6182 } 6183 } 6184 } 6185 } else { 6186 int limit = text.length(); 6187 while (--codePointOffset >= 0) { 6188 char ch = text.charAt(index++); 6189 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6190 ch = text.charAt(index++); 6191 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6192 if (--codePointOffset < 0) { 6193 return index-1; 6194 } 6195 } 6196 } 6197 } 6198 } 6199 6200 return index; 6201 } 6202 6203 /** 6204 * Equivalent to the 6205 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6206 * method, for convenience. Adjusts the char index by a code point offset. 
6207 * @param text the characters to check 6208 * @param start the start of the range to check 6209 * @param count the length of the range to check 6210 * @param index the index to adjust 6211 * @param codePointOffset the number of code points by which to offset the index 6212 * @return the adjusted index 6213 * @stable ICU 3.0 6214 */ 6215 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6216 int codePointOffset) { 6217 int limit = start + count; 6218 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6219 throw new IndexOutOfBoundsException("index ( " + index + 6220 ") out of range " + start + 6221 ", " + limit + 6222 " in array 0, " + text.length); 6223 } 6224 6225 if (codePointOffset < 0) { 6226 while (++codePointOffset <= 0) { 6227 char ch = text[--index]; 6228 if (index < start) { 6229 throw new IndexOutOfBoundsException("index ( " + index + 6230 ") < start (" + start + 6231 ")"); 6232 } 6233 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6234 ch = text[--index]; 6235 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6236 if (++codePointOffset > 0) { 6237 return index+1; 6238 } 6239 } 6240 } 6241 } 6242 } else { 6243 while (--codePointOffset >= 0) { 6244 char ch = text[index++]; 6245 if (index > limit) { 6246 throw new IndexOutOfBoundsException("index ( " + index + 6247 ") > limit (" + limit + 6248 ")"); 6249 } 6250 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6251 ch = text[index++]; 6252 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6253 if (--codePointOffset < 0) { 6254 return index-1; 6255 } 6256 } 6257 } 6258 } 6259 } 6260 6261 return index; 6262 } 6263 6264 // private variables ------------------------------------------------- 6265 6266 /** 6267 * To get the last character out from a data type 6268 */ 6269 private static final int LAST_CHAR_MASK_ = 0xFFFF; 6270 6271 // /** 6272 // * To get the last 
byte out from a data type 6273 // */ 6274 // private static final int LAST_BYTE_MASK_ = 0xFF; 6275 // 6276 // /** 6277 // * Shift 16 bits 6278 // */ 6279 // private static final int SHIFT_16_ = 16; 6280 // 6281 // /** 6282 // * Shift 24 bits 6283 // */ 6284 // private static final int SHIFT_24_ = 24; 6285 // 6286 // /** 6287 // * Decimal radix 6288 // */ 6289 // private static final int DECIMAL_RADIX_ = 10; 6290 6291 /** 6292 * No break space code point 6293 */ 6294 private static final int NO_BREAK_SPACE_ = 0xA0; 6295 6296 /** 6297 * Figure space code point 6298 */ 6299 private static final int FIGURE_SPACE_ = 0x2007; 6300 6301 /** 6302 * Narrow no break space code point 6303 */ 6304 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6305 6306 /** 6307 * Ideographic number zero code point 6308 */ 6309 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6310 6311 /** 6312 * CJK Ideograph, First code point 6313 */ 6314 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6315 6316 /** 6317 * CJK Ideograph, Second code point 6318 */ 6319 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6320 6321 /** 6322 * CJK Ideograph, Third code point 6323 */ 6324 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6325 6326 /** 6327 * CJK Ideograph, Fourth code point 6328 */ 6329 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6330 6331 /** 6332 * CJK Ideograph, FIFTH code point 6333 */ 6334 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6335 6336 /** 6337 * CJK Ideograph, Sixth code point 6338 */ 6339 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6340 6341 /** 6342 * CJK Ideograph, Seventh code point 6343 */ 6344 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6345 6346 /** 6347 * CJK Ideograph, Eighth code point 6348 */ 6349 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6350 6351 /** 6352 * CJK Ideograph, Nineth code point 6353 */ 6354 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 6355 
/** Application Program Command code point (U+009F). */
private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

/** Unit separator code point (U+001F). */
private static final int UNIT_SEPARATOR_ = 0x001F;

/** Delete code point (U+007F). */
private static final int DELETE_ = 0x007F;

/**
 * Han digit characters. Going by the names, the COMPLEX_ variants are
 * alternative ideographs for the same numeric values.
 */
private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96F6;
private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58F9;
private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8CB3;
private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53C3;
private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086;
private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4F0D;
private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678;
private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67D2;
private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634C;
private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396;
private static final int CJK_IDEOGRAPH_TEN_ = 0x5341;
private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62FE;
private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767E;
private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4F70;
private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343;
private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4EDF;
// NOTE(review): the ideograph normally used for ten thousand is U+842C, so 0x824C
// looks like a digit transposition. The value is left unchanged here because the
// code and tests that consume it are outside this section. Confirm against the
// Unihan numeric data before changing it.
private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824C;
private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104;

// private constructor -----------------------------------------------
///CLOVER:OFF
/**
 * Private constructor; the empty body exists only so that the class cannot be
 * instantiated from outside.
 */
private UCharacter() {
}
///CLOVER:ON
}