1/** 2 ******************************************************************************* 3 * Copyright (C) 2001-2015 International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8package com.ibm.icu.lang; 9 10import java.util.BitSet; 11import java.util.Locale; 12 13import com.ibm.icu.impl.UCharacterProperty; 14import com.ibm.icu.util.ULocale; 15 16/** 17 * Constants for ISO 15924 script codes, and related functions. 18 * 19 * <p>The current set of script code constants supports at least all scripts 20 * that are encoded in the version of Unicode which ICU currently supports. 21 * The names of the constants are usually derived from the 22 * Unicode script property value aliases. 23 * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) 24 * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . 25 * 26 * <p>Starting with ICU 3.6, constants for most ISO 15924 script codes 27 * are included, for use with language tags, CLDR data, and similar. 28 * Some of those codes are not used in the Unicode Character Database (UCD). 29 * For example, there are no characters that have a UCD script property value of 30 * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. 31 * 32 * <p>Private-use codes Qaaa..Qabx are not included. 33 * 34 * <p>Starting with ICU 55, script codes are only added when their scripts 35 * have been or will certainly be encoded in Unicode, 36 * and have been assigned Unicode script property value aliases, 37 * to ensure that their script names are stable and match the names of the constants. 38 * Script codes like Latf and Aran that are not subject to separate encoding 39 * may be added at any time. 
40 * 41 * @stable ICU 2.4 42 */ 43public final class UScript { 44 /** 45 * Invalid code 46 * @stable ICU 2.4 47 */ 48 public static final int INVALID_CODE = -1; 49 /** 50 * Common 51 * @stable ICU 2.4 52 */ 53 public static final int COMMON = 0; /* Zyyy */ 54 /** 55 * Inherited 56 * @stable ICU 2.4 57 */ 58 public static final int INHERITED = 1; /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ 59 /** 60 * Arabic 61 * @stable ICU 2.4 62 */ 63 public static final int ARABIC = 2; /* Arab */ 64 /** 65 * Armenian 66 * @stable ICU 2.4 67 */ 68 public static final int ARMENIAN = 3; /* Armn */ 69 /** 70 * Bengali 71 * @stable ICU 2.4 72 */ 73 public static final int BENGALI = 4; /* Beng */ 74 /** 75 * Bopomofo 76 * @stable ICU 2.4 77 */ 78 public static final int BOPOMOFO = 5; /* Bopo */ 79 /** 80 * Cherokee 81 * @stable ICU 2.4 82 */ 83 public static final int CHEROKEE = 6; /* Cher */ 84 /** 85 * Coptic 86 * @stable ICU 2.4 87 */ 88 public static final int COPTIC = 7; /* Copt (Qaac) */ 89 /** 90 * Cyrillic 91 * @stable ICU 2.4 92 */ 93 public static final int CYRILLIC = 8; /* Cyrl (Cyrs) */ 94 /** 95 * Deseret 96 * @stable ICU 2.4 97 */ 98 public static final int DESERET = 9; /* Dsrt */ 99 /** 100 * Devanagari 101 * @stable ICU 2.4 102 */ 103 public static final int DEVANAGARI = 10; /* Deva */ 104 /** 105 * Ethiopic 106 * @stable ICU 2.4 107 */ 108 public static final int ETHIOPIC = 11; /* Ethi */ 109 /** 110 * Georgian 111 * @stable ICU 2.4 112 */ 113 public static final int GEORGIAN = 12; /* Geor (Geon; Geoa) */ 114 /** 115 * Gothic 116 * @stable ICU 2.4 117 */ 118 public static final int GOTHIC = 13; /* Goth */ 119 /** 120 * Greek 121 * @stable ICU 2.4 122 */ 123 public static final int GREEK = 14; /* Grek */ 124 /** 125 * Gujarati 126 * @stable ICU 2.4 127 */ 128 public static final int GUJARATI = 15; /* Gujr */ 129 /** 130 * Gurmukhi 131 * @stable ICU 2.4 132 */ 133 public static final int GURMUKHI = 16; /* Guru */ 134 /** 135 * Han 
136 * @stable ICU 2.4 137 */ 138 public static final int HAN = 17; /* Hani */ 139 /** 140 * Hangul 141 * @stable ICU 2.4 142 */ 143 public static final int HANGUL = 18; /* Hang */ 144 /** 145 * Hebrew 146 * @stable ICU 2.4 147 */ 148 public static final int HEBREW = 19; /* Hebr */ 149 /** 150 * Hiragana 151 * @stable ICU 2.4 152 */ 153 public static final int HIRAGANA = 20; /* Hira */ 154 /** 155 * Kannada 156 * @stable ICU 2.4 157 */ 158 public static final int KANNADA = 21; /* Knda */ 159 /** 160 * Katakana 161 * @stable ICU 2.4 162 */ 163 public static final int KATAKANA = 22; /* Kana */ 164 /** 165 * Khmer 166 * @stable ICU 2.4 167 */ 168 public static final int KHMER = 23; /* Khmr */ 169 /** 170 * Lao 171 * @stable ICU 2.4 172 */ 173 public static final int LAO = 24; /* Laoo */ 174 /** 175 * Latin 176 * @stable ICU 2.4 177 */ 178 public static final int LATIN = 25; /* Latn (Latf; Latg) */ 179 /** 180 * Malayalam 181 * @stable ICU 2.4 182 */ 183 public static final int MALAYALAM = 26; /* Mlym */ 184 /** 185 * Mongolian 186 * @stable ICU 2.4 187 */ 188 public static final int MONGOLIAN = 27; /* Mong */ 189 /** 190 * Myanmar 191 * @stable ICU 2.4 192 */ 193 public static final int MYANMAR = 28; /* Mymr */ 194 /** 195 * Ogham 196 * @stable ICU 2.4 197 */ 198 public static final int OGHAM = 29; /* Ogam */ 199 /** 200 * Old Italic 201 * @stable ICU 2.4 202 */ 203 public static final int OLD_ITALIC = 30; /* Ital */ 204 /** 205 * Oriya 206 * @stable ICU 2.4 207 */ 208 public static final int ORIYA = 31; /* Orya */ 209 /** 210 * Runic 211 * @stable ICU 2.4 212 */ 213 public static final int RUNIC = 32; /* Runr */ 214 /** 215 * Sinhala 216 * @stable ICU 2.4 217 */ 218 public static final int SINHALA = 33; /* Sinh */ 219 /** 220 * Syriac 221 * @stable ICU 2.4 222 */ 223 public static final int SYRIAC = 34; /* Syrc (Syrj; Syrn; Syre) */ 224 /** 225 * Tamil 226 * @stable ICU 2.4 227 */ 228 public static final int TAMIL = 35; /* Taml */ 229 /** 230 * Telugu 231 * @stable 
ICU 2.4 232 */ 233 public static final int TELUGU = 36; /* Telu */ 234 /** 235 * Thaana 236 * @stable ICU 2.4 237 */ 238 public static final int THAANA = 37; /* Thaa */ 239 /** 240 * Thai 241 * @stable ICU 2.4 242 */ 243 public static final int THAI = 38; /* Thai */ 244 /** 245 * Tibetan 246 * @stable ICU 2.4 247 */ 248 public static final int TIBETAN = 39; /* Tibt */ 249 /** 250 * Unified Canadian Aboriginal Symbols 251 * @stable ICU 2.6 252 */ 253 public static final int CANADIAN_ABORIGINAL = 40; /* Cans */ 254 /** 255 * Unified Canadian Aboriginal Symbols (alias) 256 * @stable ICU 2.4 257 */ 258 public static final int UCAS = CANADIAN_ABORIGINAL; /* Cans */ 259 /** 260 * Yi syllables 261 * @stable ICU 2.4 262 */ 263 public static final int YI = 41; /* Yiii */ 264 /** 265 * Tagalog 266 * @stable ICU 2.4 267 */ 268 public static final int TAGALOG = 42; /* Tglg */ 269 /** 270 * Hanunoo 271 * @stable ICU 2.4 272 */ 273 public static final int HANUNOO = 43; /* Hano */ 274 /** 275 * Buhid 276 * @stable ICU 2.4 277 */ 278 public static final int BUHID = 44; /* Buhd */ 279 /** 280 * Tagbanwa 281 * @stable ICU 2.4 282 */ 283 public static final int TAGBANWA = 45; /* Tagb */ 284 /** 285 * Braille 286 * Script in Unicode 4 287 * @stable ICU 2.6 288 * 289 */ 290 public static final int BRAILLE = 46; /* Brai */ 291 /** 292 * Cypriot 293 * Script in Unicode 4 294 * @stable ICU 2.6 295 * 296 */ 297 public static final int CYPRIOT = 47; /* Cprt */ 298 /** 299 * Limbu 300 * Script in Unicode 4 301 * @stable ICU 2.6 302 * 303 */ 304 public static final int LIMBU = 48; /* Limb */ 305 /** 306 * Linear B 307 * Script in Unicode 4 308 * @stable ICU 2.6 309 * 310 */ 311 public static final int LINEAR_B = 49; /* Linb */ 312 /** 313 * Osmanya 314 * Script in Unicode 4 315 * @stable ICU 2.6 316 * 317 */ 318 public static final int OSMANYA = 50; /* Osma */ 319 /** 320 * Shavian 321 * Script in Unicode 4 322 * @stable ICU 2.6 323 * 324 */ 325 public static final int SHAVIAN = 51; /* Shaw 
*/ 326 /** 327 * Tai Le 328 * Script in Unicode 4 329 * @stable ICU 2.6 330 * 331 */ 332 public static final int TAI_LE = 52; /* Tale */ 333 /** 334 * Ugaritic 335 * Script in Unicode 4 336 * @stable ICU 2.6 337 * 338 */ 339 public static final int UGARITIC = 53; /* Ugar */ 340 /** 341 * Katakana or Hiragana; script in Unicode 4.0.1 342 * @stable ICU 3.0 343 */ 344 public static final int KATAKANA_OR_HIRAGANA = 54; /*Hrkt */ 345 346 /** 347 * Buginese; script in Unicode 4.1 348 * @stable ICU 3.4 349 */ 350 public static final int BUGINESE = 55; /* Bugi */ 351 /** 352 * Glagolitic; script in Unicode 4.1 353 * @stable ICU 3.4 354 */ 355 public static final int GLAGOLITIC = 56; /* Glag */ 356 /** 357 * Kharoshthi; script in Unicode 4.1 358 * @stable ICU 3.4 359 */ 360 public static final int KHAROSHTHI = 57; /* Khar */ 361 /** 362 * Syloti Nagri; script in Unicode 4.1 363 * @stable ICU 3.4 364 */ 365 public static final int SYLOTI_NAGRI = 58; /* Sylo */ 366 /** 367 * New Tai Lue; script in Unicode 4.1 368 * @stable ICU 3.4 369 */ 370 public static final int NEW_TAI_LUE = 59; /* Talu */ 371 /** 372 * Tifinagh; script in Unicode 4.1 373 * @stable ICU 3.4 374 */ 375 public static final int TIFINAGH = 60; /* Tfng */ 376 /** 377 * Old Persian; script in Unicode 4.1 378 * @stable ICU 3.4 379 */ 380 public static final int OLD_PERSIAN = 61; /* Xpeo */ 381 382 383 /** 384 * ISO 15924 script code 385 * @stable ICU 3.6 386 */ 387 public static final int BALINESE = 62; /* Bali */ 388 /** 389 * ISO 15924 script code 390 * @stable ICU 3.6 391 */ 392 public static final int BATAK = 63; /* Batk */ 393 /** 394 * ISO 15924 script code 395 * @stable ICU 3.6 396 */ 397 public static final int BLISSYMBOLS = 64; /* Blis */ 398 /** 399 * ISO 15924 script code 400 * @stable ICU 3.6 401 */ 402 public static final int BRAHMI = 65; /* Brah */ 403 /** 404 * ISO 15924 script code 405 * @stable ICU 3.6 406 */ 407 public static final int CHAM = 66; /* Cham */ 408 /** 409 * ISO 15924 script code 410 * @stable ICU 3.6 411 */ 412 public static final int CIRTH = 67; /* Cirt */ 413 /** 414 * ISO 15924 
script code 415 * @stable ICU 3.6 416 */ 417 public static final int OLD_CHURCH_SLAVONIC_CYRILLIC = 68; /* Cyrs */ 418 /** 419 * ISO 15924 script code 420 * @stable ICU 3.6 421 */ 422 public static final int DEMOTIC_EGYPTIAN = 69; /* Egyd */ 423 /** 424 * ISO 15924 script code 425 * @stable ICU 3.6 426 */ 427 public static final int HIERATIC_EGYPTIAN = 70; /* Egyh */ 428 /** 429 * ISO 15924 script code 430 * @stable ICU 3.6 431 */ 432 public static final int EGYPTIAN_HIEROGLYPHS = 71; /* Egyp */ 433 /** 434 * ISO 15924 script code 435 * @stable ICU 3.6 436 */ 437 public static final int KHUTSURI = 72; /* Geok */ 438 /** 439 * ISO 15924 script code (Hans); not a UCD script property value, Han ideographs have the script HAN 440 * @stable ICU 3.6 441 */ 442 public static final int SIMPLIFIED_HAN = 73; /* Hans */ 443 /** 444 * ISO 15924 script code (Hant); not a UCD script property value, Han ideographs have the script HAN 445 * @stable ICU 3.6 446 */ 447 public static final int TRADITIONAL_HAN = 74; /* Hant */ 448 /** 449 * ISO 15924 script code 450 * @stable ICU 3.6 451 */ 452 public static final int PAHAWH_HMONG = 75; /* Hmng */ 453 /** 454 * ISO 15924 script code 455 * @stable ICU 3.6 456 */ 457 public static final int OLD_HUNGARIAN = 76; /* Hung */ 458 /** 459 * ISO 15924 script code 460 * @stable ICU 3.6 461 */ 462 public static final int HARAPPAN_INDUS = 77; /* Inds */ 463 /** 464 * ISO 15924 script code 465 * @stable ICU 3.6 466 */ 467 public static final int JAVANESE = 78; /* Java */ 468 /** 469 * ISO 15924 script code 470 * @stable ICU 3.6 471 */ 472 public static final int KAYAH_LI = 79; /* Kali */ 473 /** 474 * ISO 15924 script code 475 * @stable ICU 3.6 476 */ 477 public static final int LATIN_FRAKTUR = 80; /* Latf */ 478 /** 479 * ISO 15924 script code 480 * @stable ICU 3.6 481 */ 482 public static final int LATIN_GAELIC = 81; /* Latg */ 483 /** 484 * ISO 15924 script code 485 * @stable ICU 3.6 486 */ 487 public static final int LEPCHA = 82; /* Lepc */ 488 /** 489 * ISO 15924 script code 490 * @stable ICU 3.6 491 */ 492 public static final int LINEAR_A = 83; /* Lina */ 493 /** 494 * ISO 15924 script code 
495 * @stable ICU 4.6 496 */ 497 public static final int MANDAIC = 84; /* Mand */ 498 /** 499 * ISO 15924 script code; alias for {@link #MANDAIC} 500 * @stable ICU 3.6 501 */ 502 public static final int MANDAEAN = MANDAIC; 503 /** 504 * ISO 15924 script code 505 * @stable ICU 3.6 506 */ 507 public static final int MAYAN_HIEROGLYPHS = 85; /* Maya */ 508 /** 509 * ISO 15924 script code 510 * @stable ICU 4.6 511 */ 512 public static final int MEROITIC_HIEROGLYPHS = 86; /* Mero */ 513 /** 514 * ISO 15924 script code; alias for {@link #MEROITIC_HIEROGLYPHS} 515 * @stable ICU 3.6 516 */ 517 public static final int MEROITIC = MEROITIC_HIEROGLYPHS; 518 /** 519 * ISO 15924 script code 520 * @stable ICU 3.6 521 */ 522 public static final int NKO = 87; /* Nkoo */ 523 /** 524 * ISO 15924 script code 525 * @stable ICU 3.6 526 */ 527 public static final int ORKHON = 88; /* Orkh */ 528 /** 529 * ISO 15924 script code 530 * @stable ICU 3.6 531 */ 532 public static final int OLD_PERMIC = 89; /* Perm */ 533 /** 534 * ISO 15924 script code 535 * @stable ICU 3.6 536 */ 537 public static final int PHAGS_PA = 90; /* Phag */ 538 /** 539 * ISO 15924 script code 540 * @stable ICU 3.6 541 */ 542 public static final int PHOENICIAN = 91; /* Phnx */ 543 /** 544 * ISO 15924 script code 545 * @stable ICU 52 546 */ 547 public static final int MIAO = 92; /* Plrd */ 548 /** 549 * ISO 15924 script code; alias for {@link #MIAO} 550 * @stable ICU 3.6 551 */ 552 public static final int PHONETIC_POLLARD = MIAO; 553 /** 554 * ISO 15924 script code 555 * @stable ICU 3.6 556 */ 557 public static final int RONGORONGO = 93; /* Roro */ 558 /** 559 * ISO 15924 script code 560 * @stable ICU 3.6 561 */ 562 public static final int SARATI = 94; /* Sara */ 563 /** 564 * ISO 15924 script code 565 * @stable ICU 3.6 566 */ 567 public static final int ESTRANGELO_SYRIAC = 95; /* Syre */ 568 /** 569 * ISO 15924 script code 570 * @stable ICU 3.6 571 */ 572 public static final int WESTERN_SYRIAC = 96; /* Syrj */ 573 /** 574 * ISO 15924 script code 575 * @stable ICU 3.6 576 */ 577 public static final int 
EASTERN_SYRIAC = 97; /* Syrn */ 578 /** 579 * ISO 15924 script code 580 * @stable ICU 3.6 581 */ 582 public static final int TENGWAR = 98; /* Teng */ 583 /** 584 * ISO 15924 script code 585 * @stable ICU 3.6 586 */ 587 public static final int VAI = 99; /* Vaii */ 588 /** 589 * ISO 15924 script code 590 * @stable ICU 3.6 591 */ 592 public static final int VISIBLE_SPEECH = 100;/* Visp */ 593 /** 594 * ISO 15924 script code 595 * @stable ICU 3.6 596 */ 597 public static final int CUNEIFORM = 101;/* Xsux */ 598 /** 599 * ISO 15924 script code 600 * @stable ICU 3.6 601 */ 602 public static final int UNWRITTEN_LANGUAGES = 102;/* Zxxx */ 603 /** 604 * ISO 15924 "Code for uncoded script" (Zzzz), for unassigned code points 605 * @stable ICU 3.6 606 */ 607 public static final int UNKNOWN = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ 608 609 /** 610 * ISO 15924 script code 611 * @stable ICU 3.8 612 */ 613 public static final int CARIAN = 104;/* Cari */ 614 /** 615 * ISO 15924 script code 616 * @stable ICU 3.8 617 */ 618 public static final int JAPANESE = 105;/* Jpan */ 619 /** 620 * ISO 15924 script code 621 * @stable ICU 3.8 622 */ 623 public static final int LANNA = 106;/* Lana */ 624 /** 625 * ISO 15924 script code 626 * @stable ICU 3.8 627 */ 628 public static final int LYCIAN = 107;/* Lyci */ 629 /** 630 * ISO 15924 script code 631 * @stable ICU 3.8 632 */ 633 public static final int LYDIAN = 108;/* Lydi */ 634 /** 635 * ISO 15924 script code 636 * @stable ICU 3.8 637 */ 638 public static final int OL_CHIKI = 109;/* Olck */ 639 /** 640 * ISO 15924 script code 641 * @stable ICU 3.8 642 */ 643 public static final int REJANG = 110;/* Rjng */ 644 /** 645 * ISO 15924 script code 646 * @stable ICU 3.8 647 */ 648 public static final int SAURASHTRA = 111;/* Saur */ 649 /** 650 * ISO 15924 script code 651 * @stable ICU 3.8 652 */ 653 public static final int SIGN_WRITING = 112;/* Sgnw */ 654 /** 655 * ISO 15924 script code 656 * @stable ICU 3.8 657 */ 658 public static final int SUNDANESE = 
113;/* Sund */ 659 /** 660 * ISO 15924 script code 661 * @stable ICU 3.8 662 */ 663 public static final int MOON = 114;/* Moon */ 664 /** 665 * ISO 15924 script code 666 * @stable ICU 3.8 667 */ 668 public static final int MEITEI_MAYEK = 115;/* Mtei */ 669 670 /** 671 * ISO 15924 script code 672 * @stable ICU 4.0 673 */ 674 public static final int IMPERIAL_ARAMAIC = 116;/* Armi */ 675 676 /** 677 * ISO 15924 script code 678 * @stable ICU 4.0 679 */ 680 public static final int AVESTAN = 117;/* Avst */ 681 682 /** 683 * ISO 15924 script code 684 * @stable ICU 4.0 685 */ 686 public static final int CHAKMA = 118;/* Cakm */ 687 688 /** 689 * ISO 15924 script code 690 * @stable ICU 4.0 691 */ 692 public static final int KOREAN = 119;/* Kore */ 693 694 /** 695 * ISO 15924 script code 696 * @stable ICU 4.0 697 */ 698 public static final int KAITHI = 120;/* Kthi */ 699 700 /** 701 * ISO 15924 script code 702 * @stable ICU 4.0 703 */ 704 public static final int MANICHAEAN = 121;/* Mani */ 705 706 /** 707 * ISO 15924 script code 708 * @stable ICU 4.0 709 */ 710 public static final int INSCRIPTIONAL_PAHLAVI = 122;/* Phli */ 711 712 /** 713 * ISO 15924 script code 714 * @stable ICU 4.0 715 */ 716 public static final int PSALTER_PAHLAVI = 123;/* Phlp */ 717 718 /** 719 * ISO 15924 script code 720 * @stable ICU 4.0 721 */ 722 public static final int BOOK_PAHLAVI = 124;/* Phlv */ 723 724 /** 725 * ISO 15924 script code 726 * @stable ICU 4.0 727 */ 728 public static final int INSCRIPTIONAL_PARTHIAN = 125;/* Prti */ 729 730 /** 731 * ISO 15924 script code 732 * @stable ICU 4.0 733 */ 734 public static final int SAMARITAN = 126;/* Samr */ 735 736 /** 737 * ISO 15924 script code 738 * @stable ICU 4.0 739 */ 740 public static final int TAI_VIET = 127;/* Tavt */ 741 742 /** 743 * ISO 15924 script code 744 * @stable ICU 4.0 745 */ 746 public static final int MATHEMATICAL_NOTATION = 128;/* Zmth */ 747 748 /** 749 * ISO 15924 script code 750 * @stable ICU 4.0 751 */ 752 public static final 
int SYMBOLS = 129;/* Zsym */ 753 754 /** 755 * ISO 15924 script code 756 * @stable ICU 4.4 757 */ 758 public static final int BAMUM = 130;/* Bamu */ 759 /** 760 * ISO 15924 script code 761 * @stable ICU 4.4 762 */ 763 public static final int LISU = 131;/* Lisu */ 764 /** 765 * ISO 15924 script code 766 * @stable ICU 4.4 767 */ 768 public static final int NAKHI_GEBA = 132;/* Nkgb */ 769 /** 770 * ISO 15924 script code 771 * @stable ICU 4.4 772 */ 773 public static final int OLD_SOUTH_ARABIAN = 133;/* Sarb */ 774 775 /** 776 * ISO 15924 script code 777 * @stable ICU 4.6 778 */ 779 public static final int BASSA_VAH = 134;/* Bass */ 780 /** 781 * ISO 15924 script code 782 * @stable ICU 54 783 */ 784 public static final int DUPLOYAN = 135;/* Dupl */ 785 /** 786 * Typo, use {@link #DUPLOYAN} 787 * @deprecated ICU 54 Use {@link #DUPLOYAN} instead. 788 */ 789 @Deprecated 790 public static final int DUPLOYAN_SHORTAND = DUPLOYAN; 791 /** 792 * ISO 15924 script code 793 * @stable ICU 4.6 794 */ 795 public static final int ELBASAN = 136;/* Elba */ 796 /** 797 * ISO 15924 script code 798 * @stable ICU 4.6 799 */ 800 public static final int GRANTHA = 137;/* Gran */ 801 /** 802 * ISO 15924 script code 803 * @stable ICU 4.6 804 */ 805 public static final int KPELLE = 138;/* Kpel */ 806 /** 807 * ISO 15924 script code 808 * @stable ICU 4.6 809 */ 810 public static final int LOMA = 139;/* Loma */ 811 /** 812 * Mende Kikakui 813 * ISO 15924 script code 814 * @stable ICU 4.6 815 */ 816 public static final int MENDE = 140;/* Mend */ 817 /** 818 * ISO 15924 script code 819 * @stable ICU 4.6 820 */ 821 public static final int MEROITIC_CURSIVE = 141;/* Merc */ 822 /** 823 * ISO 15924 script code 824 * @stable ICU 4.6 825 */ 826 public static final int OLD_NORTH_ARABIAN = 142;/* Narb */ 827 /** 828 * ISO 15924 script code 829 * @stable ICU 4.6 830 */ 831 public static final int NABATAEAN = 143;/* Nbat */ 832 /** 833 * ISO 15924 script code 834 * @stable ICU 4.6 835 */ 836 public static final int PALMYRENE = 144;/* Palm */ 837 
/** 838 * ISO 15924 script code 839 * @stable ICU 54 840 */ 841 public static final int KHUDAWADI = 145;/* Sind */ 842 /** 843 * ISO 15924 script code; alias for {@link #KHUDAWADI} 844 * @stable ICU 4.6 845 */ 846 public static final int SINDHI = KHUDAWADI; 847 /** 848 * ISO 15924 script code 849 * @stable ICU 4.6 850 */ 851 public static final int WARANG_CITI = 146;/* Wara */ 852 853 /** 854 * ISO 15924 script code 855 * @stable ICU 4.8 856 */ 857 public static final int AFAKA = 147;/* Afak */ 858 /** 859 * ISO 15924 script code 860 * @stable ICU 4.8 861 */ 862 public static final int JURCHEN = 148;/* Jurc */ 863 /** 864 * ISO 15924 script code 865 * @stable ICU 4.8 866 */ 867 public static final int MRO = 149;/* Mroo */ 868 /** 869 * ISO 15924 script code 870 * @stable ICU 4.8 871 */ 872 public static final int NUSHU = 150;/* Nshu */ 873 /** 874 * ISO 15924 script code 875 * @stable ICU 4.8 876 */ 877 public static final int SHARADA = 151;/* Shrd */ 878 /** 879 * ISO 15924 script code 880 * @stable ICU 4.8 881 */ 882 public static final int SORA_SOMPENG = 152;/* Sora */ 883 /** 884 * ISO 15924 script code 885 * @stable ICU 4.8 886 */ 887 public static final int TAKRI = 153;/* Takr */ 888 /** 889 * ISO 15924 script code 890 * @stable ICU 4.8 891 */ 892 public static final int TANGUT = 154;/* Tang */ 893 /** 894 * ISO 15924 script code 895 * @stable ICU 4.8 896 */ 897 public static final int WOLEAI = 155;/* Wole */ 898 899 /** 900 * ISO 15924 script code 901 * @stable ICU 49 902 */ 903 public static final int ANATOLIAN_HIEROGLYPHS = 156;/* Hluw */ 904 /** 905 * ISO 15924 script code 906 * @stable ICU 49 907 */ 908 public static final int KHOJKI = 157;/* Khoj */ 909 /** 910 * ISO 15924 script code 911 * @stable ICU 49 912 */ 913 public static final int TIRHUTA = 158;/* Tirh */ 914 /** 915 * ISO 15924 script code 916 * @stable ICU 52 917 */ 918 public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */ 919 /** 920 * ISO 15924 script code 921 * @stable ICU 52 922 */ 923 public static final int 
MAHAJANI = 160; /* Mahj */ 924 925 /** 926 * ISO 15924 script code 927 * @stable ICU 54 928 */ 929 public static final int AHOM = 161; /* Ahom */ 930 /** 931 * ISO 15924 script code 932 * @stable ICU 54 933 */ 934 public static final int HATRAN = 162; /* Hatr */ 935 /** 936 * ISO 15924 script code 937 * @stable ICU 54 938 */ 939 public static final int MODI = 163; /* Modi */ 940 /** 941 * ISO 15924 script code 942 * @stable ICU 54 943 */ 944 public static final int MULTANI = 164; /* Mult */ 945 /** 946 * ISO 15924 script code 947 * @stable ICU 54 948 */ 949 public static final int PAU_CIN_HAU = 165; /* Pauc */ 950 /** 951 * ISO 15924 script code 952 * @stable ICU 54 953 */ 954 public static final int SIDDHAM = 166; /* Sidd */ 955 956 /** 957 * <p>One higher than the last script code constant. 958 * This value increases as constants for script codes are added. 959 * 960 * <p>There are constants for Unicode 7 script property values. 961 * There are constants for ISO 15924 script codes assigned on or before 2013-10-12. 962 * There are no constants for private use codes from Qaaa - Qabx 963 * except as used in the UCD. 964 * 965 * @stable ICU 2.4 966 */ 967 public static final int CODE_LIMIT = 167; 968 969 private static int[] getCodesFromLocale(ULocale locale) { 970 // Multi-script languages, equivalent to the LocaleScript data 971 // that we used to load from locale resource bundles. 972 String lang = locale.getLanguage(); 973 if(lang.equals("ja")) { 974 return new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN }; 975 } 976 if(lang.equals("ko")) { 977 return new int[] { UScript.HANGUL, UScript.HAN }; 978 } 979 String script = locale.getScript(); 980 if(lang.equals("zh") && script.equals("Hant")) { 981 return new int[] { UScript.HAN, UScript.BOPOMOFO }; 982 } 983 // Explicit script code. 
984 if(script.length() != 0) { 985 int scriptCode = UScript.getCodeFromName(script); 986 if(scriptCode != UScript.INVALID_CODE) { 987 if(scriptCode == UScript.SIMPLIFIED_HAN || scriptCode == UScript.TRADITIONAL_HAN) { 988 scriptCode = UScript.HAN; 989 } 990 return new int[] { scriptCode }; 991 } 992 } 993 return null; 994 } 995 996 /** 997 * Helper function to find the code from locale. 998 * @param locale The locale. 999 */ 1000 private static int[] findCodeFromLocale(ULocale locale) { 1001 int[] result = getCodesFromLocale(locale); 1002 if(result != null) { 1003 return result; 1004 } 1005 ULocale likely = ULocale.addLikelySubtags(locale); 1006 return getCodesFromLocale(likely); 1007 } 1008 1009 /** 1010 * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name. 1011 * Returns MALAYAM given "Malayam" OR "Mlym". 1012 * Returns LATIN given "en" OR "en_US" 1013 * @param locale Locale 1014 * @return The script codes array. null if the the code cannot be found. 1015 * @stable ICU 2.4 1016 */ 1017 public static final int[] getCode(Locale locale){ 1018 return findCodeFromLocale(ULocale.forLocale(locale)); 1019 } 1020 /** 1021 * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name. 1022 * Returns MALAYAM given "Malayam" OR "Mlym". 1023 * Returns LATIN given "en" OR "en_US" 1024 * @param locale ULocale 1025 * @return The script codes array. null if the the code cannot be found. 1026 * @stable ICU 3.0 1027 */ 1028 public static final int[] getCode(ULocale locale){ 1029 return findCodeFromLocale(locale); 1030 } 1031 /** 1032 * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. 1033 * Returns MALAYAM given "Malayam" OR "Mlym". 1034 * Returns LATIN given "en" OR "en_US" 1035 * 1036 * <p>Note: To search by short or long script alias only, use 1037 * {@link #getCodeFromName(String)} instead. 1038 * That does a fast lookup with no access of the locale data. 
1039 * 1040 * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale 1041 * @return The script codes array. null if the the code cannot be found. 1042 * @stable ICU 2.4 1043 */ 1044 public static final int[] getCode(String nameOrAbbrOrLocale) { 1045 boolean triedCode = false; 1046 if (nameOrAbbrOrLocale.indexOf('_') < 0 && nameOrAbbrOrLocale.indexOf('-') < 0) { 1047 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale); 1048 if (propNum != UProperty.UNDEFINED) { 1049 return new int[] {propNum}; 1050 } 1051 triedCode = true; 1052 } 1053 int[] scripts = findCodeFromLocale(new ULocale(nameOrAbbrOrLocale)); 1054 if (scripts != null) { 1055 return scripts; 1056 } 1057 if (!triedCode) { 1058 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale); 1059 if (propNum != UProperty.UNDEFINED) { 1060 return new int[] {propNum}; 1061 } 1062 } 1063 return null; 1064 } 1065 1066 /** 1067 * Returns the script code associated with the given Unicode script property alias 1068 * (name or abbreviation). 1069 * Short aliases are ISO 15924 script codes. 1070 * Returns MALAYAM given "Malayam" OR "Mlym". 1071 * 1072 * @param nameOrAbbr name of the script or ISO 15924 code 1073 * @return The script code value, or INVALID_CODE if the code cannot be found. 1074 * @draft ICU 54 1075 * @provisional This API might change or be removed in a future release. 1076 */ 1077 public static final int getCodeFromName(String nameOrAbbr) { 1078 int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbr); 1079 return propNum == UProperty.UNDEFINED ? INVALID_CODE : propNum; 1080 } 1081 1082 /** 1083 * Gets the script code associated with the given codepoint. 
1084 * Returns UScript.MALAYAM given 0x0D02 1085 * @param codepoint UChar32 codepoint 1086 * @return The script code 1087 * @stable ICU 2.4 1088 */ 1089 public static final int getScript(int codepoint){ 1090 if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) { 1091 int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK; 1092 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1093 return scriptX; 1094 } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) { 1095 return UScript.COMMON; 1096 } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1097 return UScript.INHERITED; 1098 } else { 1099 return UCharacterProperty.INSTANCE.m_scriptExtensions_[scriptX&UCharacterProperty.SCRIPT_MASK_]; 1100 } 1101 }else{ 1102 throw new IllegalArgumentException(Integer.toString(codepoint)); 1103 } 1104 } 1105 1106 /** 1107 * Do the Script_Extensions of code point c contain script sc? 1108 * If c does not have explicit Script_Extensions, then this tests whether 1109 * c has the Script property value sc. 1110 * 1111 * <p>Some characters are commonly used in multiple scripts. 1112 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 1113 * 1114 * <p>The Script_Extensions property is provisional. It may be modified or removed 1115 * in future versions of the Unicode Standard, and thus in ICU. 
1116 * @param c code point 1117 * @param sc script code 1118 * @return true if sc is in Script_Extensions(c) 1119 * @stable ICU 49 1120 */ 1121 public static final boolean hasScript(int c, int sc) { 1122 int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK; 1123 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1124 return sc==scriptX; 1125 } 1126 1127 char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_; 1128 int scx=scriptX&UCharacterProperty.SCRIPT_MASK_; // index into scriptExtensions 1129 if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1130 scx=scriptExtensions[scx+1]; 1131 } 1132 if(sc>0x7fff) { 1133 // Guard against bogus input that would 1134 // make us go past the Script_Extensions terminator. 1135 return false; 1136 } 1137 while(sc>scriptExtensions[scx]) { 1138 ++scx; 1139 } 1140 return sc==(scriptExtensions[scx]&0x7fff); 1141 } 1142 1143 /** 1144 * Sets code point c's Script_Extensions as script code integers into the output BitSet. 1145 * <ul> 1146 * <li>If c does have Script_Extensions, then the return value is 1147 * the negative number of Script_Extensions codes (= -set.cardinality()); 1148 * in this case, the Script property value 1149 * (normally Common or Inherited) is not included in the set. 1150 * <li>If c does not have Script_Extensions, then the one Script code is put into the set 1151 * and also returned. 1152 * <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set 1153 * and also returned. 1154 * </ul> 1155 * In other words, if the return value is non-negative, it is c's single Script code 1156 * and the set contains exactly this Script code. 1157 * If the return value is -n, then the set contains c's n>=2 Script_Extensions script codes. 1158 * 1159 * <p>Some characters are commonly used in multiple scripts. 1160 * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. 
1161 * 1162 * <p>The Script_Extensions property is provisional. It may be modified or removed 1163 * in future versions of the Unicode Standard, and thus in ICU. 1164 * @param c code point 1165 * @param set set of script code integers; will be cleared, then bits are set 1166 * corresponding to c's Script_Extensions 1167 * @return negative number of script codes in c's Script_Extensions, 1168 * or the non-negative single Script value 1169 * @stable ICU 49 1170 */ 1171 public static final int getScriptExtensions(int c, BitSet set) { 1172 set.clear(); 1173 int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK; 1174 if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) { 1175 set.set(scriptX); 1176 return scriptX; 1177 } 1178 1179 char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_; 1180 int scx=scriptX&UCharacterProperty.SCRIPT_MASK_; // index into scriptExtensions 1181 if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) { 1182 scx=scriptExtensions[scx+1]; 1183 } 1184 int length=0; 1185 int sx; 1186 do { 1187 sx=scriptExtensions[scx++]; 1188 set.set(sx&0x7fff); 1189 ++length; 1190 } while(sx<0x8000); 1191 // length==set.cardinality() 1192 return -length; 1193 } 1194 1195 /** 1196 * Returns the long Unicode script name, if there is one. 1197 * Otherwise returns the 4-letter ISO 15924 script code. 1198 * Returns "Malayam" given MALAYALAM. 1199 * 1200 * @param scriptCode int script code 1201 * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code 1202 * @throws IllegalArgumentException if the script code is not valid 1203 * @stable ICU 2.4 1204 */ 1205 public static final String getName(int scriptCode){ 1206 return UCharacter.getPropertyValueName(UProperty.SCRIPT, 1207 scriptCode, 1208 UProperty.NameChoice.LONG); 1209 } 1210 1211 /** 1212 * Returns the 4-letter ISO 15924 script code, 1213 * which is the same as the short Unicode script name if Unicode has names for the script. 
1214 * Returns "Mlym" given MALAYALAM. 1215 * 1216 * @param scriptCode int script code 1217 * @return short script name (4-letter code) 1218 * @throws IllegalArgumentException if the script code is not valid 1219 * @stable ICU 2.4 1220 */ 1221 public static final String getShortName(int scriptCode){ 1222 return UCharacter.getPropertyValueName(UProperty.SCRIPT, 1223 scriptCode, 1224 UProperty.NameChoice.SHORT); 1225 } 1226 1227 /** 1228 * Script metadata (script properties). 1229 * See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt 1230 */ 1231 private static final class ScriptMetadata { 1232 // 0 = NOT_ENCODED, no sample character, default false script properties. 1233 // Bits 20.. 0: sample character 1234 1235 // Bits 23..21: usage 1236 private static final int UNKNOWN = 1 << 21; 1237 private static final int EXCLUSION = 2 << 21; 1238 private static final int LIMITED_USE = 3 << 21; 1239 private static final int ASPIRATIONAL = 4 << 21; 1240 private static final int RECOMMENDED = 5 << 21; 1241 1242 // Bits 31..24: Single-bit flags 1243 private static final int RTL = 1 << 24; 1244 private static final int LB_LETTERS = 1 << 25; 1245 private static final int CASED = 1 << 26; 1246 1247 private static final int SCRIPT_PROPS[] = { 1248 // Begin copy-paste output from 1249 // tools/trunk/unicode/py/parsescriptmetadata.py 1250 // or from icu/trunk/source/common/uscript_props.cpp 1251 0x0040 | RECOMMENDED, // Zyyy 1252 0x0308 | UNKNOWN, // Zinh 1253 0x0628 | RECOMMENDED | RTL, // Arab 1254 0x0531 | RECOMMENDED | CASED, // Armn 1255 0x0995 | RECOMMENDED, // Beng 1256 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo 1257 0x13C4 | LIMITED_USE, // Cher 1258 0x03E2 | EXCLUSION | CASED, // Copt 1259 0x042F | RECOMMENDED | CASED, // Cyrl 1260 0x10414 | EXCLUSION | CASED, // Dsrt 1261 0x0905 | RECOMMENDED, // Deva 1262 0x12A0 | RECOMMENDED, // Ethi 1263 0x10D3 | RECOMMENDED, // Geor 1264 0x10330 | EXCLUSION, // Goth 1265 0x03A9 | RECOMMENDED | CASED, // 
Grek 1266 0x0A95 | RECOMMENDED, // Gujr 1267 0x0A15 | RECOMMENDED, // Guru 1268 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani 1269 0xAC00 | RECOMMENDED, // Hang 1270 0x05D0 | RECOMMENDED | RTL, // Hebr 1271 0x304B | RECOMMENDED | LB_LETTERS, // Hira 1272 0x0C95 | RECOMMENDED, // Knda 1273 0x30AB | RECOMMENDED | LB_LETTERS, // Kana 1274 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr 1275 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo 1276 0x004C | RECOMMENDED | CASED, // Latn 1277 0x0D15 | RECOMMENDED, // Mlym 1278 0x1826 | ASPIRATIONAL, // Mong 1279 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr 1280 0x168F | EXCLUSION, // Ogam 1281 0x10300 | EXCLUSION, // Ital 1282 0x0B15 | RECOMMENDED, // Orya 1283 0x16A0 | EXCLUSION, // Runr 1284 0x0D85 | RECOMMENDED, // Sinh 1285 0x0710 | LIMITED_USE | RTL, // Syrc 1286 0x0B95 | RECOMMENDED, // Taml 1287 0x0C15 | RECOMMENDED, // Telu 1288 0x078C | RECOMMENDED | RTL, // Thaa 1289 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai 1290 0x0F40 | RECOMMENDED, // Tibt 1291 0x14C0 | ASPIRATIONAL, // Cans 1292 0xA288 | ASPIRATIONAL | LB_LETTERS, // Yiii 1293 0x1703 | EXCLUSION, // Tglg 1294 0x1723 | EXCLUSION, // Hano 1295 0x1743 | EXCLUSION, // Buhd 1296 0x1763 | EXCLUSION, // Tagb 1297 0x2800 | UNKNOWN, // Brai 1298 0x10800 | EXCLUSION | RTL, // Cprt 1299 0x1900 | LIMITED_USE, // Limb 1300 0x10000 | EXCLUSION, // Linb 1301 0x10480 | EXCLUSION, // Osma 1302 0x10450 | EXCLUSION, // Shaw 1303 0x1950 | LIMITED_USE | LB_LETTERS, // Tale 1304 0x10380 | EXCLUSION, // Ugar 1305 0, 1306 0x1A00 | EXCLUSION, // Bugi 1307 0x2C00 | EXCLUSION | CASED, // Glag 1308 0x10A00 | EXCLUSION | RTL, // Khar 1309 0xA800 | LIMITED_USE, // Sylo 1310 0x1980 | LIMITED_USE | LB_LETTERS, // Talu 1311 0x2D30 | ASPIRATIONAL, // Tfng 1312 0x103A0 | EXCLUSION, // Xpeo 1313 0x1B05 | LIMITED_USE, // Bali 1314 0x1BC0 | LIMITED_USE, // Batk 1315 0, 1316 0x11005 | EXCLUSION, // Brah 1317 0xAA00 | LIMITED_USE, // Cham 1318 0, 1319 0, 1320 0, 1321 0, 1322 0x13153 | EXCLUSION, // Egyp 1323 0, 
1324 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans 1325 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant 1326 0x16B1C | EXCLUSION, // Hmng 1327 0, 1328 0, 1329 0xA984 | LIMITED_USE, // Java 1330 0xA90A | LIMITED_USE, // Kali 1331 0, 1332 0, 1333 0x1C00 | LIMITED_USE, // Lepc 1334 0x10647 | EXCLUSION, // Lina 1335 0x0840 | LIMITED_USE | RTL, // Mand 1336 0, 1337 0x10980 | EXCLUSION | RTL, // Mero 1338 0x07CA | LIMITED_USE | RTL, // Nkoo 1339 0x10C00 | EXCLUSION | RTL, // Orkh 1340 0x1036B | EXCLUSION, // Perm 1341 0xA840 | EXCLUSION, // Phag 1342 0x10900 | EXCLUSION | RTL, // Phnx 1343 0x16F00 | ASPIRATIONAL, // Plrd 1344 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 0xA549 | LIMITED_USE, // Vaii 1351 0, 1352 0x12000 | EXCLUSION, // Xsux 1353 0, 1354 0xFDD0 | UNKNOWN, // Zzzz 1355 0x102A0 | EXCLUSION, // Cari 1356 0x304B | RECOMMENDED | LB_LETTERS, // Jpan 1357 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana 1358 0x10280 | EXCLUSION, // Lyci 1359 0x10920 | EXCLUSION | RTL, // Lydi 1360 0x1C5A | LIMITED_USE, // Olck 1361 0xA930 | EXCLUSION, // Rjng 1362 0xA882 | LIMITED_USE, // Saur 1363 0, 1364 0x1B83 | LIMITED_USE, // Sund 1365 0, 1366 0xABC0 | LIMITED_USE, // Mtei 1367 0x10840 | EXCLUSION | RTL, // Armi 1368 0x10B00 | EXCLUSION | RTL, // Avst 1369 0x11103 | LIMITED_USE, // Cakm 1370 0xAC00 | RECOMMENDED, // Kore 1371 0x11083 | EXCLUSION, // Kthi 1372 0x10AD8 | EXCLUSION | RTL, // Mani 1373 0x10B60 | EXCLUSION | RTL, // Phli 1374 0x10B8F | EXCLUSION | RTL, // Phlp 1375 0, 1376 0x10B40 | EXCLUSION | RTL, // Prti 1377 0x0800 | EXCLUSION | RTL, // Samr 1378 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt 1379 0, 1380 0, 1381 0xA6A0 | LIMITED_USE, // Bamu 1382 0xA4D0 | LIMITED_USE, // Lisu 1383 0, 1384 0x10A60 | EXCLUSION | RTL, // Sarb 1385 0x16AE6 | EXCLUSION, // Bass 1386 0x1BC20 | EXCLUSION, // Dupl 1387 0x10500 | EXCLUSION, // Elba 1388 0x11315 | EXCLUSION, // Gran 1389 0, 1390 0, 1391 0x1E802 | EXCLUSION | RTL, // Mend 1392 0x109A0 | EXCLUSION | RTL, // Merc 1393 0x10A95 | 
EXCLUSION | RTL, // Narb 1394 0x10896 | EXCLUSION | RTL, // Nbat 1395 0x10873 | EXCLUSION | RTL, // Palm 1396 0x112BE | EXCLUSION, // Sind 1397 0x118B4 | EXCLUSION | CASED, // Wara 1398 0, 1399 0, 1400 0x16A4F | EXCLUSION, // Mroo 1401 0, 1402 0x11183 | EXCLUSION, // Shrd 1403 0x110D0 | EXCLUSION, // Sora 1404 0x11680 | EXCLUSION, // Takr 1405 0, 1406 0, 1407 0, 1408 0x11208 | EXCLUSION, // Khoj 1409 0x11484 | EXCLUSION, // Tirh 1410 0x10537 | EXCLUSION, // Aghb 1411 0x11152 | EXCLUSION, // Mahj 1412 0, 1413 0, 1414 0x1160E | EXCLUSION, // Modi 1415 0, 1416 0x11AC0 | EXCLUSION, // Pauc 1417 0x1158E | EXCLUSION, // Sidd 1418 // End copy-paste from parsescriptmetadata.py 1419 }; 1420 1421 private static final int getScriptProps(int script) { 1422 if (0 <= script && script < SCRIPT_PROPS.length) { 1423 return SCRIPT_PROPS[script]; 1424 } else { 1425 return 0; 1426 } 1427 } 1428 } 1429 1430 /** 1431 * Script usage constants. 1432 * See UAX #31 Unicode Identifier and Pattern Syntax. 1433 * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers 1434 * 1435 * @stable ICU 51 1436 */ 1437 public enum ScriptUsage { 1438 /** 1439 * Not encoded in Unicode. 1440 * @stable ICU 51 1441 */ 1442 NOT_ENCODED, 1443 /** 1444 * Unknown script usage. 1445 * @stable ICU 51 1446 */ 1447 UNKNOWN, 1448 /** 1449 * Candidate for Exclusion from Identifiers. 1450 * @stable ICU 51 1451 */ 1452 EXCLUDED, 1453 /** 1454 * Limited Use script. 1455 * @stable ICU 51 1456 */ 1457 LIMITED_USE, 1458 /** 1459 * Aspirational Use script. 1460 * @stable ICU 51 1461 */ 1462 ASPIRATIONAL, 1463 /** 1464 * Recommended script. 1465 * @stable ICU 51 1466 */ 1467 RECOMMENDED 1468 } 1469 private static final ScriptUsage[] usageValues = ScriptUsage.values(); 1470 1471 /** 1472 * Returns the script sample character string. 1473 * This string normally consists of one code point but might be longer. 1474 * The string is empty if the script is not encoded. 
1475 * 1476 * @param script script code 1477 * @return the sample character string 1478 * @stable ICU 51 1479 */ 1480 public static final String getSampleString(int script) { 1481 int sampleChar = ScriptMetadata.getScriptProps(script) & 0x1fffff; 1482 if(sampleChar != 0) { 1483 return new StringBuilder().appendCodePoint(sampleChar).toString(); 1484 } 1485 return ""; 1486 } 1487 1488 /** 1489 * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. 1490 * Returns {@link ScriptUsage#NOT_ENCODED} if the script is not encoded in Unicode. 1491 * 1492 * @param script script code 1493 * @return script usage 1494 * @see ScriptUsage 1495 * @stable ICU 51 1496 */ 1497 public static final ScriptUsage getUsage(int script) { 1498 return usageValues[(ScriptMetadata.getScriptProps(script) >> 21) & 7]; 1499 } 1500 1501 /** 1502 * Returns true if the script is written right-to-left. 1503 * For example, Arab and Hebr. 1504 * 1505 * @param script script code 1506 * @return true if the script is right-to-left 1507 * @stable ICU 51 1508 */ 1509 public static final boolean isRightToLeft(int script) { 1510 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.RTL) != 0; 1511 } 1512 1513 /** 1514 * Returns true if the script allows line breaks between letters (excluding hyphenation). 1515 * Such a script typically requires dictionary-based line breaking. 1516 * For example, Hani and Thai. 1517 * 1518 * @param script script code 1519 * @return true if the script allows line breaks between letters 1520 * @stable ICU 51 1521 */ 1522 public static final boolean breaksBetweenLetters(int script) { 1523 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.LB_LETTERS) != 0; 1524 } 1525 1526 /** 1527 * Returns true if in modern (or most recent) usage of the script case distinctions are customary. 1528 * For example, Latn and Cyrl. 
1529 * 1530 * @param script script code 1531 * @return true if the script is cased 1532 * @stable ICU 51 1533 */ 1534 public static final boolean isCased(int script) { 1535 return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.CASED) != 0; 1536 } 1537 1538 ///CLOVER:OFF 1539 /** 1540 * Private Constructor. Never default construct 1541 */ 1542 private UScript(){} 1543 ///CLOVER:ON 1544} 1545