1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/** 5 ******************************************************************************* 6 * Copyright (C) 2000-2010, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10package android.icu.dev.test.translit; 11 12import android.icu.text.UTF16; 13import android.icu.text.UnicodeSet; 14 15public final class TestUtility { 16 17 public static String hex(char ch) { 18 String foo = Integer.toString(ch,16).toUpperCase(); 19 return "0000".substring(0,4-foo.length()) + foo; 20 } 21 22 public static String hex(int ch) { 23 String foo = Integer.toString(ch,16).toUpperCase(); 24 return "00000000".substring(0,4-foo.length()) + foo; 25 } 26 27 public static String hex(String s) { 28 return hex(s,","); 29 } 30 31 public static String hex(String s, String sep) { 32 if (s.length() == 0) return ""; 33 String result = hex(s.charAt(0)); 34 for (int i = 1; i < s.length(); ++i) { 35 result += sep; 36 result += hex(s.charAt(i)); 37 } 38 return result; 39 } 40 41 public static String replace(String source, String toBeReplaced, String replacement) { 42 StringBuffer results = new StringBuffer(); 43 int len = toBeReplaced.length(); 44 for (int i = 0; i < source.length(); ++i) { 45 if (source.regionMatches(false, i, toBeReplaced, 0, len)) { 46 results.append(replacement); 47 i += len - 1; // minus one, since we will increment 48 } else { 49 results.append(source.charAt(i)); 50 } 51 } 52 return results.toString(); 53 } 54 55 public static String replaceAll(String source, UnicodeSet set, String replacement) { 56 StringBuffer results = new StringBuffer(); 57 int cp; 58 for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { 59 cp = UTF16.charAt(source,i); 60 if (set.contains(cp)) { 61 results.append(replacement); 62 } else { 63 UTF16.append(results, cp); 64 } 65 } 66 return results.toString(); 67 } 68 69 // COMMENTED OUT ALL THE OLD SCRIPT STUFF 70 /* 71 public static byte getScript(char c) { 72 return getScript(getBlock(c)); 73 } 74 75 public static byte getScript(byte block) { 76 return blockToScript[block]; 77 } 78 79 public static byte getBlock(char c) { 80 int index = c >> 7; 81 byte block = charToBlock[index]; 82 while (block < 0) { // take care of exceptions, blocks split across 128 boundaries 83 int[] tuple = split[-block-1]; 84 if (c < tuple[0]) block = (byte)tuple[1]; 85 else block = (byte)tuple[2]; 86 } 87 return block; 88 } 89 90 // returns next letter of script, or 0xFFFF if done 91 92 public static char getNextLetter(char c, byte script) { 93 while (c < 0xFFFF) { 94 ++c; 95 if (getScript(c) == script && Character.isLetter(c)) { 96 return c; 97 } 98 } 99 return c; 100 } 101 102 // Supplements to Character methods; these methods go through 103 // UCharacter if possible. If not, they fall back to Character. 104 105 public static boolean isUnassigned(char c) { 106 try { 107 return UCharacter.getType(c) == UCharacterCategory.UNASSIGNED; 108 } catch (NullPointerException e) { 109 System.out.print(""); 110 } 111 return Character.getType(c) == Character.UNASSIGNED; 112 } 113 114 public static boolean isLetter(char c) { 115 try { 116 return UCharacter.isLetter(c); 117 } catch (NullPointerException e) { 118 System.out.print(""); 119 } 120 return Character.isLetter(c); 121 } 122 123 public static void main(String[] args) { 124 System.out.println("Blocks: "); 125 byte lastblock = -128; 126 for (char cc = 0; cc < 0xFFFF; ++cc) { 127 byte block = TestUtility.getBlock(cc); 128 if (block != lastblock) { 129 System.out.println(TestUtility.hex(cc) + "\t" + block); 130 lastblock = block; 131 } 132 } 133 System.out.println(); 134 System.out.println("Scripts: "); 135 byte lastScript = -128; 136 for (char cc = 0; cc < 0xFFFF; ++cc) { 137 byte script = TestUtility.getScript(cc); 138 if (script != lastScript) { 139 System.out.println(TestUtility.hex(cc) + "\t" + script); 140 lastScript = script; 141 } 142 } 143 } 144 145 146 147 public static final byte // SCRIPT CODE 148 COMMON_SCRIPT = 0, 149 LATIN_SCRIPT = 1, 150 GREEK_SCRIPT = 2, 151 CYRILLIC_SCRIPT = 3, 152 ARMENIAN_SCRIPT = 4, 153 HEBREW_SCRIPT = 5, 154 ARABIC_SCRIPT = 6, 155 SYRIAC_SCRIPT = 7, 156 THAANA_SCRIPT = 8, 157 DEVANAGARI_SCRIPT = 9, 158 BENGALI_SCRIPT = 10, 159 GURMUKHI_SCRIPT = 11, 160 GUJARATI_SCRIPT = 12, 161 ORIYA_SCRIPT = 13, 162 TAMIL_SCRIPT = 14, 163 TELUGU_SCRIPT = 15, 164 KANNADA_SCRIPT = 16, 165 MALAYALAM_SCRIPT = 17, 166 SINHALA_SCRIPT = 18, 167 THAI_SCRIPT = 19, 168 LAO_SCRIPT = 20, 169 TIBETAN_SCRIPT = 21, 170 MYANMAR_SCRIPT = 22, 171 GEORGIAN_SCRIPT = 23, 172 JAMO_SCRIPT = 24, 173 HANGUL_SCRIPT = 25, 174 ETHIOPIC_SCRIPT = 26, 175 CHEROKEE_SCRIPT = 27, 176 ABORIGINAL_SCRIPT = 28, 177 OGHAM_SCRIPT = 29, 178 RUNIC_SCRIPT = 30, 179 KHMER_SCRIPT = 31, 180 MONGOLIAN_SCRIPT = 32, 181 HIRAGANA_SCRIPT = 33, 182 KATAKANA_SCRIPT = 34, 183 BOPOMOFO_SCRIPT = 35, 184 HAN_SCRIPT = 36, 185 YI_SCRIPT = 37; 186 187 public static final byte // block code 188 RESERVED_BLOCK = 0, 189 BASIC_LATIN = 1, 190 LATIN_1_SUPPLEMENT = 2, 191 LATIN_EXTENDED_A = 3, 192 LATIN_EXTENDED_B = 4, 193 IPA_EXTENSIONS = 5, 194 SPACING_MODIFIER_LETTERS = 6, 195 COMBINING_DIACRITICAL_MARKS = 7, 196 GREEK = 8, 197 CYRILLIC = 9, 198 ARMENIAN = 10, 199 HEBREW = 11, 200 ARABIC = 12, 201 SYRIAC = 13, 202 THAANA = 14, 203 DEVANAGARI = 15, 204 BENGALI = 16, 205 GURMUKHI = 17, 206 GUJARATI = 18, 207 ORIYA = 19, 208 TAMIL = 20, 209 TELUGU = 21, 210 KANNADA = 22, 211 MALAYALAM = 23, 212 SINHALA = 24, 213 THAI = 25, 214 LAO = 26, 215 TIBETAN = 27, 216 MYANMAR = 28, 217 GEORGIAN = 29, 218 HANGUL_JAMO = 30, 219 ETHIOPIC = 31, 220 CHEROKEE = 32, 221 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, 222 OGHAM = 34, 223 RUNIC = 35, 224 KHMER = 36, 225 MONGOLIAN = 37, 226 LATIN_EXTENDED_ADDITIONAL = 38, 227 GREEK_EXTENDED = 39, 228 GENERAL_PUNCTUATION = 40, 229 SUPERSCRIPTS_AND_SUBSCRIPTS = 41, 230 CURRENCY_SYMBOLS = 42, 231 COMBINING_MARKS_FOR_SYMBOLS = 43, 232 LETTERLIKE_SYMBOLS = 44, 233 NUMBER_FORMS = 45, 234 ARROWS = 46, 235 MATHEMATICAL_OPERATORS = 47, 236 MISCELLANEOUS_TECHNICAL = 48, 237 CONTROL_PICTURES = 49, 238 OPTICAL_CHARACTER_RECOGNITION = 50, 239 ENCLOSED_ALPHANUMERICS = 51, 240 BOX_DRAWING = 52, 241 BLOCK_ELEMENTS = 53, 242 GEOMETRIC_SHAPES = 54, 243 MISCELLANEOUS_SYMBOLS = 55, 244 DINGBATS = 56, 245 BRAILLE_PATTERNS = 57, 246 CJK_RADICALS_SUPPLEMENT = 58, 247 KANGXI_RADICALS = 59, 248 IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60, 249 CJK_SYMBOLS_AND_PUNCTUATION = 61, 250 HIRAGANA = 62, 251 KATAKANA = 63, 252 BOPOMOFO = 64, 253 HANGUL_COMPATIBILITY_JAMO = 65, 254 KANBUN = 66, 255 BOPOMOFO_EXTENDED = 67, 256 ENCLOSED_CJK_LETTERS_AND_MONTHS = 68, 257 CJK_COMPATIBILITY = 69, 258 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70, 259 CJK_UNIFIED_IDEOGRAPHS = 71, 260 YI_SYLLABLES = 72, 261 YI_RADICALS = 73, 262 HANGUL_SYLLABLES = 74, 263 HIGH_SURROGATES = 75, 264 HIGH_PRIVATE_USE_SURROGATES = 76, 265 LOW_SURROGATES = 77, 266 PRIVATE_USE = 78, 267 CJK_COMPATIBILITY_IDEOGRAPHS = 79, 268 ALPHABETIC_PRESENTATION_FORMS = 80, 269 ARABIC_PRESENTATION_FORMS_A = 81, 270 COMBINING_HALF_MARKS = 82, 271 CJK_COMPATIBILITY_FORMS = 83, 272 SMALL_FORM_VARIANTS = 84, 273 ARABIC_PRESENTATION_FORMS_B = 85, 274 SPECIALS = 86, 275 HALFWIDTH_AND_FULLWIDTH_FORMS = 87; 276 277 static final byte[] blockToScript = { 278 COMMON_SCRIPT, // 0, <RESERVED_BLOCK> 279 LATIN_SCRIPT, // 1, BASIC_LATIN 280 LATIN_SCRIPT, // 2, LATIN_1_SUPPLEMENT 281 LATIN_SCRIPT, // 3, LATIN_EXTENDED_A 282 LATIN_SCRIPT, // 4, LATIN_EXTENDED_B 283 LATIN_SCRIPT, // 5, IPA_EXTENSIONS 284 COMMON_SCRIPT, // 6, SPACING_MODIFIER_LETTERS 285 COMMON_SCRIPT, // 7, COMBINING_DIACRITICAL_MARKS 286 GREEK_SCRIPT, // 8, GREEK 287 CYRILLIC_SCRIPT, // 9, CYRILLIC 288 ARMENIAN_SCRIPT, // 10, ARMENIAN 289 HEBREW_SCRIPT, // 11, HEBREW 290 ARABIC_SCRIPT, // 12, ARABIC 291 SYRIAC_SCRIPT, // 13, SYRIAC 292 THAANA_SCRIPT, // 14, THAANA 293 DEVANAGARI_SCRIPT, // 15, DEVANAGARI 294 BENGALI_SCRIPT, // 16, BENGALI 295 GURMUKHI_SCRIPT, // 17, GURMUKHI 296 GUJARATI_SCRIPT, // 18, GUJARATI 297 ORIYA_SCRIPT, // 19, ORIYA 298 TAMIL_SCRIPT, // 20, TAMIL 299 TELUGU_SCRIPT, // 21, TELUGU 300 KANNADA_SCRIPT, // 22, KANNADA 301 MALAYALAM_SCRIPT, // 23, MALAYALAM 302 SINHALA_SCRIPT, // 24, SINHALA 303 THAI_SCRIPT, // 25, THAI 304 LAO_SCRIPT, // 26, LAO 305 TIBETAN_SCRIPT, // 27, TIBETAN 306 MYANMAR_SCRIPT, // 28, MYANMAR 307 GEORGIAN_SCRIPT, // 29, GEORGIAN 308 JAMO_SCRIPT, // 30, HANGUL_JAMO 309 ETHIOPIC_SCRIPT, // 31, ETHIOPIC 310 CHEROKEE_SCRIPT, // 32, CHEROKEE 311 ABORIGINAL_SCRIPT, // 33, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 312 OGHAM_SCRIPT, // 34, OGHAM 313 RUNIC_SCRIPT, // 35, RUNIC 314 KHMER_SCRIPT, // 36, KHMER 315 MONGOLIAN_SCRIPT, // 37, MONGOLIAN 316 LATIN_SCRIPT, // 38, LATIN_EXTENDED_ADDITIONAL 317 GREEK_SCRIPT, // 39, GREEK_EXTENDED 318 COMMON_SCRIPT, // 40, GENERAL_PUNCTUATION 319 COMMON_SCRIPT, // 41, SUPERSCRIPTS_AND_SUBSCRIPTS 320 COMMON_SCRIPT, // 42, CURRENCY_SYMBOLS 321 COMMON_SCRIPT, // 43, COMBINING_MARKS_FOR_SYMBOLS 322 COMMON_SCRIPT, // 44, LETTERLIKE_SYMBOLS 323 COMMON_SCRIPT, // 45, NUMBER_FORMS 324 COMMON_SCRIPT, // 46, ARROWS 325 COMMON_SCRIPT, // 47, MATHEMATICAL_OPERATORS 326 COMMON_SCRIPT, // 48, MISCELLANEOUS_TECHNICAL 327 COMMON_SCRIPT, // 49, CONTROL_PICTURES 328 COMMON_SCRIPT, // 50, OPTICAL_CHARACTER_RECOGNITION 329 COMMON_SCRIPT, // 51, ENCLOSED_ALPHANUMERICS 330 COMMON_SCRIPT, // 52, BOX_DRAWING 331 COMMON_SCRIPT, // 53, BLOCK_ELEMENTS 332 COMMON_SCRIPT, // 54, GEOMETRIC_SHAPES 333 COMMON_SCRIPT, // 55, MISCELLANEOUS_SYMBOLS 334 COMMON_SCRIPT, // 56, DINGBATS 335 COMMON_SCRIPT, // 57, BRAILLE_PATTERNS 336 HAN_SCRIPT, // 58, CJK_RADICALS_SUPPLEMENT 337 HAN_SCRIPT, // 59, KANGXI_RADICALS 338 HAN_SCRIPT, // 60, IDEOGRAPHIC_DESCRIPTION_CHARACTERS 339 COMMON_SCRIPT, // 61, CJK_SYMBOLS_AND_PUNCTUATION 340 HIRAGANA_SCRIPT, // 62, HIRAGANA 341 KATAKANA_SCRIPT, // 63, KATAKANA 342 BOPOMOFO_SCRIPT, // 64, BOPOMOFO 343 JAMO_SCRIPT, // 65, HANGUL_COMPATIBILITY_JAMO 344 HAN_SCRIPT, // 66, KANBUN 345 BOPOMOFO_SCRIPT, // 67, BOPOMOFO_EXTENDED 346 COMMON_SCRIPT, // 68, ENCLOSED_CJK_LETTERS_AND_MONTHS 347 COMMON_SCRIPT, // 69, CJK_COMPATIBILITY 348 HAN_SCRIPT, // 70, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 349 HAN_SCRIPT, // 71, CJK_UNIFIED_IDEOGRAPHS 350 YI_SCRIPT, // 72, YI_SYLLABLES 351 YI_SCRIPT, // 73, YI_RADICALS 352 HANGUL_SCRIPT, // 74, HANGUL_SYLLABLES 353 COMMON_SCRIPT, // 75, HIGH_SURROGATES 354 COMMON_SCRIPT, // 76, HIGH_PRIVATE_USE_SURROGATES 355 COMMON_SCRIPT, // 77, LOW_SURROGATES 356 COMMON_SCRIPT, // 78, PRIVATE_USE 357 HAN_SCRIPT, // 79, CJK_COMPATIBILITY_IDEOGRAPHS 358 COMMON_SCRIPT, // 80, ALPHABETIC_PRESENTATION_FORMS 359 ARABIC_SCRIPT, // 81, ARABIC_PRESENTATION_FORMS_A 360 COMMON_SCRIPT, // 82, COMBINING_HALF_MARKS 361 COMMON_SCRIPT, // 83, CJK_COMPATIBILITY_FORMS 362 COMMON_SCRIPT, // 84, SMALL_FORM_VARIANTS 363 ARABIC_SCRIPT, // 85, ARABIC_PRESENTATION_FORMS_B 364 COMMON_SCRIPT, // 86, SPECIALS 365 COMMON_SCRIPT, // 87, HALFWIDTH_AND_FULLWIDTH_FORMS 366 COMMON_SCRIPT, // 88, SPECIALS 367 }; 368 369 // could be further reduced to a byte array, but I didn't bother. 370 static final int[][] split = { 371 {0x0250, 4, 5}, // -1 372 {0x02B0, 5, 6}, // -2 373 {0x0370, 7, 8}, // -3 374 {0x0530, 0, 10}, // -4 375 {0x0590, 10, 11}, // -5 376 {0x0750, 13, 0}, // -6 377 {0x07C0, 14, 0}, // -7 378 {0x10A0, 28, 29}, // -8 379 {0x13A0, 0, 32}, // -9 380 {0x16A0, 34, 35}, // -10 381 {0x18B0, 37, 0}, // -11 382 {0x2070, 40, 41}, // -12 383 {0x20A0, 41, -31}, // -13 384 {0x2150, 44, 45}, // -14 385 {0x2190, 45, 46}, // -15 386 {0x2440, 49, -32}, // -16 387 {0x25A0, 53, 54}, // -17 388 {0x27C0, 56, 0}, // -18 389 {0x2FE0, 59, -33}, // -19 390 {0x3040, 61, 62}, // -20 391 {0x30A0, 62, 63}, // -21 392 {0x3130, 64, 65}, // -22 393 {0x3190, 65, -34}, // -23 394 {0x4DB6, 70, 0}, // -24 395 {0xA490, 72, -35}, // -25 396 {0xD7A4, 74, 0}, // -26 397 {0xFB50, 80, 81}, // -27 398 {0xFE20, 0, -36}, // -28 399 {0xFEFF, 85, 86}, // -29 400 {0xFFF0, 87, -37}, // -30 401 {0x20D0, 42, 43}, // -31 402 {0x2460, 50, 51}, // -32 403 {0x2FF0, 0, 60}, // -33 404 {0x31A0, 66, -38}, // -34 405 {0xA4D0, 73, 0}, //-35 406 {0xFE30, 82, -39}, //-36 407 {0xFFFE, 88, 0}, //-37 408 {0x31C0, 67, 0}, // -38 409 {0xFE50, 83, -40}, //-39 410 {0xFE70, 84, 85} // -40 411 }; 412 413 static final byte[] charToBlock = { 414 1, 2, 3, 4, -1, -2, -3, 8, 9, 9, -4, -5, 12, 12, -6, -7, 415 0, 0, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 27, 416 28, -8, 30, 30, 31, 31, 31, -9, 33, 33, 33, 33, 33, -10, 0, 36, 417 37, -11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 38, 39, 39, 418 -12, -13, -14, -15, 47, 47, 48, 48, -16, 51, 52, -17, 55, 55, 56, -18, 419 57, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, -19, 420 -20, -21, -22, -23, 68, 68, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 421 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 422 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 423 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, -24, 71, 71, 71, 71, 424 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 425 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 426 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 427 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 428 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 429 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 430 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 431 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 432 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 433 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 434 72, 72, 72, 72, 72, 72, 72, 72, 72, -25, 0, 0, 0, 0, 0, 0, 435 0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 74, 74, 74, 74, 74, 74, 436 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 437 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 438 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 439 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 440 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, -26, 441 75, 75, 75, 75, 75, 75, 75, 76, 77, 77, 77, 77, 77, 77, 77, 77, 442 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 443 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 444 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 445 78, 78, 79, 79, 79, 79, -27, 81, 81, 81, 81, 81, -28, -29, 87, -30 446 }; 447 */ 448} 449