1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3 ******************************************************************************* 4 * Copyright (C) 1996-2012, International Business Machines Corporation and * 5 * others. All Rights Reserved. * 6 ******************************************************************************* 7 */ 8package android.icu.dev.test.translit; 9 10import java.util.ArrayList; 11import java.util.Enumeration; 12import java.util.HashMap; 13import java.util.HashSet; 14import java.util.Iterator; 15import java.util.List; 16import java.util.Locale; 17import java.util.Map.Entry; 18 19import android.icu.dev.test.TestFmwk; 20import android.icu.dev.test.TestUtil; 21import android.icu.dev.util.UnicodeMap; 22import android.icu.impl.Utility; 23import android.icu.impl.UtilityExtensions; 24import android.icu.lang.CharSequences; 25import android.icu.lang.UCharacter; 26import android.icu.lang.UScript; 27import android.icu.text.CanonicalIterator; 28import android.icu.text.Normalizer2; 29import android.icu.text.Replaceable; 30import android.icu.text.ReplaceableString; 31import android.icu.text.StringTransform; 32import android.icu.text.Transliterator; 33import android.icu.text.UTF16; 34import android.icu.text.UnicodeFilter; 35import android.icu.text.UnicodeSet; 36import android.icu.text.UnicodeSetIterator; 37import android.icu.util.CaseInsensitiveString; 38import android.icu.util.ULocale; 39import org.junit.runner.RunWith; 40import android.icu.junit.IcuTestFmwkRunner; 41 42/*********************************************************************** 43 44 HOW TO USE THIS TEST FILE 45 -or- 46 How I developed on two platforms 47 without losing (too much of) my mind 48 49 501. Add new tests by copying/pasting/changing existing tests. On Java, 51 any public void method named Test...() taking no parameters becomes 52 a test. On C++, you need to modify the header and add a line to 53 the runIndexedTest() dispatch method. 54 552. Make liberal use of the expect() method; it is your friend. 56 573. The tests in this file exactly match those in a sister file on the 58 other side. The two files are: 59 60 icu4j: src/android.icu.dev.test/translit/TransliteratorTest.java 61 icu4c: source/test/intltest/transtst.cpp 62 63 ==> THIS IS THE IMPORTANT PART <== 64 65 When you add a test in this file, add it in transtst.cpp too. 66 Give it the same name and put it in the same relative place. This 67 makes maintenance a lot simpler for any poor soul who ends up 68 trying to synchronize the tests between icu4j and icu4c. 69 704. If you MUST enter a test that is NOT paralleled in the sister file, 71 then add it in the special non-mirrored section. These are 72 labeled 73 74 "icu4j ONLY" 75 76 or 77 78 "icu4c ONLY" 79 80 Make sure you document the reason the test is here and not there. 81 82 83Thank you. 84The Management 85 ***********************************************************************/ 86 87/** 88 * @test 89 * @summary General test of Transliterator 90 */ 91@RunWith(IcuTestFmwkRunner.class) 92public class TransliteratorTest extends TestFmwk { 93 94 public static void main(String[] args) throws Exception { 95 new TransliteratorTest().run(args); 96 } 97 98 public void TestHangul() { 99 100 Transliterator lh = Transliterator.getInstance("Latin-Hangul"); 101 Transliterator hl = lh.getInverse(); 102 103 assertTransform("Transform", "\uCE20", lh, "ch"); 104 105 assertTransform("Transform", "\uC544\uB530", lh, hl, "atta", "a-tta"); 106 assertTransform("Transform", "\uC544\uBE60", lh, hl, "appa", "a-ppa"); 107 assertTransform("Transform", "\uC544\uC9DC", lh, hl, "ajja", "a-jja"); 108 assertTransform("Transform", "\uC544\uAE4C", lh, hl, "akka", "a-kka"); 109 assertTransform("Transform", "\uC544\uC2F8", lh, hl, "assa", "a-ssa"); 110 assertTransform("Transform", "\uC544\uCC28", lh, hl, "acha", "a-cha"); 111 assertTransform("Transform", "\uC545\uC0AC", lh, hl, "agsa", "ag-sa"); 112 assertTransform("Transform", "\uC548\uC790", lh, hl, "anja", "an-ja"); 113 assertTransform("Transform", "\uC548\uD558", lh, hl, "anha", "an-ha"); 114 assertTransform("Transform", "\uC54C\uAC00", lh, hl, "alga", "al-ga"); 115 assertTransform("Transform", "\uC54C\uB9C8", lh, hl, "alma", "al-ma"); 116 assertTransform("Transform", "\uC54C\uBC14", lh, hl, "alba", "al-ba"); 117 assertTransform("Transform", "\uC54C\uC0AC", lh, hl, "alsa", "al-sa"); 118 assertTransform("Transform", "\uC54C\uD0C0", lh, hl, "alta", "al-ta"); 119 assertTransform("Transform", "\uC54C\uD30C", lh, hl, "alpa", "al-pa"); 120 assertTransform("Transform", "\uC54C\uD558", lh, hl, "alha", "al-ha"); 121 assertTransform("Transform", "\uC555\uC0AC", lh, hl, "absa", "ab-sa"); 122 assertTransform("Transform", "\uC548\uAC00", lh, hl, "anga", "an-ga"); 123 assertTransform("Transform", "\uC545\uC2F8", lh, hl, "agssa", "ag-ssa"); 124 assertTransform("Transform", "\uC548\uC9DC", lh, hl, "anjja", "an-jja"); 125 assertTransform("Transform", "\uC54C\uC2F8", lh, hl, "alssa", "al-ssa"); 126 assertTransform("Transform", "\uC54C\uB530", lh, hl, "altta", "al-tta"); 127 assertTransform("Transform", "\uC54C\uBE60", lh, hl, "alppa", "al-ppa"); 128 assertTransform("Transform", "\uC555\uC2F8", lh, hl, "abssa", "ab-ssa"); 129 assertTransform("Transform", "\uC546\uCE74", lh, hl, "akkka", "akk-ka"); 130 assertTransform("Transform", "\uC558\uC0AC", lh, hl, "asssa", "ass-sa"); 131 132 } 133 134 public void TestChinese() { 135 Transliterator hanLatin = Transliterator.getInstance("Han-Latin"); 136 assertTransform("Transform", "z\u00E0o Unicode", hanLatin, "\u9020Unicode"); 137 assertTransform("Transform", "z\u00E0i chu\u00E0ng z\u00E0o Unicode zh\u012B qi\u00E1n", hanLatin, "\u5728\u5275\u9020Unicode\u4E4B\u524D"); 138 } 139 140 public void TestRegistry() { 141 checkRegistry("foo3", "::[a-z]; ::NFC; [:letter:] a > b;"); // check compound 142 checkRegistry("foo2", "::NFC; [:letter:] a > b;"); // check compound 143 checkRegistry("foo1", "[:letter:] a > b;"); 144 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 145 String id = (String) e.nextElement(); 146 checkRegistry(id); 147 } 148 } 149 150 private void checkRegistry (String id, String rules) { 151 Transliterator foo = Transliterator.createFromRules(id, rules, Transliterator.FORWARD); 152 Transliterator.registerInstance(foo); 153 checkRegistry(id); 154 } 155 156 private void checkRegistry(String id) { 157 Transliterator fie = Transliterator.getInstance(id); 158 final UnicodeSet fae = new UnicodeSet("[a-z5]"); 159 fie.setFilter(fae); 160 Transliterator foe = Transliterator.getInstance(id); 161 UnicodeFilter fee = foe.getFilter(); 162 if (fae.equals(fee)) { 163 errln("Changed what is in registry for " + id); 164 } 165 } 166 167 public void TestInstantiation() { 168 long ms = System.currentTimeMillis(); 169 String ID; 170 for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { 171 ID = (String) e.nextElement(); 172 if (ID.equals("Latin-Han/definition")) { 173 System.out.println("\nTODO: disabling Latin-Han/definition check for now: fix later"); 174 continue; 175 } 176 Transliterator t = null; 177 try { 178 t = Transliterator.getInstance(ID); 179 // This is only true for some subclasses 180 // // We should get a new instance if we try again 181 // Transliterator t2 = Transliterator.getInstance(ID); 182 // if (t != t2) { 183 // logln("OK: " + Transliterator.getDisplayName(ID) + " (" + ID + "): " + t); 184 // } else { 185 // errln("FAIL: " + ID + " returned identical instances"); 186 // t = null; 187 // } 188 } catch (IllegalArgumentException ex) { 189 errln("FAIL: " + ID); 190 throw ex; 191 } 192 193 // if (t.getFilter() != null) { 194 // errln("Fail: Should never have filter on transliterator unless we started with one: " + ID + ", " + t.getFilter()); 195 // } 196 197 if (t != null) { 198 // Now test toRules 199 String rules = null; 200 try { 201 rules = t.toRules(true); 202 203 Transliterator.createFromRules("x", rules, Transliterator.FORWARD); 204 } catch (IllegalArgumentException ex2) { 205 errln("FAIL: " + ID + ".toRules() => bad rules: " + 206 rules); 207 throw ex2; 208 } 209 } 210 } 211 212 // Now test the failure path 213 try { 214 ID = "<Not a valid Transliterator ID>"; 215 Transliterator t = Transliterator.getInstance(ID); 216 errln("FAIL: " + ID + " returned " + t); 217 } catch (IllegalArgumentException ex) { 218 logln("OK: Bogus ID handled properly"); 219 } 220 221 ms = System.currentTimeMillis() - ms; 222 logln("Elapsed time: " + ms + " ms"); 223 } 224 225 public void TestSimpleRules() { 226 /* Example: rules 1. ab>x|y 227 * 2. yc>z 228 * 229 * []|eabcd start - no match, copy e to tranlated buffer 230 * [e]|abcd match rule 1 - copy output & adjust cursor 231 * [ex|y]cd match rule 2 - copy output & adjust cursor 232 * [exz]|d no match, copy d to transliterated buffer 233 * [exzd]| done 234 */ 235 expect("ab>x|y;" + 236 "yc>z", 237 "eabcd", "exzd"); 238 239 /* Another set of rules: 240 * 1. ab>x|yzacw 241 * 2. za>q 242 * 3. qc>r 243 * 4. cw>n 244 * 245 * []|ab Rule 1 246 * [x|yzacw] No match 247 * [xy|zacw] Rule 2 248 * [xyq|cw] Rule 4 249 * [xyqn]| Done 250 */ 251 expect("ab>x|yzacw;" + 252 "za>q;" + 253 "qc>r;" + 254 "cw>n", 255 "ab", "xyqn"); 256 257 /* Test categories 258 */ 259 Transliterator t = Transliterator.createFromRules("<ID>", 260 "$dummy=\uE100;" + 261 "$vowel=[aeiouAEIOU];" + 262 "$lu=[:Lu:];" + 263 "$vowel } $lu > '!';" + 264 "$vowel > '&';" + 265 "'!' { $lu > '^';" + 266 "$lu > '*';" + 267 "a>ERROR", 268 Transliterator.FORWARD); 269 expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 270 } 271 272 /** 273 * Test inline set syntax and set variable syntax. 274 */ 275 public void TestInlineSet() { 276 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 277 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 278 279 expect("$digit = [0-9];" + 280 "$alpha = [a-zA-Z];" + 281 "$alphanumeric = [$digit $alpha];" + // *** 282 "$special = [^$alphanumeric];" + // *** 283 "$alphanumeric > '-';" + 284 "$special > '*';", 285 286 "thx-1138", "---*----"); 287 } 288 289 /** 290 * Create some inverses and confirm that they work. We have to be 291 * careful how we do this, since the inverses will not be true 292 * inverses -- we can't throw any random string at the composition 293 * of the transliterators and expect the identity function. F x 294 * F' != I. However, if we are careful about the input, we will 295 * get the expected results. 296 */ 297 public void TestRuleBasedInverse() { 298 String RULES = 299 "abc>zyx;" + 300 "ab>yz;" + 301 "bc>zx;" + 302 "ca>xy;" + 303 "a>x;" + 304 "b>y;" + 305 "c>z;" + 306 307 "abc<zyx;" + 308 "ab<yz;" + 309 "bc<zx;" + 310 "ca<xy;" + 311 "a<x;" + 312 "b<y;" + 313 "c<z;" + 314 315 ""; 316 317 String[] DATA = { 318 // Careful here -- random strings will not work. If we keep 319 // the left side to the domain and the right side to the range 320 // we will be okay though (left, abc; right xyz). 321 "a", "x", 322 "abcacab", "zyxxxyy", 323 "caccb", "xyzzy", 324 }; 325 326 Transliterator fwd = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 327 Transliterator rev = Transliterator.createFromRules("<ID>", RULES, Transliterator.REVERSE); 328 for (int i=0; i<DATA.length; i+=2) { 329 expect(fwd, DATA[i], DATA[i+1]); 330 expect(rev, DATA[i+1], DATA[i]); 331 } 332 } 333 334 /** 335 * Basic test of keyboard. 336 */ 337 public void TestKeyboard() { 338 Transliterator t = Transliterator.createFromRules("<ID>", 339 "psch>Y;" 340 +"ps>y;" 341 +"ch>x;" 342 +"a>A;", Transliterator.FORWARD); 343 String DATA[] = { 344 // insertion, buffer 345 "a", "A", 346 "p", "Ap", 347 "s", "Aps", 348 "c", "Apsc", 349 "a", "AycA", 350 "psch", "AycAY", 351 null, "AycAY", // null means finishKeyboardTransliteration 352 }; 353 354 keyboardAux(t, DATA); 355 } 356 357 /** 358 * Basic test of keyboard with cursor. 359 */ 360 public void TestKeyboard2() { 361 Transliterator t = Transliterator.createFromRules("<ID>", 362 "ych>Y;" 363 +"ps>|y;" 364 +"ch>x;" 365 +"a>A;", Transliterator.FORWARD); 366 String DATA[] = { 367 // insertion, buffer 368 "a", "A", 369 "p", "Ap", 370 "s", "Aps", // modified for rollback - "Ay", 371 "c", "Apsc", // modified for rollback - "Ayc", 372 "a", "AycA", 373 "p", "AycAp", 374 "s", "AycAps", // modified for rollback - "AycAy", 375 "c", "AycApsc", // modified for rollback - "AycAyc", 376 "h", "AycAY", 377 null, "AycAY", // null means finishKeyboardTransliteration 378 }; 379 380 keyboardAux(t, DATA); 381 } 382 383 /** 384 * Test keyboard transliteration with back-replacement. 385 */ 386 public void TestKeyboard3() { 387 // We want th>z but t>y. Furthermore, during keyboard 388 // transliteration we want t>y then yh>z if t, then h are 389 // typed. 390 String RULES = 391 "t>|y;" + 392 "yh>z;" + 393 ""; 394 395 String[] DATA = { 396 // Column 1: characters to add to buffer (as if typed) 397 // Column 2: expected appearance of buffer after 398 // keyboard xliteration. 399 "a", "a", 400 "b", "ab", 401 "t", "abt", // modified for rollback - "aby", 402 "c", "abyc", 403 "t", "abyct", // modified for rollback - "abycy", 404 "h", "abycz", 405 null, "abycz", // null means finishKeyboardTransliteration 406 }; 407 408 Transliterator t = Transliterator.createFromRules("<ID>", RULES, Transliterator.FORWARD); 409 keyboardAux(t, DATA); 410 } 411 412 private void keyboardAux(Transliterator t, String[] DATA) { 413 Transliterator.Position index = new Transliterator.Position(); 414 ReplaceableString s = new ReplaceableString(); 415 for (int i=0; i<DATA.length; i+=2) { 416 StringBuffer log; 417 if (DATA[i] != null) { 418 log = new StringBuffer(s.toString() + " + " 419 + DATA[i] 420 + " -> "); 421 t.transliterate(s, index, DATA[i]); 422 } else { 423 log = new StringBuffer(s.toString() + " => "); 424 t.finishTransliteration(s, index); 425 } 426 UtilityExtensions.formatInput(log, s, index); 427 if (s.toString().equals(DATA[i+1])) { 428 logln(log.toString()); 429 } else { 430 errln("FAIL: " + log.toString() + ", expected " + DATA[i+1]); 431 } 432 } 433 } 434 435 // Latin-Arabic has been temporarily removed until it can be 436 // done correctly. 437 438 // public void TestArabic() { 439 // String DATA[] = { 440 // "Arabic", 441 // "\u062a\u062a\u0645\u062a\u0639 "+ 442 // "\u0627\u0644\u0644\u063a\u0629 "+ 443 // "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629 "+ 444 // "\u0628\u0628\u0646\u0638\u0645 "+ 445 // "\u0643\u062a\u0627\u0628\u0628\u064a\u0629 "+ 446 // "\u062c\u0645\u064a\u0644\u0629" 447 // }; 448 449 // Transliterator t = Transliterator.getInstance("Latin-Arabic"); 450 // for (int i=0; i<DATA.length; i+=2) { 451 // expect(t, DATA[i], DATA[i+1]); 452 // } 453 // } 454 455 /** 456 * Compose the Kana transliterator forward and reverse and try 457 * some strings that should come out unchanged. 458 */ 459 public void TestCompoundKana() { 460 Transliterator t = Transliterator.getInstance("Latin-Katakana;Katakana-Latin"); 461 expect(t, "aaaaa", "aaaaa"); 462 } 463 464 /** 465 * Compose the hex transliterators forward and reverse. 466 */ 467 public void TestCompoundHex() { 468 Transliterator a = Transliterator.getInstance("Any-Hex"); 469 Transliterator b = Transliterator.getInstance("Hex-Any"); 470 // Transliterator[] trans = { a, b }; 471 // Transliterator ab = Transliterator.getInstance(trans); 472 Transliterator ab = Transliterator.getInstance("Any-Hex;Hex-Any"); 473 474 // Do some basic tests of b 475 expect(b, "\\u0030\\u0031", "01"); 476 477 String s = "abcde"; 478 expect(ab, s, s); 479 480 // trans = new Transliterator[] { b, a }; 481 // Transliterator ba = Transliterator.getInstance(trans); 482 Transliterator ba = Transliterator.getInstance("Hex-Any;Any-Hex"); 483 ReplaceableString str = new ReplaceableString(s); 484 a.transliterate(str); 485 expect(ba, str.toString(), str.toString()); 486 } 487 488 /** 489 * Do some basic tests of filtering. 490 */ 491 public void TestFiltering() { 492 493 Transliterator tempTrans = Transliterator.createFromRules("temp", "x > y; x{a} > b; ", Transliterator.FORWARD); 494 tempTrans.setFilter(new UnicodeSet("[a]")); 495 String tempResult = tempTrans.transform("xa"); 496 assertEquals("context should not be filtered ", "xb", tempResult); 497 498 tempTrans = Transliterator.createFromRules("temp", "::[a]; x > y; x{a} > b; ", Transliterator.FORWARD); 499 tempResult = tempTrans.transform("xa"); 500 assertEquals("context should not be filtered ", "xb", tempResult); 501 502 Transliterator hex = Transliterator.getInstance("Any-Hex"); 503 hex.setFilter(new UnicodeFilter() { 504 public boolean contains(int c) { 505 return c != 'c'; 506 } 507 public String toPattern(boolean escapeUnprintable) { 508 return ""; 509 } 510 public boolean matchesIndexValue(int v) { 511 return false; 512 } 513 public void addMatchSetTo(UnicodeSet toUnionTo) {} 514 }); 515 String s = "abcde"; 516 String out = hex.transliterate(s); 517 String exp = "\\u0061\\u0062c\\u0064\\u0065"; 518 if (out.equals(exp)) { 519 logln("Ok: \"" + exp + "\""); 520 } else { 521 logln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); 522 } 523 } 524 525 /** 526 * Test anchors 527 */ 528 public void TestAnchors() { 529 expect("^ab > 01 ;" + 530 " ab > |8 ;" + 531 " b > k ;" + 532 " 8x$ > 45 ;" + 533 " 8x > 77 ;", 534 535 "ababbabxabx", 536 "018k7745"); 537 expect("$s = [z$] ;" + 538 "$s{ab > 01 ;" + 539 " ab > |8 ;" + 540 " b > k ;" + 541 " 8x}$s > 45 ;" + 542 " 8x > 77 ;", 543 544 "abzababbabxzabxabx", 545 "01z018k45z01x45"); 546 } 547 548 /** 549 * Test pattern quoting and escape mechanisms. 550 */ 551 public void TestPatternQuoting() { 552 // Array of 3n items 553 // Each item is <rules>, <input>, <expected output> 554 String[] DATA = { 555 "\u4E01>'[male adult]'", "\u4E01", "[male adult]", 556 }; 557 558 for (int i=0; i<DATA.length; i+=3) { 559 logln("Pattern: " + Utility.escape(DATA[i])); 560 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 561 expect(t, DATA[i+1], DATA[i+2]); 562 } 563 } 564 565 public void TestVariableNames() { 566 Transliterator gl = Transliterator.createFromRules("foo5", "$\u2DC0 = qy; a>b;", Transliterator.FORWARD); 567 if (gl == null) { 568 errln("FAIL: null Transliterator returned."); 569 } 570 } 571 572 /** 573 * Regression test for bugs found in Greek transliteration. 574 */ 575 public void TestJ277() { 576 Transliterator gl = Transliterator.getInstance("Greek-Latin; NFD; [:M:]Remove; NFC"); 577 578 char sigma = (char)0x3C3; 579 char upsilon = (char)0x3C5; 580 char nu = (char)0x3BD; 581 // not used char PHI = (char)0x3A6; 582 char alpha = (char)0x3B1; 583 // not used char omega = (char)0x3C9; 584 // not used char omicron = (char)0x3BF; 585 // not used char epsilon = (char)0x3B5; 586 587 // sigma upsilon nu -> syn 588 StringBuffer buf = new StringBuffer(); 589 buf.append(sigma).append(upsilon).append(nu); 590 String syn = buf.toString(); 591 expect(gl, syn, "syn"); 592 593 // sigma alpha upsilon nu -> saun 594 buf.setLength(0); 595 buf.append(sigma).append(alpha).append(upsilon).append(nu); 596 String sayn = buf.toString(); 597 expect(gl, sayn, "saun"); 598 599 // Again, using a smaller rule set 600 String rules = 601 "$alpha = \u03B1;" + 602 "$nu = \u03BD;" + 603 "$sigma = \u03C3;" + 604 "$ypsilon = \u03C5;" + 605 "$vowel = [aeiouAEIOU$alpha$ypsilon];" + 606 "s <> $sigma;" + 607 "a <> $alpha;" + 608 "u <> $vowel { $ypsilon;" + 609 "y <> $ypsilon;" + 610 "n <> $nu;"; 611 Transliterator mini = Transliterator.createFromRules 612 ("mini", rules, Transliterator.REVERSE); 613 expect(mini, syn, "syn"); 614 expect(mini, sayn, "saun"); 615 616 //| // Transliterate the Greek locale data 617 //| Locale el("el"); 618 //| DateFormatSymbols syms(el, status); 619 //| if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 620 //| int32_t i, count; 621 //| const UnicodeString* data = syms.getMonths(count); 622 //| for (i=0; i<count; ++i) { 623 //| if (data[i].length() == 0) { 624 //| continue; 625 //| } 626 //| UnicodeString out(data[i]); 627 //| gl->transliterate(out); 628 //| bool_t ok = TRUE; 629 //| if (data[i].length() >= 2 && out.length() >= 2 && 630 //| u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 631 //| if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 632 //| ok = FALSE; 633 //| } 634 //| } 635 //| if (ok) { 636 //| logln(prettify(data[i] + " -> " + out)); 637 //| } else { 638 //| errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 639 //| } 640 //| } 641 } 642 643 // /** 644 // * Prefix, suffix support in hex transliterators 645 // */ 646 // public void TestJ243() { 647 // // Test default Hex-Any, which should handle 648 // // \\u, \\U, u+, and U+ 649 // HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator(); 650 // expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz"); 651 // 652 // // Try a custom Hex-Any 653 // // \\uXXXX and &#xXXXX; 654 // HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 655 // expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x0123", 656 // "abcd5fx0123"); 657 // 658 // // Try custom Any-Hex (default is tested elsewhere) 659 // UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;"); 660 // expect(hex3, "012", "012"); 661 // } 662 663 public void TestJ329() { 664 665 Object[] DATA = { 666 Boolean.FALSE, "a > b; c > d", 667 Boolean.TRUE, "a > b; no operator; c > d", 668 }; 669 670 for (int i=0; i<DATA.length; i+=2) { 671 String err = null; 672 try { 673 Transliterator.createFromRules("<ID>", 674 (String) DATA[i+1], 675 Transliterator.FORWARD); 676 } catch (IllegalArgumentException e) { 677 err = e.getMessage(); 678 } 679 boolean gotError = (err != null); 680 String desc = (String) DATA[i+1] + 681 (gotError ? (" -> error: " + err) : " -> no error"); 682 if ((err != null) == ((Boolean)DATA[i]).booleanValue()) { 683 logln("Ok: " + desc); 684 } else { 685 errln("FAIL: " + desc); 686 } 687 } 688 } 689 690 /** 691 * Test segments and segment references. 692 */ 693 public void TestSegments() { 694 // Array of 3n items 695 // Each item is <rules>, <input>, <expected output> 696 String[] DATA = { 697 "([a-z]) '.' ([0-9]) > $2 '-' $1", 698 "abc.123.xyz.456", 699 "ab1-c23.xy4-z56", 700 }; 701 702 for (int i=0; i<DATA.length; i+=3) { 703 logln("Pattern: " + Utility.escape(DATA[i])); 704 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 705 expect(t, DATA[i+1], DATA[i+2]); 706 } 707 } 708 709 /** 710 * Test cursor positioning outside of the key 711 */ 712 public void TestCursorOffset() { 713 // Array of 3n items 714 // Each item is <rules>, <input>, <expected output> 715 String[] DATA = { 716 "pre {alpha} post > | @ ALPHA ;" + 717 "eALPHA > beta ;" + 718 "pre {beta} post > BETA @@ | ;" + 719 "post > xyz", 720 721 "prealphapost prebetapost", 722 "prbetaxyz preBETApost", 723 }; 724 725 for (int i=0; i<DATA.length; i+=3) { 726 logln("Pattern: " + Utility.escape(DATA[i])); 727 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 728 expect(t, DATA[i+1], DATA[i+2]); 729 } 730 } 731 732 /** 733 * Test zero length and > 1 char length variable values. Test 734 * use of variable refs in UnicodeSets. 735 */ 736 public void TestArbitraryVariableValues() { 737 // Array of 3n items 738 // Each item is <rules>, <input>, <expected output> 739 String[] DATA = { 740 "$abe = ab;" + 741 "$pat = x[yY]z;" + 742 "$ll = 'a-z';" + 743 "$llZ = [$ll];" + 744 "$llY = [$ll$pat];" + 745 "$emp = ;" + 746 747 "$abe > ABE;" + 748 "$pat > END;" + 749 "$llZ > 1;" + 750 "$llY > 2;" + 751 "7$emp 8 > 9;" + 752 "", 753 754 "ab xYzxyz stY78", 755 "ABE ENDEND 1129", 756 }; 757 758 for (int i=0; i<DATA.length; i+=3) { 759 logln("Pattern: " + Utility.escape(DATA[i])); 760 Transliterator t = Transliterator.createFromRules("<ID>", DATA[i], Transliterator.FORWARD); 761 expect(t, DATA[i+1], DATA[i+2]); 762 } 763 } 764 765 /** 766 * Confirm that the contextStart, contextLimit, start, and limit 767 * behave correctly. 768 */ 769 public void TestPositionHandling() { 770 // Array of 3n items 771 // Each item is <rules>, <input>, <expected output> 772 String[] DATA = { 773 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 774 "xtat txtb", // pos 0,9,0,9 775 "xTTaSS TTxUUb", 776 777 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 778 "xtat txtb", // pos 2,9,3,8 779 "xtaSS TTxUUb", 780 781 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 782 "xtat txtb", // pos 3,8,3,8 783 "xtaTT TTxTTb", 784 }; 785 786 // Array of 4n positions -- these go with the DATA array 787 // They are: contextStart, contextLimit, start, limit 788 int[] POS = { 789 0, 9, 0, 9, 790 2, 9, 3, 8, 791 3, 8, 3, 8, 792 }; 793 794 int n = DATA.length/3; 795 for (int i=0; i<n; i++) { 796 Transliterator t = Transliterator.createFromRules("<ID>", DATA[3*i], Transliterator.FORWARD); 797 Transliterator.Position pos = new Transliterator.Position( 798 POS[4*i], POS[4*i+1], POS[4*i+2], POS[4*i+3]); 799 ReplaceableString rsource = new ReplaceableString(DATA[3*i+1]); 800 t.transliterate(rsource, pos); 801 t.finishTransliteration(rsource, pos); 802 String result = rsource.toString(); 803 String exp = DATA[3*i+2]; 804 expectAux(Utility.escape(DATA[3*i]), 805 DATA[3*i+1], 806 result, 807 result.equals(exp), 808 exp); 809 } 810 } 811 812 /** 813 * Test the Hiragana-Katakana transliterator. 814 */ 815 public void TestHiraganaKatakana() { 816 Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); 817 Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); 818 819 // Array of 3n items 820 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 821 String[] DATA = { 822 "both", 823 "\u3042\u3090\u3099\u3092\u3050", 824 "\u30A2\u30F8\u30F2\u30B0", 825 826 "kh", 827 "\u307C\u3051\u3060\u3042\u3093\u30FC", 828 "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", 829 }; 830 831 for (int i=0; i<DATA.length; i+=3) { 832 switch (DATA[i].charAt(0)) { 833 case 'h': // Hiragana-Katakana 834 expect(hk, DATA[i+1], DATA[i+2]); 835 break; 836 case 'k': // Katakana-Hiragana 837 expect(kh, DATA[i+2], DATA[i+1]); 838 break; 839 case 'b': // both 840 expect(hk, DATA[i+1], DATA[i+2]); 841 expect(kh, DATA[i+2], DATA[i+1]); 842 break; 843 } 844 } 845 846 } 847 848 public void TestCopyJ476() { 849 // This is a C++-only copy constructor test 850 } 851 852 /** 853 * Test inter-Indic transliterators. These are composed. 854 */ 855 public void TestInterIndic() { 856 String ID = "Devanagari-Gujarati"; 857 Transliterator dg = Transliterator.getInstance(ID); 858 if (dg == null) { 859 errln("FAIL: getInstance(" + ID + ") returned null"); 860 return; 861 } 862 String id = dg.getID(); 863 if (!id.equals(ID)) { 864 errln("FAIL: getInstance(" + ID + ").getID() => " + id); 865 } 866 String dev = "\u0901\u090B\u0925"; 867 String guj = "\u0A81\u0A8B\u0AA5"; 868 expect(dg, dev, guj); 869 } 870 871 /** 872 * Test filter syntax in IDs. (J23) 873 */ 874 public void TestFilterIDs() { 875 String[] DATA = { 876 "[aeiou]Any-Hex", // ID 877 "[aeiou]Hex-Any", // expected inverse ID 878 "quizzical", // src 879 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 880 881 "[aeiou]Any-Hex;[^5]Hex-Any", 882 "[^5]Any-Hex;[aeiou]Hex-Any", 883 "quizzical", 884 "q\\u0075izzical", 885 886 "[abc]Null", 887 "[abc]Null", 888 "xyz", 889 "xyz", 890 }; 891 892 for (int i=0; i<DATA.length; i+=4) { 893 String ID = DATA[i]; 894 Transliterator t = Transliterator.getInstance(ID); 895 expect(t, DATA[i+2], DATA[i+3]); 896 897 // Check the ID 898 if (!ID.equals(t.getID())) { 899 errln("FAIL: getInstance(" + ID + ").getID() => " + 900 t.getID()); 901 } 902 903 // Check the inverse 904 String uID = DATA[i+1]; 905 Transliterator u = t.getInverse(); 906 if (u == null) { 907 errln("FAIL: " + ID + ".getInverse() returned NULL"); 908 } else if (!u.getID().equals(uID)) { 909 errln("FAIL: " + ID + ".getInverse().getID() => " + 910 u.getID() + ", expected " + uID); 911 } 912 } 913 } 914 915 /** 916 * Test the case mapping transliterators. 917 */ 918 public void TestCaseMap() { 919 Transliterator toUpper = 920 Transliterator.getInstance("Any-Upper[^xyzXYZ]"); 921 Transliterator toLower = 922 Transliterator.getInstance("Any-Lower[^xyzXYZ]"); 923 Transliterator toTitle = 924 Transliterator.getInstance("Any-Title[^xyzXYZ]"); 925 926 expect(toUpper, "The quick brown fox jumped over the lazy dogs.", 927 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 928 expect(toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 929 "the quick brown foX jumped over the lazY dogs."); 930 expect(toTitle, "the quick brown foX caN'T jump over the laZy dogs.", 931 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 932 } 933 934 /** 935 * Test the name mapping transliterators. 936 */ 937 public void TestNameMap() { 938 Transliterator uni2name = 939 Transliterator.getInstance("Any-Name[^abc]"); 940 Transliterator name2uni = 941 Transliterator.getInstance("Name-Any"); 942 943 expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF", 944 "\\N{NO-BREAK SPACE}abc\\N{CJK UNIFIED IDEOGRAPH-4E01}\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}"); 945 expect(name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", 946 "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"); 947 948 // round trip 949 Transliterator t = Transliterator.getInstance("Any-Name;Name-Any"); 950 951 String s = "{\u00A0abc\u4E01\\N{x\u00B5\u0A81\uFFFD\u0004\u0009\u0081\uFFFF\u0004\\N{"; 952 expect(t, s, s); 953 } 954 955 /** 956 * Test liberalized ID syntax. 1006c 957 */ 958 public void TestLiberalizedID() { 959 // Some test cases have an expected getID() value of NULL. This 960 // means I have disabled the test case for now. This stuff is 961 // still under development, and I haven't decided whether to make 962 // getID() return canonical case yet. It will all get rewritten 963 // with the move to Source-Target/Variant IDs anyway. [aliu] 964 String DATA[] = { 965 "latin-greek", null /*"Latin-Greek"*/, "case insensitivity", 966 " Null ", "Null", "whitespace", 967 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 968 " null ; latin-greek ", null /*"Null;Latin-Greek"*/, "compound whitespace", 969 }; 970 971 for (int i=0; i<DATA.length; i+=3) { 972 try { 973 Transliterator t = Transliterator.getInstance(DATA[i]); 974 if (DATA[i+1] == null || DATA[i+1].equals(t.getID())) { 975 logln("Ok: " + DATA[i+2] + 976 " create ID \"" + DATA[i] + "\" => \"" + 977 t.getID() + "\""); 978 } else { 979 errln("FAIL: " + DATA[i+2] + 980 " create ID \"" + DATA[i] + "\" => \"" + 981 t.getID() + "\", exp \"" + DATA[i+1] + "\""); 982 } 983 } catch (IllegalArgumentException e) { 984 errln("FAIL: " + DATA[i+2] + 985 " create ID \"" + DATA[i] + "\""); 986 } 987 } 988 } 989 990 public void TestCreateInstance() { 991 String FORWARD = "F"; 992 String REVERSE = "R"; 993 String DATA[] = { 994 // Column 1: id 995 // Column 2: direction 996 // Column 3: expected ID, or "" if expect failure 997 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 998 999 // JB#2689: bad compound causes crash 1000 "InvalidSource-InvalidTarget", FORWARD, "", 1001 "InvalidSource-InvalidTarget", REVERSE, "", 1002 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 1003 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 1004 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 1005 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 1006 1007 null 1008 }; 1009 1010 for (int i=0; DATA[i]!=null; i+=3) { 1011 String id=DATA[i]; 1012 int dir = (DATA[i+1]==FORWARD)? 1013 Transliterator.FORWARD:Transliterator.REVERSE; 1014 String expID=DATA[i+2]; 1015 Exception e = null; 1016 Transliterator t; 1017 try { 1018 t = Transliterator.getInstance(id,dir); 1019 } catch (Exception e1) { 1020 e = e1; 1021 t = null; 1022 } 1023 String newID = (t!=null)?t.getID():""; 1024 boolean ok = (newID.equals(expID)); 1025 if (t==null) { 1026 newID = e.getMessage(); 1027 } 1028 if (ok) { 1029 logln("Ok: createInstance(" + 1030 id + "," + DATA[i+1] + ") => " + newID); 1031 } else { 1032 errln("FAIL: createInstance(" + 1033 id + "," + DATA[i+1] + ") => " + newID + 1034 ", expected " + expID); 1035 } 1036 } 1037 } 1038 1039 /** 1040 * Test the normalization transliterator. 1041 */ 1042 public void TestNormalizationTransliterator() { 1043 // THE FOLLOWING TWO TABLES ARE COPIED FROM android.icu.dev.test.normalizer.BasicTest 1044 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1045 String[][] CANON = { 1046 // Input Decomposed Composed 1047 {"cat", "cat", "cat" }, 1048 {"\u00e0ardvark", "a\u0300ardvark", "\u00e0ardvark" }, 1049 1050 {"\u1e0a", "D\u0307", "\u1e0a" }, // D-dot_above 1051 {"D\u0307", "D\u0307", "\u1e0a" }, // D dot_above 1052 1053 {"\u1e0c\u0307", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_below dot_above 1054 {"\u1e0a\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D-dot_above dot_below 1055 {"D\u0307\u0323", "D\u0323\u0307", "\u1e0c\u0307" }, // D dot_below dot_above 1056 1057 {"\u1e10\u0307\u0323", "D\u0327\u0323\u0307","\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above 1058 {"D\u0307\u0328\u0323","D\u0328\u0323\u0307","\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below 1059 1060 {"\u1E14", "E\u0304\u0300", "\u1E14" }, // E-macron-grave 1061 {"\u0112\u0300", "E\u0304\u0300", "\u1E14" }, // E-macron + grave 1062 {"\u00c8\u0304", "E\u0300\u0304", "\u00c8\u0304" }, // E-grave + macron 1063 1064 {"\u212b", "A\u030a", "\u00c5" }, // angstrom_sign 1065 {"\u00c5", "A\u030a", "\u00c5" }, // A-ring 1066 1067 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated with 3.0 1068 {"\u00fd\uFB03n", "y\u0301\uFB03n", "\u00fd\uFB03n" }, //updated with 3.0 1069 1070 {"Henry IV", "Henry IV", "Henry IV" }, 1071 {"Henry \u2163", "Henry \u2163", "Henry \u2163" }, 1072 1073 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1074 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1075 {"\uFF76\uFF9E", "\uFF76\uFF9E", "\uFF76\uFF9E" }, // hw_ka + hw_ten 1076 {"\u30AB\uFF9E", "\u30AB\uFF9E", "\u30AB\uFF9E" }, // ka + hw_ten 1077 {"\uFF76\u3099", "\uFF76\u3099", "\uFF76\u3099" }, // hw_ka + ten 1078 1079 {"A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" }, 1080 }; 1081 1082 String[][] COMPAT = { 1083 // Input Decomposed Composed 1084 {"\uFB4f", "\u05D0\u05DC", "\u05D0\u05DC" }, // Alef-Lamed vs. Alef, Lamed 1085 1086 {"\u00fdffin", "y\u0301ffin", "\u00fdffin" }, //updated for 3.0 1087 {"\u00fd\uFB03n", "y\u0301ffin", "\u00fdffin" }, // ffi ligature -> f + f + i 1088 1089 {"Henry IV", "Henry IV", "Henry IV" }, 1090 {"Henry \u2163", "Henry IV", "Henry IV" }, 1091 1092 {"\u30AC", "\u30AB\u3099", "\u30AC" }, // ga (Katakana) 1093 {"\u30AB\u3099", "\u30AB\u3099", "\u30AC" }, // ka + ten 1094 1095 {"\uFF76\u3099", "\u30AB\u3099", "\u30AC" }, // hw_ka + ten 1096 }; 1097 1098 Transliterator NFD = Transliterator.getInstance("NFD"); 1099 Transliterator NFC = Transliterator.getInstance("NFC"); 1100 for (int i=0; i<CANON.length; ++i) { 1101 String in = CANON[i][0]; 1102 String expd = CANON[i][1]; 1103 String expc = CANON[i][2]; 1104 expect(NFD, in, expd); 1105 expect(NFC, in, expc); 1106 } 1107 1108 Transliterator NFKD = Transliterator.getInstance("NFKD"); 1109 Transliterator NFKC = Transliterator.getInstance("NFKC"); 1110 for (int i=0; i<COMPAT.length; ++i) { 1111 String in = COMPAT[i][0]; 1112 String expkd = COMPAT[i][1]; 1113 String expkc = COMPAT[i][2]; 1114 expect(NFKD, in, expkd); 1115 expect(NFKC, in, expkc); 1116 } 1117 1118 Transliterator t = Transliterator.getInstance("NFD; [x]Remove"); 1119 expect(t, "\u010dx", "c\u030C"); 1120 } 1121 1122 /** 1123 * Test compound RBT rules. 1124 */ 1125 public void TestCompoundRBT() { 1126 // Careful with spacing and ';' here: Phrase this exactly 1127 // as toRules() is going to return it. If toRules() changes 1128 // with regard to spacing or ';', then adjust this string. 1129 String rule = "::Hex-Any;\n" + 1130 "::Any-Lower;\n" + 1131 "a > '.A.';\n" + 1132 "b > '.B.';\n" + 1133 "::[^t]Any-Upper;"; 1134 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 1135 if (t == null) { 1136 errln("FAIL: createFromRules failed"); 1137 return; 1138 } 1139 expect(t, "\u0043at in the hat, bat on the mat", 1140 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1141 String r = t.toRules(true); 1142 if (r.equals(rule)) { 1143 logln("OK: toRules() => " + r); 1144 } else { 1145 errln("FAIL: toRules() => " + r + 1146 ", expected " + rule); 1147 } 1148 1149 // Now test toRules 1150 t = Transliterator.getInstance("Greek-Latin; Latin-Cyrillic", Transliterator.FORWARD); 1151 if (t == null) { 1152 errln("FAIL: createInstance failed"); 1153 return; 1154 } 1155 String exp = "::Greek-Latin;\n::Latin-Cyrillic;"; 1156 r = t.toRules(true); 1157 if (!r.equals(exp)) { 1158 errln("FAIL: toRules() => " + r + 1159 ", expected " + exp); 1160 } else { 1161 logln("OK: toRules() => " + r); 1162 } 1163 1164 // Round trip the result of toRules 1165 t = Transliterator.createFromRules("Test", r, Transliterator.FORWARD); 1166 if (t == null) { 1167 errln("FAIL: createFromRules #2 failed"); 1168 return; 1169 } else { 1170 logln("OK: createFromRules(" + r + ") succeeded"); 1171 } 1172 1173 // Test toRules again 1174 r = t.toRules(true); 1175 if (!r.equals(exp)) { 1176 errln("FAIL: toRules() => " + r + 1177 ", expected " + exp); 1178 } else { 1179 logln("OK: toRules() => " + r); 1180 } 1181 1182 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform 1183 // to what the regenerated ID will look like. 1184 String id = "Upper(Lower);(NFKC)"; 1185 t = Transliterator.getInstance(id, Transliterator.FORWARD); 1186 if (t == null) { 1187 errln("FAIL: createInstance #2 failed"); 1188 return; 1189 } 1190 if (t.getID().equals(id)) { 1191 logln("OK: created " + id); 1192 } else { 1193 errln("FAIL: createInstance(" + id + 1194 ").getID() => " + t.getID()); 1195 } 1196 1197 Transliterator u = t.getInverse(); 1198 if (u == null) { 1199 errln("FAIL: createInverse failed"); 1200 return; 1201 } 1202 exp = "NFKC();Lower(Upper)"; 1203 if (u.getID().equals(exp)) { 1204 logln("OK: createInverse(" + id + ") => " + 1205 u.getID()); 1206 } else { 1207 errln("FAIL: createInverse(" + id + ") => " + 1208 u.getID()); 1209 } 1210 } 1211 1212 /** 1213 * Compound filter semantics were orginially not implemented 1214 * correctly. Originally, each component filter f(i) is replaced by 1215 * f'(i) = f(i) && g, where g is the filter for the compound 1216 * transliterator. 1217 * 1218 * From Mark: 1219 * 1220 * Suppose and I have a transliterator X. Internally X is 1221 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1222 * 1223 * The compound should convert all greek characters (through latin) to 1224 * cyrillic, then lowercase the result. The filter should say "don't 1225 * touch 'A' in the original". But because an intermediate result 1226 * happens to go through "A", the Greek Alpha gets hung up. 1227 */ 1228 public void TestCompoundFilter() { 1229 Transliterator t = Transliterator.getInstance 1230 ("Greek-Latin; Latin-Greek; Lower", Transliterator.FORWARD); 1231 t.setFilter(new UnicodeSet("[^A]")); 1232 1233 // Only the 'A' at index 1 should remain unchanged 1234 expect(t, 1235 CharsToUnicodeString("BA\\u039A\\u0391"), 1236 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1237 } 1238 1239 /** 1240 * Test the "Remove" transliterator. 1241 */ 1242 public void TestRemove() { 1243 Transliterator t = Transliterator.getInstance("Remove[aeiou]"); 1244 expect(t, "The quick brown fox.", 1245 "Th qck brwn fx."); 1246 } 1247 1248 public void TestToRules() { 1249 String RBT = "rbt"; 1250 String SET = "set"; 1251 String[] DATA = { 1252 RBT, 1253 "$a=\\u4E61; [$a] > A;", 1254 "[\\u4E61] > A;", 1255 1256 RBT, 1257 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1258 "[[:Zs:][:Zl:]]{a} > A;", 1259 1260 SET, 1261 "[[:Zs:][:Zl:]]", 1262 "[[:Zs:][:Zl:]]", 1263 1264 SET, 1265 "[:Ps:]", 1266 "[:Ps:]", 1267 1268 SET, 1269 "[:L:]", 1270 "[:L:]", 1271 1272 SET, 1273 "[[:L:]-[A]]", 1274 "[[:L:]-[A]]", 1275 1276 SET, 1277 "[~[:Lu:][:Ll:]]", 1278 "[~[:Lu:][:Ll:]]", 1279 1280 SET, 1281 "[~[a-z]]", 1282 "[~[a-z]]", 1283 1284 RBT, 1285 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1286 "[^[:Zs:]]{a} > A;", 1287 1288 RBT, 1289 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1290 "[[a-z]-[:Zs:]]{a} > A;", 1291 1292 RBT, 1293 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1294 "[[:Zs:]&[a-z]]{a} > A;", 1295 1296 RBT, 1297 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1298 "[x[:Zs:]]{a} > A;", 1299 1300 RBT, 1301 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"+ 1302 "$macron = \\u0304 ;"+ 1303 "$evowel = [aeiouyAEIOUY] ;"+ 1304 "$iotasub = \\u0345 ;"+ 1305 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1306 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1307 1308 RBT, 1309 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1310 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1311 }; 1312 1313 for (int d=0; d < DATA.length; d+=3) { 1314 if (DATA[d] == RBT) { 1315 // Transliterator test 1316 Transliterator t = Transliterator.createFromRules("ID", 1317 DATA[d+1], Transliterator.FORWARD); 1318 if (t == null) { 1319 errln("FAIL: createFromRules failed"); 1320 return; 1321 } 1322 String rules, escapedRules; 1323 rules = t.toRules(false); 1324 escapedRules = t.toRules(true); 1325 String expRules = Utility.unescape(DATA[d+2]); 1326 String expEscapedRules = DATA[d+2]; 1327 if (rules.equals(expRules)) { 1328 logln("Ok: " + DATA[d+1] + 1329 " => " + Utility.escape(rules)); 1330 } else { 1331 errln("FAIL: " + DATA[d+1] + 1332 " => " + Utility.escape(rules + ", exp " + expRules)); 1333 } 1334 if (escapedRules.equals(expEscapedRules)) { 1335 logln("Ok: " + DATA[d+1] + 1336 " => " + escapedRules); 1337 } else { 1338 errln("FAIL: " + DATA[d+1] + 1339 " => " + escapedRules + ", exp " + expEscapedRules); 1340 } 1341 1342 } else { 1343 // UnicodeSet test 1344 String pat = DATA[d+1]; 1345 String expToPat = DATA[d+2]; 1346 UnicodeSet set = new UnicodeSet(pat); 1347 1348 // Adjust spacing etc. as necessary. 1349 String toPat; 1350 toPat = set.toPattern(true); 1351 if (expToPat.equals(toPat)) { 1352 logln("Ok: " + pat + 1353 " => " + toPat); 1354 } else { 1355 errln("FAIL: " + pat + 1356 " => " + Utility.escape(toPat) + 1357 ", exp " + Utility.escape(pat)); 1358 } 1359 } 1360 } 1361 } 1362 1363 public void TestContext() { 1364 Transliterator.Position pos = new Transliterator.Position(0, 2, 0, 1); // cs cl s l 1365 1366 expect("de > x; {d}e > y;", 1367 "de", 1368 "ye", 1369 pos); 1370 1371 expect("ab{c} > z;", 1372 "xadabdabcy", 1373 "xadabdabzy"); 1374 } 1375 1376 static final String CharsToUnicodeString(String s) { 1377 return Utility.unescape(s); 1378 } 1379 1380 public void TestSupplemental() { 1381 1382 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" + 1383 "a > $a; $s > i;"), 1384 CharsToUnicodeString("ab\\U0001030Fx"), 1385 CharsToUnicodeString("\\U00010300bix")); 1386 1387 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" + 1388 "$b=[A-Z\\U00010400-\\U0001044D];" + 1389 "($a)($b) > $2 $1;"), 1390 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1391 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1392 1393 // k|ax\\U00010300xm 1394 1395 // k|a\\U00010400\\U00010300xm 1396 // ky|\\U00010400\\U00010300xm 1397 // ky\\U00010400|\\U00010300xm 1398 1399 // ky\\U00010400|\\U00010300\\U00010400m 1400 // ky\\U00010400y|\\U00010400m 1401 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" + 1402 "$a {x} > | @ \\U00010400;" + 1403 "{$a} [^\\u0000-\\uFFFF] > y;"), 1404 CharsToUnicodeString("kax\\U00010300xm"), 1405 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1406 1407 expect(Transliterator.getInstance("Any-Name"), 1408 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1409 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); 1410 1411 expect(Transliterator.getInstance("Name-Any"), 1412 "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}", 1413 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0")); 1414 1415 expect(Transliterator.getInstance("Any-Hex/Unicode"), 1416 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1417 "U+10330U+10FF00U+E0061U+00A0"); 1418 1419 expect(Transliterator.getInstance("Any-Hex/C"), 1420 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1421 "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); 1422 1423 expect(Transliterator.getInstance("Any-Hex/Perl"), 1424 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1425 "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); 1426 1427 expect(Transliterator.getInstance("Any-Hex/Java"), 1428 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1429 "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); 1430 1431 expect(Transliterator.getInstance("Any-Hex/XML"), 1432 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1433 "𐌰􏼀󠁡 "); 1434 1435 expect(Transliterator.getInstance("Any-Hex/XML10"), 1436 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1437 "𐌰􏼀󠁡 "); 1438 1439 expect(Transliterator.getInstance("[\\U000E0000-\\U000E0FFF] Remove"), 1440 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1441 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1442 } 1443 1444 public void TestQuantifier() { 1445 1446 // Make sure @ in a quantified anteContext works 1447 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1448 "AAAAAb", 1449 "aaa(aac)"); 1450 1451 // Make sure @ in a quantified postContext works 1452 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1453 "baaaaa", 1454 "caa(aaa)"); 1455 1456 // Make sure @ in a quantified postContext with seg ref works 1457 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1458 "baaaaa", 1459 "baa(aaa)"); 1460 1461 // Make sure @ past ante context doesn't enter ante context 1462 Transliterator.Position pos = new Transliterator.Position(0, 5, 3, 5); 1463 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1464 "xxxab", 1465 "xxx(ac)", 1466 pos); 1467 1468 // Make sure @ past post context doesn't pass limit 1469 Transliterator.Position pos2 = new Transliterator.Position(0, 4, 0, 2); 1470 expect("{b} a+ > c @@ |; x > y; a > A;", 1471 "baxx", 1472 "caxx", 1473 pos2); 1474 1475 // Make sure @ past post context doesn't enter post context 1476 expect("{b} a+ > c @@ |; x > y; a > A;", 1477 "baxx", 1478 "cayy"); 1479 1480 expect("(ab)? c > d;", 1481 "c abc ababc", 1482 "d d abd"); 1483 1484 // NOTE: The (ab)+ when referenced just yields a single "ab", 1485 // not the full sequence of them. This accords with perl behavior. 1486 expect("(ab)+ {x} > '(' $1 ')';", 1487 "x abx ababxy", 1488 "x ab(ab) abab(ab)y"); 1489 1490 expect("b+ > x;", 1491 "ac abc abbc abbbc", 1492 "ac axc axc axc"); 1493 1494 expect("[abc]+ > x;", 1495 "qac abrc abbcs abtbbc", 1496 "qx xrx xs xtx"); 1497 1498 expect("q{(ab)+} > x;", 1499 "qa qab qaba qababc qaba", 1500 "qa qx qxa qxc qxa"); 1501 1502 expect("q(ab)* > x;", 1503 "qa qab qaba qababc", 1504 "xa x xa xc"); 1505 1506 // NOTE: The (ab)+ when referenced just yields a single "ab", 1507 // not the full sequence of them. This accords with perl behavior. 1508 expect("q(ab)* > '(' $1 ')';", 1509 "qa qab qaba qababc", 1510 "()a (ab) (ab)a (ab)c"); 1511 1512 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1513 // quoted string 1514 expect("'ab'+ > x;", 1515 "bb ab ababb", 1516 "bb x xb"); 1517 1518 // $foo+ and $foo* -- the quantifier should apply to the entire 1519 // variable reference 1520 expect("$var = ab; $var+ > x;", 1521 "bb ab ababb", 1522 "bb x xb"); 1523 } 1524 1525 static class TestFact implements Transliterator.Factory { 1526 static class NameableNullTrans extends Transliterator { 1527 public NameableNullTrans(String id) { 1528 super(id, null); 1529 } 1530 protected void handleTransliterate(Replaceable text, 1531 Position offsets, boolean incremental) { 1532 offsets.start = offsets.limit; 1533 } 1534 } 1535 String id; 1536 public TestFact(String theID) { 1537 id = theID; 1538 } 1539 public Transliterator getInstance(String ignoredID) { 1540 return new NameableNullTrans(id); 1541 } 1542 } 1543 1544 public void TestSTV() { 1545 Enumeration es = Transliterator.getAvailableSources(); 1546 for (int i=0; es.hasMoreElements(); ++i) { 1547 String source = (String) es.nextElement(); 1548 logln("" + i + ": " + source); 1549 if (source.length() == 0) { 1550 errln("FAIL: empty source"); 1551 continue; 1552 } 1553 Enumeration et = Transliterator.getAvailableTargets(source); 1554 for (int j=0; et.hasMoreElements(); ++j) { 1555 String target = (String) et.nextElement(); 1556 logln(" " + j + ": " + target); 1557 if (target.length() == 0) { 1558 errln("FAIL: empty target"); 1559 continue; 1560 } 1561 Enumeration ev = Transliterator.getAvailableVariants(source, target); 1562 for (int k=0; ev.hasMoreElements(); ++k) { 1563 String variant = (String) ev.nextElement(); 1564 if (variant.length() == 0) { 1565 logln(" " + k + ": <empty>"); 1566 } else { 1567 logln(" " + k + ": " + variant); 1568 } 1569 } 1570 } 1571 } 1572 1573 // Test registration 1574 String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1575 String[] FULL_IDS = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 1576 String[] SOURCES = { null, "Seoridf", "Oewoir" }; 1577 for (int i=0; i<3; ++i) { 1578 Transliterator.registerFactory(IDS[i], new TestFact(IDS[i])); 1579 try { 1580 Transliterator t = Transliterator.getInstance(IDS[i]); 1581 if (t.getID().equals(IDS[i])) { 1582 logln("Ok: Registration/creation succeeded for ID " + 1583 IDS[i]); 1584 } else { 1585 errln("FAIL: Registration of ID " + 1586 IDS[i] + " creates ID " + t.getID()); 1587 } 1588 Transliterator.unregister(IDS[i]); 1589 try { 1590 t = Transliterator.getInstance(IDS[i]); 1591 errln("FAIL: Unregistration failed for ID " + 1592 IDS[i] + "; still receiving ID " + t.getID()); 1593 } catch (IllegalArgumentException e2) { 1594 // Good; this is what we expect 1595 logln("Ok; Unregistered " + IDS[i]); 1596 } 1597 } catch (IllegalArgumentException e) { 1598 errln("FAIL: Registration/creation failed for ID " + 1599 IDS[i]); 1600 } finally { 1601 Transliterator.unregister(IDS[i]); 1602 } 1603 } 1604 1605 // Make sure getAvailable API reflects removal 1606 for (Enumeration e = Transliterator.getAvailableIDs(); 1607 e.hasMoreElements(); ) { 1608 String id = (String) e.nextElement(); 1609 for (int i=0; i<3; ++i) { 1610 if (id.equals(FULL_IDS[i])) { 1611 errln("FAIL: unregister(" + id + ") failed"); 1612 } 1613 } 1614 } 1615 for (Enumeration e = Transliterator.getAvailableTargets("Any"); 1616 e.hasMoreElements(); ) { 1617 String t = (String) e.nextElement(); 1618 if (t.equals(IDS[0])) { 1619 errln("FAIL: unregister(Any-" + t + ") failed"); 1620 } 1621 } 1622 for (Enumeration e = Transliterator.getAvailableSources(); 1623 e.hasMoreElements(); ) { 1624 String s = (String) e.nextElement(); 1625 for (int i=0; i<3; ++i) { 1626 if (SOURCES[i] == null) continue; 1627 if (s.equals(SOURCES[i])) { 1628 errln("FAIL: unregister(" + s + "-*) failed"); 1629 } 1630 } 1631 } 1632 } 1633 1634 /** 1635 * Test inverse of Greek-Latin; Title() 1636 */ 1637 public void TestCompoundInverse() { 1638 Transliterator t = Transliterator.getInstance 1639 ("Greek-Latin; Title()", Transliterator.REVERSE); 1640 if (t == null) { 1641 errln("FAIL: createInstance"); 1642 return; 1643 } 1644 String exp = "(Title);Latin-Greek"; 1645 if (t.getID().equals(exp)) { 1646 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 1647 t.getID()); 1648 } else { 1649 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 1650 t.getID() + "\", expected \"" + exp + "\""); 1651 } 1652 } 1653 1654 /** 1655 * Test NFD chaining with RBT 1656 */ 1657 public void TestNFDChainRBT() { 1658 Transliterator t = Transliterator.createFromRules( 1659 "TEST", "::NFD; aa > Q; a > q;", 1660 Transliterator.FORWARD); 1661 logln(t.toRules(true)); 1662 expect(t, "aa", "Q"); 1663 } 1664 1665 /** 1666 * Inverse of "Null" should be "Null". (J21) 1667 */ 1668 public void TestNullInverse() { 1669 Transliterator t = Transliterator.getInstance("Null"); 1670 Transliterator u = t.getInverse(); 1671 if (!u.getID().equals("Null")) { 1672 errln("FAIL: Inverse of Null should be Null"); 1673 } 1674 } 1675 1676 /** 1677 * Check ID of inverse of alias. (J22) 1678 */ 1679 public void TestAliasInverseID() { 1680 String ID = "Latin-Hangul"; // This should be any alias ID with an inverse 1681 Transliterator t = Transliterator.getInstance(ID); 1682 Transliterator u = t.getInverse(); 1683 String exp = "Hangul-Latin"; 1684 String got = u.getID(); 1685 if (!got.equals(exp)) { 1686 errln("FAIL: Inverse of " + ID + " is " + got + 1687 ", expected " + exp); 1688 } 1689 } 1690 1691 /** 1692 * Test IDs of inverses of compound transliterators. (J20) 1693 */ 1694 public void TestCompoundInverseID() { 1695 String ID = "Latin-Jamo;NFC(NFD)"; 1696 Transliterator t = Transliterator.getInstance(ID); 1697 Transliterator u = t.getInverse(); 1698 String exp = "NFD(NFC);Jamo-Latin"; 1699 String got = u.getID(); 1700 if (!got.equals(exp)) { 1701 errln("FAIL: Inverse of " + ID + " is " + got + 1702 ", expected " + exp); 1703 } 1704 } 1705 1706 /** 1707 * Test undefined variable. 1708 */ 1709 public void TestUndefinedVariable() { 1710 String rule = "$initial } a <> \u1161;"; 1711 try { 1712 Transliterator.createFromRules("<ID>", rule,Transliterator.FORWARD); 1713 } catch (IllegalArgumentException e) { 1714 logln("OK: Got exception for " + rule + ", as expected: " + 1715 e.getMessage()); 1716 return; 1717 } 1718 errln("Fail: bogus rule " + rule + " compiled without error"); 1719 } 1720 1721 /** 1722 * Test empty context. 1723 */ 1724 public void TestEmptyContext() { 1725 expect(" { a } > b;", "xay a ", "xby b "); 1726 } 1727 1728 /** 1729 * Test compound filter ID syntax 1730 */ 1731 public void TestCompoundFilterID() { 1732 String[] DATA = { 1733 // Col. 1 = ID or rule set (latter must start with #) 1734 1735 // = columns > 1 are null if expect col. 1 to be illegal = 1736 1737 // Col. 2 = direction, "F..." or "R..." 1738 // Col. 3 = source string 1739 // Col. 4 = exp result 1740 1741 "[abc]; [abc]", null, null, null, // multiple filters 1742 "Latin-Greek; [abc];", null, null, null, // misplaced filter 1743 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\u0392c", 1744 "[b]; (Lower); Latin-Greek; Upper(); ([\u0392])", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1745 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\u0392c", 1746 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\u0392]);", "R", "\u0391\u0392\u0393", "\u0391b\u0393", 1747 }; 1748 1749 for (int i=0; i<DATA.length; i+=4) { 1750 String id = DATA[i]; 1751 int direction = (DATA[i+1] != null && DATA[i+1].charAt(0) == 'R') ? 1752 Transliterator.REVERSE : Transliterator.FORWARD; 1753 String source = DATA[i+2]; 1754 String exp = DATA[i+3]; 1755 boolean expOk = (DATA[i+1] != null); 1756 Transliterator t = null; 1757 IllegalArgumentException e = null; 1758 try { 1759 if (id.charAt(0) == '#') { 1760 t = Transliterator.createFromRules("ID", id, direction); 1761 } else { 1762 t = Transliterator.getInstance(id, direction); 1763 } 1764 } catch (IllegalArgumentException ee) { 1765 e = ee; 1766 } 1767 boolean ok = (t != null && e == null); 1768 if (ok == expOk) { 1769 logln("Ok: " + id + " => " + t + 1770 (e != null ? (", " + e.getMessage()) : "")); 1771 if (source != null) { 1772 expect(t, source, exp); 1773 } 1774 } else { 1775 errln("FAIL: " + id + " => " + t + 1776 (e != null ? (", " + e.getMessage()) : "")); 1777 } 1778 } 1779 } 1780 1781 /** 1782 * Test new property set syntax 1783 */ 1784 public void TestPropertySet() { 1785 expect("a>A; \\p{Lu}>x; \\p{Any}>y;", "abcDEF", "Ayyxxx"); 1786 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 1787 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 1788 } 1789 1790 /** 1791 * Test various failure points of the new 2.0 engine. 1792 */ 1793 public void TestNewEngine() { 1794 Transliterator t = Transliterator.getInstance("Latin-Hiragana"); 1795 // Katakana should be untouched 1796 expect(t, "a\u3042\u30A2", "\u3042\u3042\u30A2"); 1797 1798 if (true) { 1799 // This test will only work if Transliterator.ROLLBACK is 1800 // true. Otherwise, this test will fail, revealing a 1801 // limitation of global filters in incremental mode. 1802 1803 Transliterator a = 1804 Transliterator.createFromRules("a_to_A", "a > A;", Transliterator.FORWARD); 1805 Transliterator A = 1806 Transliterator.createFromRules("A_to_b", "A > b;", Transliterator.FORWARD); 1807 1808 //Transliterator array[] = new Transliterator[] { 1809 // a, 1810 // Transliterator.getInstance("NFD"), 1811 // A }; 1812 //t = Transliterator.getInstance(array, new UnicodeSet("[:Ll:]")); 1813 1814 try { 1815 Transliterator.registerInstance(a); 1816 Transliterator.registerInstance(A); 1817 1818 t = Transliterator.getInstance("[:Ll:];a_to_A;NFD;A_to_b"); 1819 expect(t, "aAaA", "bAbA"); 1820 1821 Transliterator[] u = t.getElements(); 1822 assertTrue("getElements().length", u.length == 3); 1823 assertEquals("getElements()[0]", u[0].getID(), "a_to_A"); 1824 assertEquals("getElements()[1]", u[1].getID(), "NFD"); 1825 assertEquals("getElements()[2]", u[2].getID(), "A_to_b"); 1826 1827 t = Transliterator.getInstance("a_to_A;NFD;A_to_b"); 1828 t.setFilter(new UnicodeSet("[:Ll:]")); 1829 expect(t, "aAaA", "bAbA"); 1830 } finally { 1831 Transliterator.unregister("a_to_A"); 1832 Transliterator.unregister("A_to_b"); 1833 } 1834 } 1835 1836 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 1837 "a", 1838 "ax"); 1839 1840 String gr = 1841 "$ddot = \u0308 ;" + 1842 "$lcgvowel = [\u03b1\u03b5\u03b7\u03b9\u03bf\u03c5\u03c9] ;" + 1843 "$rough = \u0314 ;" + 1844 "($lcgvowel+ $ddot?) $rough > h | $1 ;" + 1845 "\u03b1 <> a ;" + 1846 "$rough <> h ;"; 1847 1848 expect(gr, "\u03B1\u0314", "ha"); 1849 } 1850 1851 /** 1852 * Test quantified segment behavior. We want: 1853 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 1854 */ 1855 public void TestQuantifiedSegment() { 1856 // The normal case 1857 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 1858 1859 // The tricky case; the quantifier is around the segment 1860 expect("([abc])+ > x $1 x;", "cba", "xax"); 1861 1862 // Tricky case in reverse direction 1863 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 1864 1865 // Check post-context segment 1866 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 1867 1868 // Test toRule/toPattern for non-quantified segment. 1869 // Careful with spacing here. 1870 String r = "([a-c]){q} > x $1 x;"; 1871 Transliterator t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1872 String rr = t.toRules(true); 1873 if (!r.equals(rr)) { 1874 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1875 } else { 1876 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1877 } 1878 1879 // Test toRule/toPattern for quantified segment. 1880 // Careful with spacing here. 1881 r = "([a-c])+{q} > x $1 x;"; 1882 t = Transliterator.createFromRules("ID", r, Transliterator.FORWARD); 1883 rr = t.toRules(true); 1884 if (!r.equals(rr)) { 1885 errln("FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 1886 } else { 1887 logln("Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 1888 } 1889 } 1890 1891 //====================================================================== 1892 // Ram's tests 1893 //====================================================================== 1894 /* this test performs test of rules in ISO 15915 */ 1895 public void TestDevanagariLatinRT(){ 1896 String[] source = { 1897 "bh\u0101rata", 1898 "kra", 1899 "k\u1E63a", 1900 "khra", 1901 "gra", 1902 "\u1E45ra", 1903 "cra", 1904 "chra", 1905 "j\u00F1a", 1906 "jhra", 1907 "\u00F1ra", 1908 "\u1E6Dya", 1909 "\u1E6Dhra", 1910 "\u1E0Dya", 1911 //"r\u0323ya", // \u095c is not valid in Devanagari 1912 "\u1E0Dhya", 1913 "\u1E5Bhra", 1914 "\u1E47ra", 1915 "tta", 1916 "thra", 1917 "dda", 1918 "dhra", 1919 "nna", 1920 "pra", 1921 "phra", 1922 "bra", 1923 "bhra", 1924 "mra", 1925 "\u1E49ra", 1926 //"l\u0331ra", 1927 "yra", 1928 "\u1E8Fra", 1929 //"l-", 1930 "vra", 1931 "\u015Bra", 1932 "\u1E63ra", 1933 "sra", 1934 "hma", 1935 "\u1E6D\u1E6Da", 1936 "\u1E6D\u1E6Dha", 1937 "\u1E6Dh\u1E6Dha", 1938 "\u1E0D\u1E0Da", 1939 "\u1E0D\u1E0Dha", 1940 "\u1E6Dya", 1941 "\u1E6Dhya", 1942 "\u1E0Dya", 1943 "\u1E0Dhya", 1944 // Not roundtrippable -- 1945 // \u0939\u094d\u094d\u092E - hma 1946 // \u0939\u094d\u092E - hma 1947 // CharsToUnicodeString("hma"), 1948 "hya", 1949 "\u015Br\u0325", 1950 "\u015Bca", 1951 "\u0115", 1952 "san\u0304j\u012Bb s\u0113nagupta", 1953 "\u0101nand vaddir\u0101ju", 1954 }; 1955 String[] expected = { 1956 "\u092D\u093E\u0930\u0924", /* bha\u0304rata */ 1957 "\u0915\u094D\u0930", /* kra */ 1958 "\u0915\u094D\u0937", /* ks\u0323a */ 1959 "\u0916\u094D\u0930", /* khra */ 1960 "\u0917\u094D\u0930", /* gra */ 1961 "\u0919\u094D\u0930", /* n\u0307ra */ 1962 "\u091A\u094D\u0930", /* cra */ 1963 "\u091B\u094D\u0930", /* chra */ 1964 "\u091C\u094D\u091E", /* jn\u0303a */ 1965 "\u091D\u094D\u0930", /* jhra */ 1966 "\u091E\u094D\u0930", /* n\u0303ra */ 1967 "\u091F\u094D\u092F", /* t\u0323ya */ 1968 "\u0920\u094D\u0930", /* t\u0323hra */ 1969 "\u0921\u094D\u092F", /* d\u0323ya */ 1970 //"\u095C\u094D\u092F", /* r\u0323ya */ // \u095c is not valid in Devanagari 1971 "\u0922\u094D\u092F", /* d\u0323hya */ 1972 "\u0922\u093C\u094D\u0930", /* r\u0323hra */ 1973 "\u0923\u094D\u0930", /* n\u0323ra */ 1974 "\u0924\u094D\u0924", /* tta */ 1975 "\u0925\u094D\u0930", /* thra */ 1976 "\u0926\u094D\u0926", /* dda */ 1977 "\u0927\u094D\u0930", /* dhra */ 1978 "\u0928\u094D\u0928", /* nna */ 1979 "\u092A\u094D\u0930", /* pra */ 1980 "\u092B\u094D\u0930", /* phra */ 1981 "\u092C\u094D\u0930", /* bra */ 1982 "\u092D\u094D\u0930", /* bhra */ 1983 "\u092E\u094D\u0930", /* mra */ 1984 "\u0929\u094D\u0930", /* n\u0331ra */ 1985 //"\u0934\u094D\u0930", /* l\u0331ra */ 1986 "\u092F\u094D\u0930", /* yra */ 1987 "\u092F\u093C\u094D\u0930", /* y\u0307ra */ 1988 //"l-", 1989 "\u0935\u094D\u0930", /* vra */ 1990 "\u0936\u094D\u0930", /* s\u0301ra */ 1991 "\u0937\u094D\u0930", /* s\u0323ra */ 1992 "\u0938\u094D\u0930", /* sra */ 1993 "\u0939\u094d\u092E", /* hma */ 1994 "\u091F\u094D\u091F", /* t\u0323t\u0323a */ 1995 "\u091F\u094D\u0920", /* t\u0323t\u0323ha */ 1996 "\u0920\u094D\u0920", /* t\u0323ht\u0323ha*/ 1997 "\u0921\u094D\u0921", /* d\u0323d\u0323a */ 1998 "\u0921\u094D\u0922", /* d\u0323d\u0323ha */ 1999 "\u091F\u094D\u092F", /* t\u0323ya */ 2000 "\u0920\u094D\u092F", /* t\u0323hya */ 2001 "\u0921\u094D\u092F", /* d\u0323ya */ 2002 "\u0922\u094D\u092F", /* d\u0323hya */ 2003 // "hma", /* hma */ 2004 "\u0939\u094D\u092F", /* hya */ 2005 "\u0936\u0943", /* s\u0301r\u0325a */ 2006 "\u0936\u094D\u091A", /* s\u0301ca */ 2007 "\u090d", /* e\u0306 */ 2008 "\u0938\u0902\u091C\u0940\u092C\u094D \u0938\u0947\u0928\u0917\u0941\u092A\u094D\u0924", 2009 "\u0906\u0928\u0902\u0926\u094D \u0935\u0926\u094D\u0926\u093F\u0930\u093E\u091C\u0941", 2010 }; 2011 2012 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD ); 2013 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2014 2015 for(int i= 0; i<source.length; i++){ 2016 expect(latinToDev,(source[i]),(expected[i])); 2017 expect(devToLatin,(expected[i]),(source[i])); 2018 } 2019 2020 } 2021 public void TestTeluguLatinRT(){ 2022 String[] source = { 2023 "raghur\u0101m vi\u015Bvan\u0101dha", /* Raghuram Viswanadha */ 2024 "\u0101nand vaddir\u0101ju", /* Anand Vaddiraju */ 2025 "r\u0101j\u012Bv ka\u015Barab\u0101da", /* Rajeev Kasarabada */ 2026 "san\u0304j\u012Bv ka\u015Barab\u0101da", /* sanjeev kasarabada */ 2027 "san\u0304j\u012Bb sen'gupta", /* sanjib sengupata */ 2028 "amar\u0113ndra hanum\u0101nula", /* Amarendra hanumanula */ 2029 "ravi kum\u0101r vi\u015Bvan\u0101dha", /* Ravi Kumar Viswanadha */ 2030 "\u0101ditya kandr\u0113gula", /* Aditya Kandregula */ 2031 "\u015Br\u012Bdhar ka\u1E47\u1E6Dama\u015Be\u1E6D\u1E6Di", /* Shridhar Kantamsetty */ 2032 "m\u0101dhav de\u015Be\u1E6D\u1E6Di" /* Madhav Desetty */ 2033 }; 2034 2035 String[] expected = { 2036 "\u0c30\u0c18\u0c41\u0c30\u0c3e\u0c2e\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2037 "\u0c06\u0c28\u0c02\u0c26\u0c4d \u0C35\u0C26\u0C4D\u0C26\u0C3F\u0C30\u0C3E\u0C1C\u0C41", 2038 "\u0c30\u0c3e\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2039 "\u0c38\u0c02\u0c1c\u0c40\u0c35\u0c4d \u0c15\u0c36\u0c30\u0c2c\u0c3e\u0c26", 2040 "\u0c38\u0c02\u0c1c\u0c40\u0c2c\u0c4d \u0c38\u0c46\u0c28\u0c4d\u0c17\u0c41\u0c2a\u0c4d\u0c24", 2041 "\u0c05\u0c2e\u0c30\u0c47\u0c02\u0c26\u0c4d\u0c30 \u0c39\u0c28\u0c41\u0c2e\u0c3e\u0c28\u0c41\u0c32", 2042 "\u0c30\u0c35\u0c3f \u0c15\u0c41\u0c2e\u0c3e\u0c30\u0c4d \u0c35\u0c3f\u0c36\u0c4d\u0c35\u0c28\u0c3e\u0c27", 2043 "\u0c06\u0c26\u0c3f\u0c24\u0c4d\u0c2f \u0C15\u0C02\u0C26\u0C4D\u0C30\u0C47\u0C17\u0C41\u0c32", 2044 "\u0c36\u0c4d\u0c30\u0c40\u0C27\u0C30\u0C4D \u0c15\u0c02\u0c1f\u0c2e\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2045 "\u0c2e\u0c3e\u0c27\u0c35\u0c4d \u0c26\u0c46\u0c36\u0c46\u0c1f\u0c4d\u0c1f\u0c3f", 2046 }; 2047 2048 2049 Transliterator latinToDev=Transliterator.getInstance("Latin-Telugu", Transliterator.FORWARD); 2050 Transliterator devToLatin=Transliterator.getInstance("Telugu-Latin", Transliterator.FORWARD); 2051 2052 for(int i= 0; i<source.length; i++){ 2053 expect(latinToDev,(source[i]),(expected[i])); 2054 expect(devToLatin,(expected[i]),(source[i])); 2055 } 2056 } 2057 2058 public void TestSanskritLatinRT(){ 2059 int MAX_LEN =15; 2060 String[] source = { 2061 "rmk\u1E63\u0113t", 2062 "\u015Br\u012Bmad", 2063 "bhagavadg\u012Bt\u0101", 2064 "adhy\u0101ya", 2065 "arjuna", 2066 "vi\u1E63\u0101da", 2067 "y\u014Dga", 2068 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2069 "uv\u0101cr\u0325", 2070 "dharmak\u1E63\u0113tr\u0113", 2071 "kuruk\u1E63\u0113tr\u0113", 2072 "samav\u0113t\u0101", 2073 "yuyutsava\u1E25", 2074 "m\u0101mak\u0101\u1E25", 2075 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2076 "kimakurvata", 2077 "san\u0304java", 2078 }; 2079 String[] expected = { 2080 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2081 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2082 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2083 "\u0905\u0927\u094d\u092f\u093e\u092f", 2084 "\u0905\u0930\u094d\u091c\u0941\u0928", 2085 "\u0935\u093f\u0937\u093e\u0926", 2086 "\u092f\u094b\u0917", 2087 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2088 "\u0909\u0935\u093E\u091A\u0943", 2089 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2090 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2091 "\u0938\u092e\u0935\u0947\u0924\u093e", 2092 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2093 "\u092e\u093e\u092e\u0915\u093e\u0903", 2094 //"\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2095 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2096 "\u0938\u0902\u091c\u0935", 2097 }; 2098 2099 Transliterator latinToDev=Transliterator.getInstance("Latin-Devanagari", Transliterator.FORWARD); 2100 Transliterator devToLatin=Transliterator.getInstance("Devanagari-Latin", Transliterator.FORWARD); 2101 for(int i= 0; i<MAX_LEN; i++){ 2102 expect(latinToDev,(source[i]),(expected[i])); 2103 expect(devToLatin,(expected[i]),(source[i])); 2104 } 2105 } 2106 2107 public void TestCompoundLatinRT(){ 2108 int MAX_LEN =15; 2109 String[] source = { 2110 "rmk\u1E63\u0113t", 2111 "\u015Br\u012Bmad", 2112 "bhagavadg\u012Bt\u0101", 2113 "adhy\u0101ya", 2114 "arjuna", 2115 "vi\u1E63\u0101da", 2116 "y\u014Dga", 2117 "dhr\u0325tar\u0101\u1E63\u1E6Dra", 2118 "uv\u0101cr\u0325", 2119 "dharmak\u1E63\u0113tr\u0113", 2120 "kuruk\u1E63\u0113tr\u0113", 2121 "samav\u0113t\u0101", 2122 "yuyutsava\u1E25", 2123 "m\u0101mak\u0101\u1E25", 2124 // "p\u0101\u1E47\u1E0Dav\u0101\u015Bcaiva", 2125 "kimakurvata", 2126 "san\u0304java" 2127 }; 2128 String[] expected = { 2129 "\u0930\u094D\u092E\u094D\u0915\u094D\u0937\u0947\u0924\u094D", 2130 "\u0936\u094d\u0930\u0940\u092e\u0926\u094d", 2131 "\u092d\u0917\u0935\u0926\u094d\u0917\u0940\u0924\u093e", 2132 "\u0905\u0927\u094d\u092f\u093e\u092f", 2133 "\u0905\u0930\u094d\u091c\u0941\u0928", 2134 "\u0935\u093f\u0937\u093e\u0926", 2135 "\u092f\u094b\u0917", 2136 "\u0927\u0943\u0924\u0930\u093e\u0937\u094d\u091f\u094d\u0930", 2137 "\u0909\u0935\u093E\u091A\u0943", 2138 "\u0927\u0930\u094d\u092e\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2139 "\u0915\u0941\u0930\u0941\u0915\u094d\u0937\u0947\u0924\u094d\u0930\u0947", 2140 "\u0938\u092e\u0935\u0947\u0924\u093e", 2141 "\u092f\u0941\u092f\u0941\u0924\u094d\u0938\u0935\u0903", 2142 "\u092e\u093e\u092e\u0915\u093e\u0903", 2143 // "\u092a\u093e\u0923\u094d\u0921\u0935\u093e\u0936\u094d\u091a\u0948\u0935", 2144 "\u0915\u093f\u092e\u0915\u0941\u0930\u094d\u0935\u0924", 2145 "\u0938\u0902\u091c\u0935" 2146 }; 2147 2148 Transliterator latinToDevToLatin=Transliterator.getInstance("Latin-Devanagari;Devanagari-Latin", Transliterator.FORWARD); 2149 Transliterator devToLatinToDev=Transliterator.getInstance("Devanagari-Latin;Latin-Devanagari", Transliterator.FORWARD); 2150 for(int i= 0; i<MAX_LEN; i++){ 2151 expect(latinToDevToLatin,(source[i]),(source[i])); 2152 expect(devToLatinToDev,(expected[i]),(expected[i])); 2153 } 2154 } 2155 /** 2156 * Test Gurmukhi-Devanagari Tippi and Bindi 2157 */ 2158 public void TestGurmukhiDevanagari(){ 2159 // the rule says: 2160 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2161 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2162 2163 UnicodeSet vowel =new UnicodeSet("[\u0905-\u090A \u090F\u0910\u0913\u0914 \u093e-\u0942\u0947\u0948\u094B\u094C\u094D]"); 2164 UnicodeSet non_vowel =new UnicodeSet("[\u0915-\u0928\u092A-\u0930]"); 2165 2166 UnicodeSetIterator vIter = new UnicodeSetIterator(vowel); 2167 UnicodeSetIterator nvIter = new UnicodeSetIterator(non_vowel); 2168 Transliterator trans = Transliterator.getInstance("Devanagari-Gurmukhi"); 2169 StringBuffer src = new StringBuffer(" \u0902"); 2170 StringBuffer expect = new StringBuffer(" \u0A02"); 2171 while(vIter.next()){ 2172 src.setCharAt(0,(char) vIter.codepoint); 2173 expect.setCharAt(0,(char) (vIter.codepoint+0x0100)); 2174 expect(trans,src.toString(),expect.toString()); 2175 } 2176 2177 expect.setCharAt(1,'\u0A70'); 2178 while(nvIter.next()){ 2179 //src.setCharAt(0,(char) nvIter.codepoint); 2180 src.setCharAt(0,(char)nvIter.codepoint); 2181 expect.setCharAt(0,(char) (nvIter.codepoint+0x0100)); 2182 expect(trans,src.toString(),expect.toString()); 2183 } 2184 } 2185 /** 2186 * Test instantiation from a locale. 2187 */ 2188 public void TestLocaleInstantiation() { 2189 Transliterator t; 2190 try{ 2191 t = Transliterator.getInstance("te_IN-Latin"); 2192 //expect(t, "\u0430", "a"); 2193 }catch(IllegalArgumentException ex){ 2194 warnln("Could not load locale data for obtaining the script used in the locale te_IN. "+ex.getMessage()); 2195 } 2196 try{ 2197 t = Transliterator.getInstance("ru_RU-Latin"); 2198 expect(t, "\u0430", "a"); 2199 }catch(IllegalArgumentException ex){ 2200 warnln("Could not load locale data for obtaining the script used in the locale ru_RU. "+ex.getMessage()); 2201 } 2202 try{ 2203 t = Transliterator.getInstance("en-el"); 2204 expect(t, "a", "\u03B1"); 2205 }catch(IllegalArgumentException ex){ 2206 warnln("Could not load locale data for obtaining the script used in the locale el. "+ ex.getMessage()); 2207 } 2208 } 2209 2210 /** 2211 * Test title case handling of accent (should ignore accents) 2212 */ 2213 public void TestTitleAccents() { 2214 Transliterator t = Transliterator.getInstance("Title"); 2215 expect(t, "a\u0300b can't abe", "A\u0300b Can't Abe"); 2216 } 2217 2218 /** 2219 * Basic test of a locale resource based rule. 2220 */ 2221 public void TestLocaleResource() { 2222 String DATA[] = { 2223 // id from to 2224 "Latin-Greek/UNGEGN", "b", "\u03bc\u03c0", 2225 "Latin-el", "b", "\u03bc\u03c0", 2226 "Latin-Greek", "b", "\u03B2", 2227 "Greek-Latin/UNGEGN", "\u03B2", "v", 2228 "el-Latin", "\u03B2", "v", 2229 "Greek-Latin", "\u03B2", "b", 2230 }; 2231 for (int i=0; i<DATA.length; i+=3) { 2232 Transliterator t = Transliterator.getInstance(DATA[i]); 2233 expect(t, DATA[i+1], DATA[i+2]); 2234 } 2235 } 2236 2237 /** 2238 * Make sure parse errors reference the right line. 2239 */ 2240 public void TestParseError() { 2241 String rule = 2242 "a > b;\n" + 2243 "# more stuff\n" + 2244 "d << b;"; 2245 try { 2246 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2247 if(t!=null){ 2248 errln("FAIL: Did not get expected exception"); 2249 } 2250 } catch (IllegalArgumentException e) { 2251 String err = e.getMessage(); 2252 if (err.indexOf("d << b") >= 0) { 2253 logln("Ok: " + err); 2254 } else { 2255 errln("FAIL: " + err); 2256 } 2257 return; 2258 } 2259 errln("FAIL: no syntax error"); 2260 } 2261 2262 /** 2263 * Make sure sets on output are disallowed. 2264 */ 2265 public void TestOutputSet() { 2266 String rule = "$set = [a-cm-n]; b > $set;"; 2267 Transliterator t = null; 2268 try { 2269 t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2270 if(t!=null){ 2271 errln("FAIL: Did not get the expected exception"); 2272 } 2273 } catch (IllegalArgumentException e) { 2274 logln("Ok: " + e.getMessage()); 2275 return; 2276 } 2277 errln("FAIL: No syntax error"); 2278 } 2279 2280 /** 2281 * Test the use variable range pragma, making sure that use of 2282 * variable range characters is detected and flagged as an error. 2283 */ 2284 public void TestVariableRange() { 2285 String rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 2286 try { 2287 Transliterator t = 2288 Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2289 if(t!=null){ 2290 errln("FAIL: Did not get the expected exception"); 2291 } 2292 } catch (IllegalArgumentException e) { 2293 logln("Ok: " + e.getMessage()); 2294 return; 2295 } 2296 errln("FAIL: No syntax error"); 2297 } 2298 2299 /** 2300 * Test invalid post context error handling 2301 */ 2302 public void TestInvalidPostContext() { 2303 try { 2304 Transliterator t = 2305 Transliterator.createFromRules("ID", "a}b{c>d;", Transliterator.FORWARD); 2306 if(t!=null){ 2307 errln("FAIL: Did not get the expected exception"); 2308 } 2309 } catch (IllegalArgumentException e) { 2310 String msg = e.getMessage(); 2311 if (msg.indexOf("a}b{c") >= 0) { 2312 logln("Ok: " + msg); 2313 } else { 2314 errln("FAIL: " + msg); 2315 } 2316 return; 2317 } 2318 errln("FAIL: No syntax error"); 2319 } 2320 2321 /** 2322 * Test ID form variants 2323 */ 2324 public void TestIDForms() { 2325 String DATA[] = { 2326 "NFC", null, "NFD", 2327 "nfd", null, "NFC", // make sure case is ignored 2328 "Any-NFKD", null, "Any-NFKC", 2329 "Null", null, "Null", 2330 "-nfkc", "nfkc", "NFKD", 2331 "-nfkc/", "nfkc", "NFKD", 2332 "Latin-Greek/UNGEGN", null, "Greek-Latin/UNGEGN", 2333 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 2334 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 2335 "Source-", null, null, 2336 "Source/Variant-", null, null, 2337 "Source-/Variant", null, null, 2338 "/Variant", null, null, 2339 "/Variant-", null, null, 2340 "-/Variant", null, null, 2341 "-/", null, null, 2342 "-", null, null, 2343 "/", null, null, 2344 }; 2345 2346 for (int i=0; i<DATA.length; i+=3) { 2347 String ID = DATA[i]; 2348 String expID = DATA[i+1]; 2349 String expInvID = DATA[i+2]; 2350 boolean expValid = (expInvID != null); 2351 if (expID == null) { 2352 expID = ID; 2353 } 2354 try { 2355 Transliterator t = 2356 Transliterator.getInstance(ID); 2357 Transliterator u = t.getInverse(); 2358 if (t.getID().equals(expID) && 2359 u.getID().equals(expInvID)) { 2360 logln("Ok: " + ID + ".getInverse() => " + expInvID); 2361 } else { 2362 errln("FAIL: getInstance(" + ID + ") => " + 2363 t.getID() + " x getInverse() => " + u.getID() + 2364 ", expected " + expInvID); 2365 } 2366 } catch (IllegalArgumentException e) { 2367 if (!expValid) { 2368 logln("Ok: getInstance(" + ID + ") => " + e.getMessage()); 2369 } else { 2370 errln("FAIL: getInstance(" + ID + ") => " + e.getMessage()); 2371 } 2372 } 2373 } 2374 } 2375 2376 void checkRules(String label, Transliterator t2, String testRulesForward) { 2377 String rules2 = t2.toRules(true); 2378 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 2379 rules2 = TestUtility.replace(rules2, " ", ""); 2380 rules2 = TestUtility.replace(rules2, "\n", ""); 2381 rules2 = TestUtility.replace(rules2, "\r", ""); 2382 testRulesForward = TestUtility.replace(testRulesForward, " ", ""); 2383 2384 if (!rules2.equals(testRulesForward)) { 2385 errln(label); 2386 logln("GENERATED RULES: " + rules2); 2387 logln("SHOULD BE: " + testRulesForward); 2388 } 2389 } 2390 2391 /** 2392 * Mark's toRules test. 2393 */ 2394 public void TestToRulesMark() { 2395 2396 String testRules = 2397 "::[[:Latin:][:Mark:]];" 2398 + "::NFKD (NFC);" 2399 + "::Lower (Lower);" 2400 + "a <> \\u03B1;" // alpha 2401 + "::NFKC (NFD);" 2402 + "::Upper (Lower);" 2403 + "::Lower ();" 2404 + "::([[:Greek:][:Mark:]]);" 2405 ; 2406 String testRulesForward = 2407 "::[[:Latin:][:Mark:]];" 2408 + "::NFKD(NFC);" 2409 + "::Lower(Lower);" 2410 + "a > \\u03B1;" 2411 + "::NFKC(NFD);" 2412 + "::Upper (Lower);" 2413 + "::Lower ();" 2414 ; 2415 String testRulesBackward = 2416 "::[[:Greek:][:Mark:]];" 2417 + "::Lower (Upper);" 2418 + "::NFD(NFKC);" 2419 + "\\u03B1 > a;" 2420 + "::Lower(Lower);" 2421 + "::NFC(NFKD);" 2422 ; 2423 String source = "\u00E1"; // a-acute 2424 String target = "\u03AC"; // alpha-acute 2425 2426 Transliterator t2 = Transliterator.createFromRules("source-target", testRules, Transliterator.FORWARD); 2427 Transliterator t3 = Transliterator.createFromRules("target-source", testRules, Transliterator.REVERSE); 2428 2429 expect(t2, source, target); 2430 expect(t3, target, source); 2431 2432 checkRules("Failed toRules FORWARD", t2, testRulesForward); 2433 checkRules("Failed toRules BACKWARD", t3, testRulesBackward); 2434 } 2435 2436 /** 2437 * Test Escape and Unescape transliterators. 2438 */ 2439 public void TestEscape() { 2440 expect(Transliterator.getInstance("Hex-Any"), 2441 "\\x{40}\\U000000312Q", 2442 "@12Q"); 2443 expect(Transliterator.getInstance("Any-Hex/C"), 2444 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2445 "\\u0041\\U0010BEEF\\uFEED"); 2446 expect(Transliterator.getInstance("Any-Hex/Java"), 2447 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2448 "\\u0041\\uDBEF\\uDEEF\\uFEED"); 2449 expect(Transliterator.getInstance("Any-Hex/Perl"), 2450 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 2451 "\\x{41}\\x{10BEEF}\\x{FEED}"); 2452 } 2453 2454 /** 2455 * Make sure display names of variants look reasonable. 2456 */ 2457 public void TestDisplayName() { 2458 String DATA[] = { 2459 // ID, forward name, reverse name 2460 // Update the text as necessary -- the important thing is 2461 // not the text itself, but how various cases are handled. 2462 2463 // Basic test 2464 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 2465 2466 // Variants 2467 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 2468 2469 // Target-only IDs 2470 "NFC", "Any to NFC", "Any to NFD", 2471 }; 2472 2473 Locale US = Locale.US; 2474 2475 for (int i=0; i<DATA.length; i+=3) { 2476 String name = Transliterator.getDisplayName(DATA[i], US); 2477 if (!name.equals(DATA[i+1])) { 2478 errln("FAIL: " + DATA[i] + ".getDisplayName() => " + 2479 name + ", expected " + DATA[i+1]); 2480 } else { 2481 logln("Ok: " + DATA[i] + ".getDisplayName() => " + name); 2482 } 2483 Transliterator t = Transliterator.getInstance(DATA[i], Transliterator.REVERSE); 2484 name = Transliterator.getDisplayName(t.getID(), US); 2485 if (!name.equals(DATA[i+2])) { 2486 errln("FAIL: " + t.getID() + ".getDisplayName() => " + 2487 name + ", expected " + DATA[i+2]); 2488 } else { 2489 logln("Ok: " + t.getID() + ".getDisplayName() => " + name); 2490 } 2491 2492 // Cover getDisplayName(String) 2493 ULocale save = ULocale.getDefault(); 2494 ULocale.setDefault(ULocale.US); 2495 String name2 = Transliterator.getDisplayName(t.getID()); 2496 if (!name.equals(name2)) 2497 errln("FAIL: getDisplayName with default locale failed"); 2498 ULocale.setDefault(save); 2499 } 2500 } 2501 2502 /** 2503 * Test anchor masking 2504 */ 2505 public void TestAnchorMasking() { 2506 String rule = "^a > Q; a > q;"; 2507 try { 2508 Transliterator t = Transliterator.createFromRules("ID", rule, Transliterator.FORWARD); 2509 if(t==null){ 2510 errln("FAIL: Did not get the expected exception"); 2511 } 2512 } catch (IllegalArgumentException e) { 2513 errln("FAIL: " + rule + " => " + e); 2514 } 2515 } 2516 2517 /** 2518 * This test is not in trnstst.cpp. This test has been moved from com/ibm/icu/dev/test/lang/TestUScript.java 2519 * during ICU4J modularization to remove dependency of tests on Transliterator. 2520 */ 2521 public void TestScriptAllCodepoints(){ 2522 int code; 2523 HashSet scriptIdsChecked = new HashSet(); 2524 HashSet scriptAbbrsChecked = new HashSet(); 2525 for( int i =0; i <= 0x10ffff; i++){ 2526 code = UScript.getScript(i); 2527 if(code==UScript.INVALID_CODE){ 2528 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 2529 } 2530 String id =UScript.getName(code); 2531 String abbr = UScript.getShortName(code); 2532 if (!scriptIdsChecked.contains(id)) { 2533 scriptIdsChecked.add(id); 2534 String newId ="[:"+id+":];NFD"; 2535 try{ 2536 Transliterator t = Transliterator.getInstance(newId); 2537 if(t==null){ 2538 errln("Failed to create transliterator for "+hex(i)+ 2539 " script code: " +id); 2540 } 2541 }catch(Exception e){ 2542 errln("Failed to create transliterator for "+hex(i) 2543 +" script code: " +id 2544 + " Exception: "+e.getMessage()); 2545 } 2546 } 2547 if (!scriptAbbrsChecked.contains(abbr)) { 2548 scriptAbbrsChecked.add(abbr); 2549 String newAbbrId ="[:"+abbr+":];NFD"; 2550 try{ 2551 Transliterator t = Transliterator.getInstance(newAbbrId); 2552 if(t==null){ 2553 errln("Failed to create transliterator for "+hex(i)+ 2554 " script code: " +abbr); 2555 } 2556 }catch(Exception e){ 2557 errln("Failed to create transliterator for "+hex(i) 2558 +" script code: " +abbr 2559 + " Exception: "+e.getMessage()); 2560 } 2561 } 2562 } 2563 } 2564 2565 2566 static final String[][] registerRules = { 2567 {"Any-Dev1", "x > X; y > Y;"}, 2568 {"Any-Dev2", "XY > Z"}, 2569 {"Greek-Latin/FAKE", 2570 "[^[:L:][:M:]] { \u03bc\u03c0 > b ; "+ 2571 "\u03bc\u03c0 } [^[:L:][:M:]] > b ; "+ 2572 "[^[:L:][:M:]] { [\u039c\u03bc][\u03a0\u03c0] > B ; "+ 2573 "[\u039c\u03bc][\u03a0\u03c0] } [^[:L:][:M:]] > B ;" 2574 }, 2575 }; 2576 2577 static final String DESERET_DEE = UTF16.valueOf(0x10414); 2578 static final String DESERET_dee = UTF16.valueOf(0x1043C); 2579 2580 static final String[][] testCases = { 2581 2582 // NORMALIZATION 2583 // should add more test cases 2584 {"NFD" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2585 {"NFC" , "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2586 {"NFKD", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2587 {"NFKC", "a\u0300 \u00E0 \u1100\u1161 \uFF76\uFF9E\u03D3"}, 2588 2589 // mp -> b BUG 2590 {"Greek-Latin/UNGEGN", "(\u03BC\u03C0)", "(b)"}, 2591 {"Greek-Latin/FAKE", "(\u03BC\u03C0)", "(b)"}, 2592 2593 // check for devanagari bug 2594 {"nfd;Dev1;Dev2;nfc", "xy", "Z"}, 2595 2596 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 2597 {"Title", "ab'cD ffi\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2598 "Ab'cd Ffi\u0131ii\u0307 \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2599 //TODO: enable this test once Titlecase works right 2600 //{"Title", "\uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2601 // "Ffi\u0131ii \u01C8\u01C9\u01C9 " + DESERET_DEE + DESERET_dee}, 2602 2603 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2604 "AB'CD FFIII\u0130 \u01C7\u01C7\u01C7 " + DESERET_DEE + DESERET_DEE}, 2605 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE, 2606 "ab'cd \uFB00i\u0131ii\u0307 \u01C9\u01C9\u01C9 " + DESERET_dee + DESERET_dee}, 2607 2608 {"Upper", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2609 {"Lower", "ab'cD \uFB00i\u0131I\u0130 \u01C7\u01C8\u01C9 " + DESERET_dee + DESERET_DEE}, 2610 2611 // FORMS OF S 2612 {"Greek-Latin/UNGEGN", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2613 {"Latin-Greek/UNGEGN", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2614 {"Greek-Latin", "\u03C3 \u03C3\u03C2 \u03C2\u03C3", "s ss s\u0331s\u0331"}, 2615 {"Latin-Greek", "s ss s\u0331s\u0331", "\u03C3 \u03C3\u03C2 \u03C2\u03C3"}, 2616 2617 // Tatiana bug 2618 // Upper: TAT\u02B9\u00C2NA 2619 // Lower: tat\u02B9\u00E2na 2620 // Title: Tat\u02B9\u00E2na 2621 {"Upper", "tat\u02B9\u00E2na", "TAT\u02B9\u00C2NA"}, 2622 {"Lower", "TAT\u02B9\u00C2NA", "tat\u02B9\u00E2na"}, 2623 {"Title", "tat\u02B9\u00E2na", "Tat\u02B9\u00E2na"}, 2624 }; 2625 2626 public void TestSpecialCases() { 2627 2628 for (int i = 0; i < registerRules.length; ++i) { 2629 Transliterator t = Transliterator.createFromRules(registerRules[i][0], 2630 registerRules[i][1], Transliterator.FORWARD); 2631 DummyFactory.add(registerRules[i][0], t); 2632 } 2633 for (int i = 0; i < testCases.length; ++i) { 2634 String name = testCases[i][0]; 2635 Transliterator t = Transliterator.getInstance(name); 2636 String id = t.getID(); 2637 String source = testCases[i][1]; 2638 String target = null; 2639 2640 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 2641 2642 if (testCases[i].length > 2) target = testCases[i][2]; 2643 else if (id.equalsIgnoreCase("NFD")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFD); 2644 else if (id.equalsIgnoreCase("NFC")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFC); 2645 else if (id.equalsIgnoreCase("NFKD")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKD); 2646 else if (id.equalsIgnoreCase("NFKC")) target = android.icu.text.Normalizer.normalize(source, android.icu.text.Normalizer.NFKC); 2647 else if (id.equalsIgnoreCase("Lower")) target = UCharacter.toLowerCase(Locale.US, source); 2648 else if (id.equalsIgnoreCase("Upper")) target = UCharacter.toUpperCase(Locale.US, source); 2649 2650 expect(t, source, target); 2651 } 2652 for (int i = 0; i < registerRules.length; ++i) { 2653 Transliterator.unregister(registerRules[i][0]); 2654 } 2655 } 2656 2657 // seems like there should be an easier way to just register an instance of a transliterator 2658 2659 static class DummyFactory implements Transliterator.Factory { 2660 static DummyFactory singleton = new DummyFactory(); 2661 static HashMap m = new HashMap(); 2662 2663 // Since Transliterators are immutable, we don't have to clone on set & get 2664 static void add(String ID, Transliterator t) { 2665 m.put(ID, t); 2666 //System.out.println("Registering: " + ID + ", " + t.toRules(true)); 2667 Transliterator.registerFactory(ID, singleton); 2668 } 2669 public Transliterator getInstance(String ID) { 2670 return (Transliterator) m.get(ID); 2671 } 2672 } 2673 2674 public void TestCasing() { 2675 Transliterator toLower = Transliterator.getInstance("lower"); 2676 Transliterator toCasefold = Transliterator.getInstance("casefold"); 2677 Transliterator toUpper = Transliterator.getInstance("upper"); 2678 Transliterator toTitle = Transliterator.getInstance("title"); 2679 for (int i = 0; i < 0x600; ++i) { 2680 String s = UTF16.valueOf(i); 2681 2682 String lower = UCharacter.toLowerCase(ULocale.ROOT, s); 2683 assertEquals("Lowercase", lower, toLower.transform(s)); 2684 2685 String casefold = UCharacter.foldCase(s, true); 2686 assertEquals("Casefold", casefold, toCasefold.transform(s)); 2687 2688 String title = UCharacter.toTitleCase(ULocale.ROOT, s, null); 2689 assertEquals("Title", title, toTitle.transform(s)); 2690 2691 String upper = UCharacter.toUpperCase(ULocale.ROOT, s); 2692 assertEquals("Upper", upper, toUpper.transform(s)); 2693 } 2694 } 2695 2696 public void TestSurrogateCasing () { 2697 // check that casing handles surrogates 2698 // titlecase is currently defective 2699 int dee = UTF16.charAt(DESERET_dee,0); 2700 int DEE = UCharacter.toTitleCase(dee); 2701 if (!UTF16.valueOf(DEE).equals(DESERET_DEE)) { 2702 errln("Fails titlecase of surrogates" + Integer.toString(dee,16) + ", " + Integer.toString(DEE,16)); 2703 } 2704 2705 if (!UCharacter.toUpperCase(DESERET_dee + DESERET_DEE).equals(DESERET_DEE + DESERET_DEE)) { 2706 errln("Fails uppercase of surrogates"); 2707 } 2708 2709 if (!UCharacter.toLowerCase(DESERET_dee + DESERET_DEE).equals(DESERET_dee + DESERET_dee)) { 2710 errln("Fails lowercase of surrogates"); 2711 } 2712 } 2713 2714 // Check to see that incremental gets at least part way through a reasonable string. 2715 2716 public void TestIncrementalProgress() { 2717 String latinTest = "The Quick Brown Fox."; 2718 String devaTest = Transliterator.getInstance("Latin-Devanagari").transliterate(latinTest); 2719 String kataTest = Transliterator.getInstance("Latin-Katakana").transliterate(latinTest); 2720 String[][] tests = { 2721 {"Any", latinTest}, 2722 {"Latin", latinTest}, 2723 {"Halfwidth", latinTest}, 2724 {"Devanagari", devaTest}, 2725 {"Katakana", kataTest}, 2726 }; 2727 2728 Enumeration sources = Transliterator.getAvailableSources(); 2729 while(sources.hasMoreElements()) { 2730 String source = (String) sources.nextElement(); 2731 String test = findMatch(source, tests); 2732 if (test == null) { 2733 logln("Skipping " + source + "-X"); 2734 continue; 2735 } 2736 Enumeration targets = Transliterator.getAvailableTargets(source); 2737 while(targets.hasMoreElements()) { 2738 String target = (String) targets.nextElement(); 2739 Enumeration variants = Transliterator.getAvailableVariants(source, target); 2740 while(variants.hasMoreElements()) { 2741 String variant = (String) variants.nextElement(); 2742 String id = source + "-" + target + "/" + variant; 2743 logln("id: " + id); 2744 2745 String filter = getTranslitTestFilter(); 2746 if (filter != null && id.indexOf(filter) < 0) continue; 2747 2748 Transliterator t = Transliterator.getInstance(id); 2749 CheckIncrementalAux(t, test); 2750 2751 String rev = t.transliterate(test); 2752 Transliterator inv = t.getInverse(); 2753 CheckIncrementalAux(inv, rev); 2754 } 2755 } 2756 } 2757 } 2758 2759 public String findMatch (String source, String[][] pairs) { 2760 for (int i = 0; i < pairs.length; ++i) { 2761 if (source.equalsIgnoreCase(pairs[i][0])) return pairs[i][1]; 2762 } 2763 return null; 2764 } 2765 2766 public void CheckIncrementalAux(Transliterator t, String input) { 2767 2768 Replaceable test = new ReplaceableString(input); 2769 Transliterator.Position pos = new Transliterator.Position(0, test.length(), 0, test.length()); 2770 t.transliterate(test, pos); 2771 boolean gotError = false; 2772 2773 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 2774 2775 if (pos.start == 0 && pos.limit != 0 && !t.getID().equals("Hex-Any/Unicode")) { 2776 errln("No Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2777 gotError = true; 2778 } else { 2779 logln("PASS Progress, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2780 } 2781 t.finishTransliteration(test, pos); 2782 if (pos.start != pos.limit) { 2783 errln("Incomplete, " + t.getID() + ": " + UtilityExtensions.formatInput(test, pos)); 2784 gotError = true; 2785 } 2786 if(!gotError){ 2787 //errln("FAIL: Did not get expected error"); 2788 } 2789 } 2790 2791 public void TestFunction() { 2792 // Careful with spacing and ';' here: Phrase this exactly 2793 // as toRules() is going to return it. If toRules() changes 2794 // with regard to spacing or ';', then adjust this string. 2795 String rule = 2796 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 2797 2798 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2799 if (t == null) { 2800 errln("FAIL: createFromRules failed"); 2801 return; 2802 } 2803 2804 String r = t.toRules(true); 2805 if (r.equals(rule)) { 2806 logln("OK: toRules() => " + r); 2807 } else { 2808 errln("FAIL: toRules() => " + r + 2809 ", expected " + rule); 2810 } 2811 2812 expect(t, "The Quick Brown Fox", 2813 "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); 2814 rule = 2815 "([^\\ -\\u007F]) > &Hex/Unicode( $1 ) ' ' &Name( $1 ) ;"; 2816 2817 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2818 if (t == null) { 2819 errln("FAIL: createFromRules failed"); 2820 return; 2821 } 2822 2823 r = t.toRules(true); 2824 if (r.equals(rule)) { 2825 logln("OK: toRules() => " + r); 2826 } else { 2827 errln("FAIL: toRules() => " + r + 2828 ", expected " + rule); 2829 } 2830 2831 expect(t, "\u0301", 2832 "U+0301 \\N{COMBINING ACUTE ACCENT}"); 2833 } 2834 2835 public void TestInvalidBackRef() { 2836 String rule = ". > $1;"; 2837 String rule2 ="(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\u0020;"; 2838 try { 2839 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2840 if (t != null) { 2841 errln("FAIL: createFromRules should have returned NULL"); 2842 } 2843 errln("FAIL: Ok: . > $1; => no error"); 2844 Transliterator t2= Transliterator.createFromRules("Test2", rule2, Transliterator.FORWARD); 2845 if (t2 != null) { 2846 errln("FAIL: createFromRules should have returned NULL"); 2847 } 2848 errln("FAIL: Ok: . > $1; => no error"); 2849 } catch (IllegalArgumentException e) { 2850 logln("Ok: . > $1; => " + e.getMessage()); 2851 } 2852 } 2853 2854 public void TestMulticharStringSet() { 2855 // Basic testing 2856 String rule = 2857 " [{aa}] > x;" + 2858 " a > y;" + 2859 " [b{bc}] > z;" + 2860 "[{gd}] { e > q;" + 2861 " e } [{fg}] > r;" ; 2862 2863 Transliterator t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2864 if (t == null) { 2865 errln("FAIL: createFromRules failed"); 2866 return; 2867 } 2868 2869 expect(t, "a aa ab bc d gd de gde gdefg ddefg", 2870 "y x yz z d gd de gdq gdqfg ddrfg"); 2871 2872 // Overlapped string test. Make sure that when multiple 2873 // strings can match that the longest one is matched. 2874 rule = 2875 " [a {ab} {abc}] > x;" + 2876 " b > y;" + 2877 " c > z;" + 2878 " q [t {st} {rst}] { e > p;" ; 2879 2880 t = Transliterator.createFromRules("Test", rule, Transliterator.FORWARD); 2881 if (t == null) { 2882 errln("FAIL: createFromRules failed"); 2883 return; 2884 } 2885 2886 expect(t, "a ab abc qte qste qrste", 2887 "x x x qtp qstp qrstp"); 2888 } 2889 2890 /** 2891 * Test that user-registered transliterators can be used under function 2892 * syntax. 2893 */ 2894 public void TestUserFunction() { 2895 Transliterator t; 2896 2897 // There's no need to register inverses if we don't use them 2898 TestUserFunctionFactory.add("Any-gif", 2899 Transliterator.createFromRules("gif", 2900 "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", 2901 Transliterator.FORWARD)); 2902 //TestUserFunctionFactory.add("gif-Any", Transliterator.getInstance("Any-Null")); 2903 2904 TestUserFunctionFactory.add("Any-RemoveCurly", 2905 Transliterator.createFromRules("RemoveCurly", "[\\{\\}] > ; \\\\N > ;", Transliterator.FORWARD)); 2906 //TestUserFunctionFactory.add("RemoveCurly-Any", Transliterator.getInstance("Any-Null")); 2907 2908 logln("Trying &hex"); 2909 t = Transliterator.createFromRules("hex2", "(.) > &hex($1);", Transliterator.FORWARD); 2910 logln("Registering"); 2911 TestUserFunctionFactory.add("Any-hex2", t); 2912 t = Transliterator.getInstance("Any-hex2"); 2913 expect(t, "abc", "\\u0061\\u0062\\u0063"); 2914 2915 logln("Trying &gif"); 2916 t = Transliterator.createFromRules("gif2", "(.) > &Gif(&Hex2($1));", Transliterator.FORWARD); 2917 logln("Registering"); 2918 TestUserFunctionFactory.add("Any-gif2", t); 2919 t = Transliterator.getInstance("Any-gif2"); 2920 expect(t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" + 2921 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 2922 2923 // Test that filters are allowed after & 2924 t = Transliterator.createFromRules("test", 2925 "(.) > &Hex($1) ' ' &Any-RemoveCurly(&Name($1)) ' ';", Transliterator.FORWARD); 2926 expect(t, "abc", "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); 2927 2928 // Unregister our test stuff 2929 TestUserFunctionFactory.unregister(); 2930 } 2931 2932 static class TestUserFunctionFactory implements Transliterator.Factory { 2933 static TestUserFunctionFactory singleton = new TestUserFunctionFactory(); 2934 static HashMap m = new HashMap(); 2935 2936 static void add(String ID, Transliterator t) { 2937 m.put(new CaseInsensitiveString(ID), t); 2938 Transliterator.registerFactory(ID, singleton); 2939 } 2940 2941 public Transliterator getInstance(String ID) { 2942 return (Transliterator) m.get(new CaseInsensitiveString(ID)); 2943 } 2944 2945 static void unregister() { 2946 Iterator ids = m.keySet().iterator(); 2947 while (ids.hasNext()) { 2948 CaseInsensitiveString id = (CaseInsensitiveString) ids.next(); 2949 Transliterator.unregister(id.getString()); 2950 ids.remove(); // removes pair from m 2951 } 2952 } 2953 } 2954 2955 /** 2956 * Test the Any-X transliterators. 2957 */ 2958 public void TestAnyX() { 2959 Transliterator anyLatin = 2960 Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 2961 2962 expect(anyLatin, 2963 "greek:\u03B1\u03B2\u03BA\u0391\u0392\u039A hiragana:\u3042\u3076\u304F cyrillic:\u0430\u0431\u0446", 2964 "greek:abkABK hiragana:abuku cyrillic:abc"); 2965 } 2966 2967 /** 2968 * Test Any-X transliterators with sample letters from all scripts. 2969 */ 2970 public void TestAny() { 2971 UnicodeSet alphabetic = (UnicodeSet) new UnicodeSet("[:alphabetic:]").freeze(); 2972 StringBuffer testString = new StringBuffer(); 2973 for (int i = 0; i < UScript.CODE_LIMIT; ++i) { 2974 UnicodeSet sample = new UnicodeSet().applyPropertyAlias("script", UScript.getShortName(i)).retainAll(alphabetic); 2975 int count = 5; 2976 for (UnicodeSetIterator it = new UnicodeSetIterator(sample); it.next();) { 2977 testString.append(it.getString()); 2978 if (--count < 0) break; 2979 } 2980 } 2981 logln("Sample set for Any-Latin: " + testString); 2982 Transliterator anyLatin = Transliterator.getInstance("any-Latn"); 2983 String result = anyLatin.transliterate(testString.toString()); 2984 logln("Sample result for Any-Latin: " + result); 2985 } 2986 2987 2988 /** 2989 * Test the source and target set API. These are only implemented 2990 * for RBT and CompoundTransliterator at this time. 2991 */ 2992 public void TestSourceTargetSet() { 2993 // Rules 2994 String r = 2995 "a > b; " + 2996 "r [x{lu}] > q;"; 2997 2998 // Expected source 2999 UnicodeSet expSrc = new UnicodeSet("[arx{lu}]"); 3000 3001 // Expected target 3002 UnicodeSet expTrg = new UnicodeSet("[bq]"); 3003 3004 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3005 UnicodeSet src = t.getSourceSet(); 3006 UnicodeSet trg = t.getTargetSet(); 3007 3008 if (src.equals(expSrc) && trg.equals(expTrg)) { 3009 logln("Ok: " + r + " => source = " + src.toPattern(true) + 3010 ", target = " + trg.toPattern(true)); 3011 } else { 3012 errln("FAIL: " + r + " => source = " + src.toPattern(true) + 3013 ", expected " + expSrc.toPattern(true) + 3014 "; target = " + trg.toPattern(true) + 3015 ", expected " + expTrg.toPattern(true)); 3016 } 3017 } 3018 3019 public void TestSourceTargetSet2() { 3020 3021 3022 Normalizer2 nfc = Normalizer2.getNFCInstance(); 3023 Normalizer2 nfd = Normalizer2.getNFDInstance(); 3024 3025 // Normalizer2 nfkd = Normalizer2.getInstance(null, "nfkd", Mode.DECOMPOSE); 3026 // UnicodeSet nfkdSource = new UnicodeSet(); 3027 // UnicodeSet nfkdTarget = new UnicodeSet(); 3028 // for (int i = 0; i <= 0x10FFFF; ++i) { 3029 // if (nfkd.isInert(i)) { 3030 // continue; 3031 // } 3032 // nfkdSource.add(i); 3033 // String t = nfkd.getDecomposition(i); 3034 // if (t != null) { 3035 // nfkdTarget.addAll(t); 3036 // } else { 3037 // nfkdTarget.add(i); 3038 // } 3039 // } 3040 // nfkdSource.freeze(); 3041 // nfkdTarget.freeze(); 3042 // logln("NFKD Source: " + nfkdSource.toPattern(false)); 3043 // logln("NFKD Target: " + nfkdTarget.toPattern(false)); 3044 3045 UnicodeMap<UnicodeSet> leadToTrail = new UnicodeMap(); 3046 UnicodeMap<UnicodeSet> leadToSources = new UnicodeMap(); 3047 UnicodeSet nonStarters = new UnicodeSet("[:^ccc=0:]").freeze(); 3048 CanonicalIterator can = new CanonicalIterator(""); 3049 3050 UnicodeSet disorderedMarks = new UnicodeSet(); 3051 3052 for (int i = 0; i <= 0x10FFFF; ++i) { 3053 String s = nfd.getDecomposition(i); 3054 if (s == null) { 3055 continue; 3056 } 3057 3058 can.setSource(s); 3059 for (String t = can.next(); t != null; t = can.next()) { 3060 disorderedMarks.add(t); 3061 } 3062 3063 // if s has two code points, (or more), add the lead/trail information 3064 int first = s.codePointAt(0); 3065 int firstCount = Character.charCount(first); 3066 if (s.length() == firstCount) continue; 3067 String trailString = s.substring(firstCount); 3068 3069 // add all the trail characters 3070 if (!nonStarters.containsSome(trailString)) { 3071 continue; 3072 } 3073 UnicodeSet trailSet = leadToTrail.get(first); 3074 if (trailSet == null) { 3075 leadToTrail.put(first, trailSet = new UnicodeSet()); 3076 } 3077 trailSet.addAll(trailString); // add remaining trails 3078 3079 // add the sources 3080 UnicodeSet sourcesSet = leadToSources.get(first); 3081 if (sourcesSet == null) { 3082 leadToSources.put(first, sourcesSet = new UnicodeSet()); 3083 } 3084 sourcesSet.add(i); 3085 } 3086 3087 3088 for (Entry<String, UnicodeSet> x : leadToSources.entrySet()) { 3089 String lead = x.getKey(); 3090 UnicodeSet sources = x.getValue(); 3091 UnicodeSet trailSet = leadToTrail.get(lead); 3092 for (String source : sources) { 3093 for (String trail : trailSet) { 3094 can.setSource(source + trail); 3095 for (String t = can.next(); t != null; t = can.next()) { 3096 if (t.endsWith(trail)) continue; 3097 disorderedMarks.add(t); 3098 } 3099 } 3100 } 3101 } 3102 3103 3104 for (String s : nonStarters) { 3105 disorderedMarks.add("\u0345" + s); 3106 disorderedMarks.add(s+"\u0323"); 3107 String xx = nfc.normalize("\u01EC" + s); 3108 if (!xx.startsWith("\u01EC")) { 3109 logln("??"); 3110 } 3111 } 3112 3113 // for (int i = 0; i <= 0x10FFFF; ++i) { 3114 // String s = nfkd.getDecomposition(i); 3115 // if (s != null) { 3116 // disorderedMarks.add(s); 3117 // disorderedMarks.add(nfc.normalize(s)); 3118 // addDerivedStrings(nfc, disorderedMarks, s); 3119 // } 3120 // s = nfd.getDecomposition(i); 3121 // if (s != null) { 3122 // disorderedMarks.add(s); 3123 // } 3124 // if (!nfc.isInert(i)) { 3125 // if (i == 0x00C0) { 3126 // logln("\u00C0"); 3127 // } 3128 // can.setSource(s+"\u0334"); 3129 // for (String t = can.next(); t != null; t = can.next()) { 3130 // addDerivedStrings(nfc, disorderedMarks, t); 3131 // } 3132 // can.setSource(s+"\u0345"); 3133 // for (String t = can.next(); t != null; t = can.next()) { 3134 // addDerivedStrings(nfc, disorderedMarks, t); 3135 // } 3136 // can.setSource(s+"\u0323"); 3137 // for (String t = can.next(); t != null; t = can.next()) { 3138 // addDerivedStrings(nfc, disorderedMarks, t); 3139 // } 3140 // } 3141 // } 3142 logln("Test cases: " + disorderedMarks.size()); 3143 disorderedMarks.addAll(0,0x10FFFF).freeze(); 3144 logln("isInert \u0104 " + nfc.isInert('\u0104')); 3145 3146 Object[][] rules = { 3147 {":: [:sc=COMMON:] any-name;", null}, 3148 3149 {":: [:Greek:] hex-any/C;", null}, 3150 {":: [:Greek:] any-hex/C;", null}, 3151 3152 {":: [[:Mn:][:Me:]] remove;", null}, 3153 {":: [[:Mn:][:Me:]] null;", null}, 3154 3155 3156 {":: lower;", null}, 3157 {":: upper;", null}, 3158 {":: title;", null}, 3159 {":: CaseFold;", null}, 3160 3161 {":: NFD;", null}, 3162 {":: NFC;", null}, 3163 {":: NFKD;", null}, 3164 {":: NFKC;", null}, 3165 3166 {":: [[:Mn:][:Me:]] NFKD;", null}, 3167 {":: Latin-Greek;", null}, 3168 {":: [:Latin:] NFKD;", null}, 3169 {":: NFKD;", null}, 3170 {":: NFKD;\n" + 3171 ":: [[:Mn:][:Me:]] remove;\n" + 3172 ":: NFC;", null}, 3173 }; 3174 for (Object[] rulex : rules) { 3175 String rule = (String) rulex[0]; 3176 Transliterator trans = Transliterator.createFromRules("temp", rule, Transliterator.FORWARD); 3177 UnicodeSet actualSource = trans.getSourceSet(); 3178 UnicodeSet actualTarget = trans.getTargetSet(); 3179 UnicodeSet empiricalSource = new UnicodeSet(); 3180 UnicodeSet empiricalTarget = new UnicodeSet(); 3181 String ruleDisplay = rule.replace("\n", "\t\t"); 3182 UnicodeSet toTest = disorderedMarks; 3183 // if (rulex[1] != null) { 3184 // toTest = new UnicodeSet(disorderedMarks); 3185 // toTest.addAll((UnicodeSet) rulex[1]); 3186 // } 3187 3188 String test = nfd.normalize("\u0104"); 3189 boolean DEBUG = true; 3190 @SuppressWarnings("unused") 3191 int count = 0; // for debugging 3192 for (String s : toTest) { 3193 if (s.equals(test)) { 3194 logln(test); 3195 } 3196 String t = trans.transform(s); 3197 if (!s.equals(t)) { 3198 if (!isAtomic(s, t, trans)) { 3199 isAtomic(s, t, trans); 3200 continue; 3201 } 3202 3203 // only keep the part that changed; so skip the front and end. 3204 // int start = findSharedStartLength(s,t); 3205 // int end = findSharedEndLength(s,t); 3206 // if (start != 0 || end != 0) { 3207 // s = s.substring(start, s.length() - end); 3208 // t = t.substring(start, t.length() - end); 3209 // } 3210 if (DEBUG) { 3211 if (!actualSource.containsAll(s)) { 3212 count++; 3213 } 3214 if (!actualTarget.containsAll(t)) { 3215 count++; 3216 } 3217 } 3218 addSourceTarget(s, empiricalSource, t, empiricalTarget); 3219 } 3220 } 3221 assertEquals("getSource(" + ruleDisplay + ")", empiricalSource, actualSource, SetAssert.MISSING_OK); 3222 assertEquals("getTarget(" + ruleDisplay + ")", empiricalTarget, actualTarget, SetAssert.MISSING_OK); 3223 } 3224 } 3225 3226 public void TestSourceTargetSetFilter() { 3227 String[][] tests = { 3228 // rules, expectedTarget-FORWARD, expectedTarget-REVERSE 3229 {"[] Latin-Greek", null, "[\']"}, 3230 {"::[] ; ::NFD ; ::NFKC ; :: ([]) ;"}, 3231 {"[] Any-Latin"}, 3232 {"[] casefold"}, 3233 {"[] NFKD;"}, 3234 {"[] NFKC;"}, 3235 {"[] hex"}, 3236 {"[] lower"}, 3237 {"[] null"}, 3238 {"[] remove"}, 3239 {"[] title"}, 3240 {"[] upper"}, 3241 }; 3242 UnicodeSet expectedSource = UnicodeSet.EMPTY; 3243 for (String[] testPair : tests) { 3244 String test = testPair[0]; 3245 Transliterator t0; 3246 try { 3247 t0 = Transliterator.getInstance(test); 3248 } catch (Exception e) { 3249 t0 = Transliterator.createFromRules("temp", test, Transliterator.FORWARD); 3250 } 3251 Transliterator t1; 3252 try { 3253 t1 = t0.getInverse(); 3254 } catch (Exception e) { 3255 t1 = Transliterator.createFromRules("temp", test, Transliterator.REVERSE); 3256 } 3257 int targetIndex = 0; 3258 for (Transliterator t : new Transliterator[]{t0, t1}) { 3259 boolean ok; 3260 UnicodeSet source = t.getSourceSet(); 3261 String direction = t == t0 ? "FORWARD\t" : "REVERSE\t"; 3262 targetIndex++; 3263 UnicodeSet expectedTarget = testPair.length <= targetIndex ? expectedSource 3264 : testPair[targetIndex] == null ? expectedSource 3265 : testPair[targetIndex].length() == 0 ? expectedSource 3266 : new UnicodeSet(testPair[targetIndex]); 3267 ok = assertEquals(direction + "getSource\t\"" + test + '"', expectedSource, source); 3268 if (!ok) { // for debugging 3269 source = t.getSourceSet(); 3270 } 3271 UnicodeSet target = t.getTargetSet(); 3272 ok = assertEquals(direction + "getTarget\t\"" + test + '"', expectedTarget, target); 3273 if (!ok) { // for debugging 3274 target = t.getTargetSet(); 3275 } 3276 } 3277 } 3278 } 3279 3280 private boolean isAtomic(String s, String t, Transliterator trans) { 3281 for (int i = 1; i < s.length(); ++i) { 3282 if (!CharSequences.onCharacterBoundary(s, i)) { 3283 continue; 3284 } 3285 String q = trans.transform(s.substring(0,i)); 3286 if (t.startsWith(q)) { 3287 String r = trans.transform(s.substring(i)); 3288 if (t.length() == q.length() + r.length() && t.endsWith(r)) { 3289 return false; 3290 } 3291 } 3292 } 3293 return true; 3294 // // make sure that every part is different 3295 // if (s.codePointCount(0, s.length()) > 1) { 3296 // int[] codePoints = It.codePoints(s); 3297 // for (int k = 0; k < codePoints.length; ++k) { 3298 // int pos = indexOf(t,codePoints[k]); 3299 // if (pos >= 0) { 3300 // int x; 3301 // } 3302 // } 3303 // if (s.contains("\u00C0")) { 3304 // logln("\u00C0"); 3305 // } 3306 // } 3307 } 3308 3309 private void addSourceTarget(String s, UnicodeSet expectedSource, String t, UnicodeSet expectedTarget) { 3310 expectedSource.addAll(s); 3311 if (t.length() > 0) { 3312 expectedTarget.addAll(t); 3313 } 3314 } 3315 3316// private void addDerivedStrings(Normalizer2 nfc, UnicodeSet disorderedMarks, String s) { 3317// disorderedMarks.add(s); 3318// for (int j = 1; j < s.length(); ++j) { 3319// if (CharSequences.onCharacterBoundary(s, j)) { 3320// String shorter = s.substring(0,j); 3321// disorderedMarks.add(shorter); 3322// disorderedMarks.add(nfc.normalize(shorter) + s.substring(j)); 3323// } 3324// } 3325// } 3326 3327 public void TestCharUtils() { 3328 String[][] startTests = { 3329 {"1", "a", "ab"}, 3330 {"0", "a", "xb"}, 3331 {"0", "\uD800", "\uD800\uDC01"}, 3332 {"1", "\uD800a", "\uD800b"}, 3333 {"0", "\uD800\uDC00", "\uD800\uDC01"}, 3334 }; 3335 for (String[] row : startTests) { 3336 int actual = findSharedStartLength(row[1], row[2]); 3337 assertEquals("findSharedStartLength(" + row[1] + "," + row[2] + ")", 3338 Integer.parseInt(row[0]), 3339 actual); 3340 } 3341 String[][] endTests = { 3342 {"0", "\uDC00", "\uD801\uDC00"}, 3343 {"1", "a", "ba"}, 3344 {"0", "a", "bx"}, 3345 {"1", "a\uDC00", "b\uDC00"}, 3346 {"0", "\uD800\uDC00", "\uD801\uDC00"}, 3347 }; 3348 for (String[] row : endTests) { 3349 int actual = findSharedEndLength(row[1], row[2]); 3350 assertEquals("findSharedEndLength(" + row[1] + "," + row[2] + ")", 3351 Integer.parseInt(row[0]), 3352 actual); 3353 } 3354 } 3355 3356 /** 3357 * @param s 3358 * @param t 3359 * @return 3360 */ 3361 // TODO make generally available 3362 private static int findSharedStartLength(CharSequence s, CharSequence t) { 3363 int min = Math.min(s.length(), t.length()); 3364 int i; 3365 char sch, tch; 3366 for (i = 0; i < min; ++i) { 3367 sch = s.charAt(i); 3368 tch = t.charAt(i); 3369 if (sch != tch) { 3370 break; 3371 } 3372 } 3373 return CharSequences.onCharacterBoundary(s,i) && CharSequences.onCharacterBoundary(t,i) ? i : i - 1; 3374 } 3375 3376 /** 3377 * @param s 3378 * @param t 3379 * @return 3380 */ 3381 // TODO make generally available 3382 private static int findSharedEndLength(CharSequence s, CharSequence t) { 3383 int slength = s.length(); 3384 int tlength = t.length(); 3385 int min = Math.min(slength, tlength); 3386 int i; 3387 char sch, tch; 3388 // TODO can make the calculations slightly faster... Not sure if it is worth the complication, tho' 3389 for (i = 0; i < min; ++i) { 3390 sch = s.charAt(slength - i - 1); 3391 tch = t.charAt(tlength - i - 1); 3392 if (sch != tch) { 3393 break; 3394 } 3395 } 3396 return CharSequences.onCharacterBoundary(s,slength - i) && CharSequences.onCharacterBoundary(t,tlength - i) ? i : i - 1; 3397 } 3398 3399 enum SetAssert {EQUALS, MISSING_OK, EXTRA_OK} 3400 3401 void assertEquals(String message, UnicodeSet empirical, UnicodeSet actual, SetAssert setAssert) { 3402 boolean haveError = false; 3403 if (!actual.containsAll(empirical)) { 3404 UnicodeSet missing = new UnicodeSet(empirical).removeAll(actual); 3405 errln(message + " \tgetXSet < empirical (" + missing.size() + "): " + toPattern(missing)); 3406 haveError = true; 3407 } 3408 if (!empirical.containsAll(actual)) { 3409 UnicodeSet extra = new UnicodeSet(actual).removeAll(empirical); 3410 logln("WARNING: " + message + " \tgetXSet > empirical (" + extra.size() + "): " + toPattern(extra)); 3411 haveError = true; 3412 } 3413 if (!haveError) { 3414 logln("OK " + message + ' ' + toPattern(empirical)); 3415 } 3416 } 3417 3418 private String toPattern(UnicodeSet missing) { 3419 String result = missing.toPattern(false); 3420 if (result.length() < 200) { 3421 return result; 3422 } 3423 return result.substring(0, CharSequences.onCharacterBoundary(result, 200) ? 200 : 199) + "\u2026"; 3424 } 3425 3426 3427 /** 3428 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3429 */ 3430 public void TestPatternWhitespace() { 3431 // Rules 3432 String r = "a > \u200E b;"; 3433 3434 Transliterator t = Transliterator.createFromRules("test", r, Transliterator.FORWARD); 3435 3436 expect(t, "a", "b"); 3437 3438 // UnicodeSet 3439 UnicodeSet set = new UnicodeSet("[a \u200E]"); 3440 3441 if (set.contains(0x200E)) { 3442 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3443 } 3444 } 3445 3446 public void TestAlternateSyntax() { 3447 // U+2206 == & 3448 // U+2190 == < 3449 // U+2192 == > 3450 // U+2194 == <> 3451 expect("a \u2192 x; b \u2190 y; c \u2194 z", 3452 "abc", 3453 "xbz"); 3454 expect("([:^ASCII:]) \u2192 \u2206Name($1);", 3455 "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", 3456 "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); 3457 } 3458 3459 public void TestPositionAPI() { 3460 Transliterator.Position a = new Transliterator.Position(3,5,7,11); 3461 Transliterator.Position b = new Transliterator.Position(a); 3462 Transliterator.Position c = new Transliterator.Position(); 3463 c.set(a); 3464 // Call the toString() API: 3465 if (a.equals(b) && a.equals(c)) { 3466 logln("Ok: " + a + " == " + b + " == " + c); 3467 } else { 3468 errln("FAIL: " + a + " != " + b + " != " + c); 3469 } 3470 } 3471 3472 //====================================================================== 3473 // New tests for the ::BEGIN/::END syntax 3474 //====================================================================== 3475 3476 private static final String[] BEGIN_END_RULES = new String[] { 3477 // [0] 3478 "abc > xy;" 3479 + "aba > z;", 3480 3481 // [1] 3482 /* 3483 "::BEGIN;" 3484 + "abc > xy;" 3485 + "::END;" 3486 + "::BEGIN;" 3487 + "aba > z;" 3488 + "::END;", 3489 */ 3490 "", // test case commented out below, this is here to keep from messing up the indexes 3491 3492 // [2] 3493 /* 3494 "abc > xy;" 3495 + "::BEGIN;" 3496 + "aba > z;" 3497 + "::END;", 3498 */ 3499 "", // test case commented out below, this is here to keep from messing up the indexes 3500 3501 // [3] 3502 /* 3503 "::BEGIN;" 3504 + "abc > xy;" 3505 + "::END;" 3506 + "aba > z;", 3507 */ 3508 "", // test case commented out below, this is here to keep from messing up the indexes 3509 3510 // [4] 3511 "abc > xy;" 3512 + "::Null;" 3513 + "aba > z;", 3514 3515 // [5] 3516 "::Upper;" 3517 + "ABC > xy;" 3518 + "AB > x;" 3519 + "C > z;" 3520 + "::Upper;" 3521 + "XYZ > p;" 3522 + "XY > q;" 3523 + "Z > r;" 3524 + "::Upper;", 3525 3526 // [6] 3527 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3528 + "$delim = [\\-$ws];" 3529 + "$ws $delim* > ' ';" 3530 + "'-' $delim* > '-';", 3531 3532 // [7] 3533 "::Null;" 3534 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3535 + "$delim = [\\-$ws];" 3536 + "$ws $delim* > ' ';" 3537 + "'-' $delim* > '-';", 3538 3539 // [8] 3540 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3541 + "$delim = [\\-$ws];" 3542 + "$ws $delim* > ' ';" 3543 + "'-' $delim* > '-';" 3544 + "::Null;", 3545 3546 // [9] 3547 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3548 + "$delim = [\\-$ws];" 3549 + "::Null;" 3550 + "$ws $delim* > ' ';" 3551 + "'-' $delim* > '-';", 3552 3553 // [10] 3554 /* 3555 "::BEGIN;" 3556 + "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3557 + "$delim = [\\-$ws];" 3558 + "::END;" 3559 + "$ws $delim* > ' ';" 3560 + "'-' $delim* > '-';", 3561 */ 3562 "", // test case commented out below, this is here to keep from messing up the indexes 3563 3564 // [11] 3565 /* 3566 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3567 + "$delim = [\\-$ws];" 3568 + "::BEGIN;" 3569 + "$ws $delim* > ' ';" 3570 + "'-' $delim* > '-';" 3571 + "::END;", 3572 */ 3573 "", // test case commented out below, this is here to keep from messing up the indexes 3574 3575 // [12] 3576 /* 3577 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3578 + "$delim = [\\-$ws];" 3579 + "$ab = [ab];" 3580 + "::BEGIN;" 3581 + "$ws $delim* > ' ';" 3582 + "'-' $delim* > '-';" 3583 + "::END;" 3584 + "::BEGIN;" 3585 + "$ab { ' ' } $ab > '-';" 3586 + "c { ' ' > ;" 3587 + "::END;" 3588 + "::BEGIN;" 3589 + "'a-a' > a\\%|a;" 3590 + "::END;", 3591 */ 3592 "", // test case commented out below, this is here to keep from messing up the indexes 3593 3594 // [13] 3595 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 3596 + "$delim = [\\-$ws];" 3597 + "$ab = [ab];" 3598 + "::Null;" 3599 + "$ws $delim* > ' ';" 3600 + "'-' $delim* > '-';" 3601 + "::Null;" 3602 + "$ab { ' ' } $ab > '-';" 3603 + "c { ' ' > ;" 3604 + "::Null;" 3605 + "'a-a' > a\\%|a;", 3606 3607 // [14] 3608 /* 3609 "::[abc];" 3610 + "::BEGIN;" 3611 + "abc > xy;" 3612 + "::END;" 3613 + "::BEGIN;" 3614 + "aba > yz;" 3615 + "::END;" 3616 + "::Upper;", 3617 */ 3618 "", // test case commented out below, this is here to keep from messing up the indexes 3619 3620 // [15] 3621 "::[abc];" 3622 + "abc > xy;" 3623 + "::Null;" 3624 + "aba > yz;" 3625 + "::Upper;", 3626 3627 // [16] 3628 /* 3629 "::[abc];" 3630 + "::BEGIN;" 3631 + "abc <> xy;" 3632 + "::END;" 3633 + "::BEGIN;" 3634 + "aba <> yz;" 3635 + "::END;" 3636 + "::Upper(Lower);" 3637 + "::([XYZ]);", 3638 */ 3639 "", // test case commented out below, this is here to keep from messing up the indexes 3640 3641 // [17] 3642 "::[abc];" 3643 + "abc <> xy;" 3644 + "::Null;" 3645 + "aba <> yz;" 3646 + "::Upper(Lower);" 3647 + "::([XYZ]);" 3648 }; 3649 3650 /* 3651(This entire test is commented out below and will need some heavy revision when we re-add 3652the ::BEGIN/::END stuff) 3653 private static final String[] BOGUS_BEGIN_END_RULES = new String[] { 3654 // [7] 3655 "::BEGIN;" 3656 + "abc > xy;" 3657 + "::BEGIN;" 3658 + "aba > z;" 3659 + "::END;" 3660 + "::END;", 3661 3662 // [8] 3663 "abc > xy;" 3664 + " aba > z;" 3665 + "::END;", 3666 3667 // [9] 3668 "::BEGIN;" 3669 + "::Upper;" 3670 + "::END;" 3671 }; 3672 */ 3673 3674 private static final String[] BEGIN_END_TEST_CASES = new String[] { 3675 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 3676 // BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 3677 // BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 3678 // BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 3679 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 3680 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 3681 3682 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 3683 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 3684 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 3685 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 3686 // BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 3687 // BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 3688 // BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 3689 // BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 3690 // BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 3691 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 3692 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 3693 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 3694 3695 // BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3696 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3697 // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 3698 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 3699 }; 3700 3701 public void TestBeginEnd() { 3702 // run through the list of test cases above 3703 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3704 expect(BEGIN_END_TEST_CASES[i], BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3705 } 3706 3707 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 3708 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3709 Transliterator.REVERSE); 3710 expect(reversed, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3711 3712 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 3713 // that all of them cause errors 3714 /* 3715(commented out until we have the real ::BEGIN/::END stuff in place 3716 for (int i = 0; i < BOGUS_BEGIN_END_RULES.length; i++) { 3717 try { 3718 Transliterator t = Transliterator.createFromRules("foo", BOGUS_BEGIN_END_RULES[i], 3719 Transliterator.FORWARD); 3720 errln("Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 3721 } 3722 catch (IllegalArgumentException e) { 3723 // this is supposed to happen; do nothing here 3724 } 3725 } 3726 */ 3727 } 3728 3729 public void TestBeginEndToRules() { 3730 // run through the same list of test cases we used above, but this time, instead of just 3731 // instantiating a Transliterator from the rules and running the test against it, we instantiate 3732 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 3733 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 3734 // to (i.e., does the same thing as) the original rule set 3735 for (int i = 0; i < BEGIN_END_TEST_CASES.length; i += 3) { 3736 Transliterator t = Transliterator.createFromRules("--", BEGIN_END_TEST_CASES[i], 3737 Transliterator.FORWARD); 3738 String rules = t.toRules(false); 3739 Transliterator t2 = Transliterator.createFromRules("Test case #" + (i / 3), rules, Transliterator.FORWARD); 3740 expect(t2, BEGIN_END_TEST_CASES[i + 1], BEGIN_END_TEST_CASES[i + 2]); 3741 } 3742 3743 // do the same thing for the reversible test case 3744 Transliterator reversed = Transliterator.createFromRules("Reversed", BEGIN_END_RULES[17], 3745 Transliterator.REVERSE); 3746 String rules = reversed.toRules(false); 3747 Transliterator reversed2 = Transliterator.createFromRules("Reversed", rules, Transliterator.FORWARD); 3748 expect(reversed2, "xy XY XYZ yz YZ", "xy abc xaba yz aba"); 3749 } 3750 3751 public void TestRegisterAlias() { 3752 String longID = "Lower;[aeiou]Upper"; 3753 String shortID = "Any-CapVowels"; 3754 String reallyShortID = "CapVowels"; 3755 3756 Transliterator.registerAlias(shortID, longID); 3757 3758 Transliterator t1 = Transliterator.getInstance(longID); 3759 Transliterator t2 = Transliterator.getInstance(reallyShortID); 3760 3761 if (!t1.getID().equals(longID)) 3762 errln("Transliterator instantiated with long ID doesn't have long ID"); 3763 if (!t2.getID().equals(reallyShortID)) 3764 errln("Transliterator instantiated with short ID doesn't have short ID"); 3765 3766 if (!t1.toRules(true).equals(t2.toRules(true))) 3767 errln("Alias transliterators aren't the same"); 3768 3769 Transliterator.unregister(shortID); 3770 3771 try { 3772 t1 = Transliterator.getInstance(shortID); 3773 errln("Instantiation with short ID succeeded after short ID was unregistered"); 3774 } 3775 catch (IllegalArgumentException e) { 3776 } 3777 3778 // try the same thing again, but this time with something other than 3779 // an instance of CompoundTransliterator 3780 String realID = "Latin-Greek"; 3781 String fakeID = "Latin-dlgkjdflkjdl"; 3782 Transliterator.registerAlias(fakeID, realID); 3783 3784 t1 = Transliterator.getInstance(realID); 3785 t2 = Transliterator.getInstance(fakeID); 3786 3787 if (!t1.toRules(true).equals(t2.toRules(true))) 3788 errln("Alias transliterators aren't the same"); 3789 3790 Transliterator.unregister(fakeID); 3791 } 3792 3793 /** 3794 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 3795 */ 3796 public void TestHalfwidthFullwidth() { 3797 Transliterator hf = Transliterator.getInstance("Halfwidth-Fullwidth"); 3798 Transliterator fh = Transliterator.getInstance("Fullwidth-Halfwidth"); 3799 3800 // Array of 3n items 3801 // Each item is 3802 // "hf"|"fh"|"both", 3803 // <Halfwidth>, 3804 // <Fullwidth> 3805 String[] DATA = { 3806 "both", 3807 "\uFFE9\uFFEA\uFFEB\uFFEC\u0061\uFF71\u00AF\u0020", 3808 "\u2190\u2191\u2192\u2193\uFF41\u30A2\uFFE3\u3000", 3809 }; 3810 3811 for (int i=0; i<DATA.length; i+=3) { 3812 switch (DATA[i].charAt(0)) { 3813 case 'h': // Halfwidth-Fullwidth only 3814 expect(hf, DATA[i+1], DATA[i+2]); 3815 break; 3816 case 'f': // Fullwidth-Halfwidth only 3817 expect(fh, DATA[i+2], DATA[i+1]); 3818 break; 3819 case 'b': // both directions 3820 expect(hf, DATA[i+1], DATA[i+2]); 3821 expect(fh, DATA[i+2], DATA[i+1]); 3822 break; 3823 } 3824 } 3825 3826 } 3827 3828 /** 3829 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 3830 * TODO: confirm that the expected results are correct. 3831 * For now, test just confirms that C++ and Java give identical results. 3832 */ 3833 public void TestThai() { 3834 Transliterator tr = Transliterator.getInstance("Any-Latin", Transliterator.FORWARD); 3835 String thaiText = 3836 "\u0e42\u0e14\u0e22\u0e1e\u0e37\u0e49\u0e19\u0e10\u0e32\u0e19\u0e41\u0e25\u0e49\u0e27, \u0e04\u0e2d" + 3837 "\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d\u0e23\u0e4c\u0e08\u0e30\u0e40\u0e01\u0e35\u0e48\u0e22" + 3838 "\u0e27\u0e02\u0e49\u0e2d\u0e07\u0e01\u0e31\u0e1a\u0e40\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e02\u0e2d" + 3839 "\u0e07\u0e15\u0e31\u0e27\u0e40\u0e25\u0e02. \u0e04\u0e2d\u0e21\u0e1e\u0e34\u0e27\u0e40\u0e15\u0e2d" + 3840 "\u0e23\u0e4c\u0e08\u0e31\u0e14\u0e40\u0e01\u0e47\u0e1a\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29" + 3841 "\u0e23\u0e41\u0e25\u0e30\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30\u0e2d\u0e37\u0e48\u0e19\u0e46 \u0e42" + 3842 "\u0e14\u0e22\u0e01\u0e32\u0e23\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25" + 3843 "\u0e02\u0e43\u0e2b\u0e49\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e41\u0e15\u0e48\u0e25\u0e30\u0e15" + 3844 "\u0e31\u0e27. \u0e01\u0e48\u0e2d\u0e19\u0e2b\u0e19\u0e49\u0e32\u0e17\u0e35\u0e48\u0e4a Unicode \u0e08" + 3845 "\u0e30\u0e16\u0e39\u0e01\u0e2a\u0e23\u0e49\u0e32\u0e07\u0e02\u0e36\u0e49\u0e19, \u0e44\u0e14\u0e49" + 3846 "\u0e21\u0e35\u0e23\u0e30\u0e1a\u0e1a encoding \u0e2d\u0e22\u0e39\u0e48\u0e2b\u0e25\u0e32\u0e22\u0e23" + 3847 "\u0e49\u0e2d\u0e22\u0e23\u0e30\u0e1a\u0e1a\u0e2a\u0e33\u0e2b\u0e23\u0e31\u0e1a\u0e01\u0e32\u0e23" + 3848 "\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e2b\u0e21\u0e32\u0e22\u0e40\u0e25\u0e02\u0e40\u0e2b\u0e25\u0e48" + 3849 "\u0e32\u0e19\u0e35\u0e49. \u0e44\u0e21\u0e48\u0e21\u0e35 encoding \u0e43\u0e14\u0e17\u0e35\u0e48" + 3850 "\u0e21\u0e35\u0e08\u0e33\u0e19\u0e27\u0e19\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e02\u0e23\u0e30" + 3851 "\u0e21\u0e32\u0e01\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d: \u0e22\u0e01\u0e15\u0e31\u0e27\u0e2d" + 3852 "\u0e22\u0e48\u0e32\u0e07\u0e40\u0e0a\u0e48\u0e19, \u0e40\u0e09\u0e1e\u0e32\u0e30\u0e43\u0e19\u0e01" + 3853 "\u0e25\u0e38\u0e48\u0e21\u0e2a\u0e2b\u0e20\u0e32\u0e1e\u0e22\u0e38\u0e42\u0e23\u0e1b\u0e40\u0e1e" + 3854 "\u0e35\u0e22\u0e07\u0e41\u0e2b\u0e48\u0e07\u0e40\u0e14\u0e35\u0e22\u0e27 \u0e01\u0e47\u0e15\u0e49" + 3855 "\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e2b\u0e25\u0e32\u0e22 encoding \u0e43\u0e19\u0e01\u0e32\u0e23\u0e04" + 3856 "\u0e23\u0e2d\u0e1a\u0e04\u0e25\u0e38\u0e21\u0e17\u0e38\u0e01\u0e20\u0e32\u0e29\u0e32\u0e43\u0e19" + 3857 "\u0e01\u0e25\u0e38\u0e48\u0e21. \u0e2b\u0e23\u0e37\u0e2d\u0e41\u0e21\u0e49\u0e41\u0e15\u0e48\u0e43" + 3858 "\u0e19\u0e20\u0e32\u0e29\u0e32\u0e40\u0e14\u0e35\u0e48\u0e22\u0e27 \u0e40\u0e0a\u0e48\u0e19 \u0e20" + 3859 "\u0e32\u0e29\u0e32\u0e2d\u0e31\u0e07\u0e01\u0e24\u0e29 \u0e01\u0e47\u0e44\u0e21\u0e48\u0e21\u0e35" + 3860 " encoding \u0e43\u0e14\u0e17\u0e35\u0e48\u0e40\u0e1e\u0e35\u0e22\u0e07\u0e1e\u0e2d\u0e2a\u0e33\u0e2b" + 3861 "\u0e23\u0e31\u0e1a\u0e17\u0e38\u0e01\u0e15\u0e31\u0e27\u0e2d\u0e31\u0e01\u0e29\u0e23, \u0e40\u0e04" + 3862 "\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e2b\u0e21\u0e32\u0e22\u0e27\u0e23\u0e23\u0e04\u0e15\u0e2d\u0e19" + 3863 " \u0e41\u0e25\u0e30\u0e2a\u0e31\u0e0d\u0e25\u0e31\u0e01\u0e29\u0e13\u0e4c\u0e17\u0e32\u0e07\u0e40" + 3864 "\u0e17\u0e04\u0e19\u0e34\u0e04\u0e17\u0e35\u0e48\u0e43\u0e0a\u0e49\u0e01\u0e31\u0e19\u0e2d\u0e22" + 3865 "\u0e39\u0e48\u0e17\u0e31\u0e48\u0e27\u0e44\u0e1b."; 3866 3867 String latinText = 3868 "doy ph\u1ee5\u0304\u0302n \u1e6d\u0304h\u0101n l\u00e6\u0302w, khxmphiwtexr\u0312 ca ke\u012b\u0300" + 3869 "ywk\u0304\u0125xng k\u1ea1b re\u1ee5\u0304\u0300xng k\u0304hxng t\u1ea1wlek\u0304h. khxmphiwtexr" + 3870 "\u0312 c\u1ea1d k\u0115b t\u1ea1w x\u1ea1ks\u0304\u02b9r l\u00e6a x\u1ea1kk\u0304h ra x\u1ee5\u0304" + 3871 "\u0300n\u00ab doy k\u0101r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304\u0131\u0302 s\u0304" + 3872 "\u1ea3h\u0304r\u1ea1b t\u00e6\u0300la t\u1ea1w. k\u0300xn h\u0304n\u0302\u0101 th\u012b\u0300\u0301" + 3873 " Unicode ca t\u0304h\u016bk s\u0304r\u0302\u0101ng k\u0304h\u1ee5\u0302n, d\u1ecb\u0302 m\u012b " + 3874 "rabb encoding xy\u016b\u0300 h\u0304l\u0101y r\u0302xy rabb s\u0304\u1ea3h\u0304r\u1ea1b k\u0101" + 3875 "r k\u1ea3h\u0304nd h\u0304m\u0101ylek\u0304h h\u0304el\u0300\u0101 n\u012b\u0302. m\u1ecb\u0300m" + 3876 "\u012b encoding d\u0131 th\u012b\u0300 m\u012b c\u1ea3nwn t\u1ea1w x\u1ea1kk\u0304hra m\u0101k p" + 3877 "he\u012byng phx: yk t\u1ea1wx\u1ef3\u0101ng ch\u00e8n, c\u0304heph\u0101a n\u0131 kl\u00f9m s\u0304" + 3878 "h\u0304p\u0323h\u0101ph yurop phe\u012byng h\u0304\u00e6\u0300ng de\u012byw k\u0306 t\u0302xngk\u0101" + 3879 "r h\u0304l\u0101y encoding n\u0131 k\u0101r khrxbkhlum thuk p\u0323h\u0101s\u0304\u02b9\u0101 n\u0131" + 3880 " kl\u00f9m. h\u0304r\u1ee5\u0304x m\u00e6\u0302t\u00e6\u0300 n\u0131 p\u0323h\u0101s\u0304\u02b9" + 3881 "\u0101 de\u012b\u0300yw ch\u00e8n p\u0323h\u0101s\u0304\u02b9\u0101 x\u1ea1ngkvs\u0304\u02b9 k\u0306" + 3882 " m\u1ecb\u0300m\u012b encoding d\u0131 th\u012b\u0300 phe\u012byng phx s\u0304\u1ea3h\u0304r\u1ea1" + 3883 "b thuk t\u1ea1w x\u1ea1ks\u0304\u02b9r, kher\u1ee5\u0304\u0300xngh\u0304m\u0101y wrrkh txn l\u00e6" + 3884 "a s\u0304\u1ea1\u1ef5l\u1ea1ks\u0304\u02b9\u1e47\u0312 th\u0101ng thekhnikh th\u012b\u0300 ch\u0131" + 3885 "\u0302 k\u1ea1n xy\u016b\u0300 th\u1ea1\u0300wp\u1ecb."; 3886 3887 expect(tr, thaiText, latinText); 3888 } 3889 3890 3891 //====================================================================== 3892 // These tests are not mirrored (yet) in icu4c at 3893 // source/test/intltest/transtst.cpp 3894 //====================================================================== 3895 3896 /** 3897 * Improve code coverage. 3898 */ 3899 public void TestCoverage() { 3900 // NullTransliterator 3901 Transliterator t = Transliterator.getInstance("Null", Transliterator.FORWARD); 3902 expect(t, "a", "a"); 3903 3904 // Source, target set 3905 t = Transliterator.getInstance("Latin-Greek", Transliterator.FORWARD); 3906 t.setFilter(new UnicodeSet("[A-Z]")); 3907 logln("source = " + t.getSourceSet()); 3908 logln("target = " + t.getTargetSet()); 3909 3910 t = Transliterator.createFromRules("x", "(.) > &Any-Hex($1);", Transliterator.FORWARD); 3911 logln("source = " + t.getSourceSet()); 3912 logln("target = " + t.getTargetSet()); 3913 } 3914 /* 3915 * Test case for threading problem in NormalizationTransliterator 3916 * reported by ticket#5160 3917 */ 3918 public void TestT5160() { 3919 final String[] testData = { 3920 "a", 3921 "b", 3922 "\u09BE", 3923 "A\u0301", 3924 }; 3925 final String[] expected = { 3926 "a", 3927 "b", 3928 "\u09BE", 3929 "\u00C1", 3930 }; 3931 Transliterator translit = Transliterator.getInstance("NFC"); 3932 NormTranslitTask[] tasks = new NormTranslitTask[testData.length]; 3933 for (int i = 0; i < tasks.length; i++) { 3934 tasks[i] = new NormTranslitTask(translit, testData[i], expected[i]); 3935 } 3936 TestUtil.runUntilDone(tasks); 3937 3938 for (int i = 0; i < tasks.length; i++) { 3939 if (tasks[i].getErrorMessage() != null) { 3940 System.out.println("Fail: thread#" + i + " " + tasks[i].getErrorMessage()); 3941 break; 3942 } 3943 } 3944 } 3945 3946 static class NormTranslitTask implements Runnable { 3947 Transliterator translit; 3948 String testData; 3949 String expectedData; 3950 String errorMsg; 3951 3952 NormTranslitTask(Transliterator translit, String testData, String expectedData) { 3953 this.translit = translit; 3954 this.testData = testData; 3955 this.expectedData = expectedData; 3956 } 3957 3958 public void run() { 3959 errorMsg = null; 3960 StringBuffer inBuf = new StringBuffer(testData); 3961 StringBuffer expectedBuf = new StringBuffer(expectedData); 3962 3963 for(int i = 0; i < 1000; i++) { 3964 String in = inBuf.toString(); 3965 String out = translit.transliterate(in); 3966 String expected = expectedBuf.toString(); 3967 if (!out.equals(expected)) { 3968 errorMsg = "in {" + in + "} / out {" + out + "} / expected {" + expected + "}"; 3969 break; 3970 } 3971 inBuf.append(testData); 3972 expectedBuf.append(expectedData); 3973 } 3974 } 3975 3976 public String getErrorMessage() { 3977 return errorMsg; 3978 } 3979 } 3980 3981 //====================================================================== 3982 // Support methods 3983 //====================================================================== 3984 void expect(String rules, 3985 String source, 3986 String expectedResult, 3987 Transliterator.Position pos) { 3988 Transliterator t = Transliterator.createFromRules("<ID>", rules, Transliterator.FORWARD); 3989 expect(t, source, expectedResult, pos); 3990 } 3991 3992 void expect(String rules, String source, String expectedResult) { 3993 expect(rules, source, expectedResult, null); 3994 } 3995 3996 void expect(Transliterator t, String source, String expectedResult, 3997 Transliterator reverseTransliterator) { 3998 expect(t, source, expectedResult); 3999 if (reverseTransliterator != null) { 4000 expect(reverseTransliterator, expectedResult, source); 4001 } 4002 } 4003 4004 void expect(Transliterator t, String source, String expectedResult) { 4005 expect(t, source, expectedResult, (Transliterator.Position) null); 4006 } 4007 4008 void expect(Transliterator t, String source, String expectedResult, 4009 Transliterator.Position pos) { 4010 if (pos == null) { 4011 String result = t.transliterate(source); 4012 if (!expectAux(t.getID() + ":String", source, result, expectedResult)) return; 4013 } 4014 4015 Transliterator.Position index = null; 4016 if (pos == null) { 4017 index = new Transliterator.Position(0, source.length(), 0, source.length()); 4018 } else { 4019 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4020 pos.start, pos.limit); 4021 } 4022 4023 ReplaceableString rsource = new ReplaceableString(source); 4024 4025 t.finishTransliteration(rsource, index); 4026 // Do it all at once -- below we do it incrementally 4027 4028 if (index.start != index.limit) { 4029 expectAux(t.getID() + ":UNFINISHED", source, 4030 "start: " + index.start + ", limit: " + index.limit, false, expectedResult); 4031 return; 4032 } 4033 String result = rsource.toString(); 4034 if (!expectAux(t.getID() + ":Replaceable", source, result, expectedResult)) return; 4035 4036 4037 if (pos == null) { 4038 index = new Transliterator.Position(); 4039 } else { 4040 index = new Transliterator.Position(pos.contextStart, pos.contextLimit, 4041 pos.start, pos.limit); 4042 } 4043 4044 // Test incremental transliteration -- this result 4045 // must be the same after we finalize (see below). 4046 List<String> v = new ArrayList<String>(); 4047 v.add(source); 4048 rsource.replace(0, rsource.length(), ""); 4049 if (pos != null) { 4050 rsource.replace(0, 0, source); 4051 v.add(UtilityExtensions.formatInput(rsource, index)); 4052 t.transliterate(rsource, index); 4053 v.add(UtilityExtensions.formatInput(rsource, index)); 4054 } else { 4055 for (int i=0; i<source.length(); ++i) { 4056 //v.add(i == 0 ? "" : " + " + source.charAt(i) + ""); 4057 //log.append(source.charAt(i)).append(" -> ")); 4058 t.transliterate(rsource, index, source.charAt(i)); 4059 //v.add(UtilityExtensions.formatInput(rsource, index) + source.substring(i+1)); 4060 v.add(UtilityExtensions.formatInput(rsource, index) + 4061 ((i<source.length()-1)?(" + '" + source.charAt(i+1) + "' ->"):" =>")); 4062 } 4063 } 4064 4065 // As a final step in keyboard transliteration, we must call 4066 // transliterate to finish off any pending partial matches that 4067 // were waiting for more input. 4068 t.finishTransliteration(rsource, index); 4069 result = rsource.toString(); 4070 //log.append(" => ").append(rsource.toString()); 4071 v.add(result); 4072 4073 String[] results = new String[v.size()]; 4074 v.toArray(results); 4075 expectAux(t.getID() + ":Incremental", results, 4076 result.equals(expectedResult), 4077 expectedResult); 4078 } 4079 4080 boolean expectAux(String tag, String source, 4081 String result, String expectedResult) { 4082 return expectAux(tag, new String[] {source, result}, 4083 result.equals(expectedResult), 4084 expectedResult); 4085 } 4086 4087 boolean expectAux(String tag, String source, 4088 String result, boolean pass, 4089 String expectedResult) { 4090 return expectAux(tag, new String[] {source, result}, 4091 pass, 4092 expectedResult); 4093 } 4094 4095 boolean expectAux(String tag, String source, 4096 boolean pass, 4097 String expectedResult) { 4098 return expectAux(tag, new String[] {source}, 4099 pass, 4100 expectedResult); 4101 } 4102 4103 boolean expectAux(String tag, String[] results, boolean pass, 4104 String expectedResult) { 4105 msg((pass?"(":"FAIL: (")+tag+")", pass ? LOG : ERR, true, true); 4106 4107 for (int i = 0; i < results.length; ++i) { 4108 String label; 4109 if (i == 0) { 4110 label = "source: "; 4111 } else if (i == results.length - 1) { 4112 label = "result: "; 4113 } else { 4114 if (!isVerbose() && pass) continue; 4115 label = "interm" + i + ": "; 4116 } 4117 msg(" " + label + results[i], pass ? LOG : ERR, false, true); 4118 } 4119 4120 if (!pass) { 4121 msg( " expected: " + expectedResult, ERR, false, true); 4122 } 4123 4124 return pass; 4125 } 4126 4127 private void assertTransform(String message, String expected, StringTransform t, String source) { 4128 assertEquals(message + " " + source, expected, t.transform(source)); 4129 } 4130 4131 4132 private void assertTransform(String message, String expected, StringTransform t, StringTransform back, String source, String source2) { 4133 assertEquals(message + " " +source, expected, t.transform(source)); 4134 assertEquals(message + " " +source2, expected, t.transform(source2)); 4135 assertEquals(message + " " + expected, source, back.transform(expected)); 4136 } 4137 4138 /* 4139 * Tests the method public Enumeration<String> getAvailableTargets(String source) 4140 */ 4141 public void TestGetAvailableTargets() { 4142 try { 4143 // Tests when if (targets == null) is true 4144 Transliterator.getAvailableTargets(""); 4145 } catch (Exception e) { 4146 errln("TransliteratorRegistry.getAvailableTargets(String) was not " + "supposed to return an exception."); 4147 } 4148 } 4149 4150 /* 4151 * Tests the method public Enumeration<String> getAvailableVariants(String source, String target) 4152 */ 4153 public void TestGetAvailableVariants() { 4154 try { 4155 // Tests when if (targets == null) is true 4156 Transliterator.getAvailableVariants("", ""); 4157 } catch (Exception e) { 4158 errln("TransliteratorRegistry.getAvailableVariants(String) was not " + "supposed to return an exception."); 4159 } 4160 } 4161 4162 /* 4163 * Tests the mehtod String nextLine() in RuleBody 4164 */ 4165 public void TestNextLine() { 4166 // Tests when "if (s != null && s.length() > 0 && s.charAt(s.length() - 1) == '\\') is true 4167 try{ 4168 Transliterator.createFromRules("gif", "\\", Transliterator.FORWARD); 4169 } catch(Exception e){ 4170 errln("TransliteratorParser.nextLine() was not suppose to return an " + 4171 "exception for a rule of '\\'"); 4172 } 4173 } 4174} 4175