1/* 2********************************************************************** 3* Copyright (C) 1999-2013, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 11/10/99 aliu Creation. 8********************************************************************** 9*/ 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_TRANSLITERATION 14 15#include "transtst.h" 16#include "unicode/locid.h" 17#include "unicode/dtfmtsym.h" 18#include "unicode/normlzr.h" 19#include "unicode/translit.h" 20#include "unicode/uchar.h" 21#include "unicode/unifilt.h" 22#include "unicode/uniset.h" 23#include "unicode/ustring.h" 24#include "unicode/usetiter.h" 25#include "unicode/uscript.h" 26#include "unicode/utf16.h" 27#include "cpdtrans.h" 28#include "nultrans.h" 29#include "rbt.h" 30#include "rbt_pars.h" 31#include "anytrans.h" 32#include "esctrn.h" 33#include "name2uni.h" 34#include "nortrans.h" 35#include "remtrans.h" 36#include "titletrn.h" 37#include "tolowtrn.h" 38#include "toupptrn.h" 39#include "unesctrn.h" 40#include "uni2name.h" 41#include "cstring.h" 42#include "cmemory.h" 43#include <stdio.h> 44 45/*********************************************************************** 46 47 HOW TO USE THIS TEST FILE 48 -or- 49 How I developed on two platforms 50 without losing (too much of) my mind 51 52 531. Add new tests by copying/pasting/changing existing tests. On Java, 54 any public void method named Test...() taking no parameters becomes 55 a test. On C++, you need to modify the header and add a line to 56 the runIndexedTest() dispatch method. 57 582. Make liberal use of the expect() method; it is your friend. 59 603. The tests in this file exactly match those in a sister file on the 61 other side. The two files are: 62 63 icu4j: src/com/ibm/test/translit/TransliteratorTest.java 64 icu4c: source/test/intltest/transtst.cpp 65 66 ==> THIS IS THE IMPORTANT PART <== 67 68 When you add a test in this file, add it in TransliteratorTest.java 69 too. Give it the same name and put it in the same relative place. 70 This makes maintenance a lot simpler for any poor soul who ends up 71 trying to synchronize the tests between icu4j and icu4c. 72 734. If you MUST enter a test that is NOT paralleled in the sister file, 74 then add it in the special non-mirrored section. These are 75 labeled 76 77 "icu4j ONLY" 78 79 or 80 81 "icu4c ONLY" 82 83 Make sure you document the reason the test is here and not there. 84 85 86Thank you. 87The Management 88***********************************************************************/ 89 90// Define character constants thusly to be EBCDIC-friendly 91enum { 92 LEFT_BRACE=((UChar)0x007B), /*{*/ 93 PIPE =((UChar)0x007C), /*|*/ 94 ZERO =((UChar)0x0030), /*0*/ 95 UPPER_A =((UChar)0x0041) /*A*/ 96}; 97 98TransliteratorTest::TransliteratorTest() 99: DESERET_DEE((UChar32)0x10414), 100 DESERET_dee((UChar32)0x1043C) 101{ 102} 103 104TransliteratorTest::~TransliteratorTest() {} 105 106void 107TransliteratorTest::runIndexedTest(int32_t index, UBool exec, 108 const char* &name, char* /*par*/) { 109 switch (index) { 110 TESTCASE(0,TestInstantiation); 111 TESTCASE(1,TestSimpleRules); 112 TESTCASE(2,TestRuleBasedInverse); 113 TESTCASE(3,TestKeyboard); 114 TESTCASE(4,TestKeyboard2); 115 TESTCASE(5,TestKeyboard3); 116 TESTCASE(6,TestArabic); 117 TESTCASE(7,TestCompoundKana); 118 TESTCASE(8,TestCompoundHex); 119 TESTCASE(9,TestFiltering); 120 TESTCASE(10,TestInlineSet); 121 TESTCASE(11,TestPatternQuoting); 122 TESTCASE(12,TestJ277); 123 TESTCASE(13,TestJ243); 124 TESTCASE(14,TestJ329); 125 TESTCASE(15,TestSegments); 126 TESTCASE(16,TestCursorOffset); 127 TESTCASE(17,TestArbitraryVariableValues); 128 TESTCASE(18,TestPositionHandling); 129 TESTCASE(19,TestHiraganaKatakana); 130 TESTCASE(20,TestCopyJ476); 131 TESTCASE(21,TestAnchors); 132 TESTCASE(22,TestInterIndic); 133 TESTCASE(23,TestFilterIDs); 134 TESTCASE(24,TestCaseMap); 135 TESTCASE(25,TestNameMap); 136 TESTCASE(26,TestLiberalizedID); 137 TESTCASE(27,TestCreateInstance); 138 TESTCASE(28,TestNormalizationTransliterator); 139 TESTCASE(29,TestCompoundRBT); 140 TESTCASE(30,TestCompoundFilter); 141 TESTCASE(31,TestRemove); 142 TESTCASE(32,TestToRules); 143 TESTCASE(33,TestContext); 144 TESTCASE(34,TestSupplemental); 145 TESTCASE(35,TestQuantifier); 146 TESTCASE(36,TestSTV); 147 TESTCASE(37,TestCompoundInverse); 148 TESTCASE(38,TestNFDChainRBT); 149 TESTCASE(39,TestNullInverse); 150 TESTCASE(40,TestAliasInverseID); 151 TESTCASE(41,TestCompoundInverseID); 152 TESTCASE(42,TestUndefinedVariable); 153 TESTCASE(43,TestEmptyContext); 154 TESTCASE(44,TestCompoundFilterID); 155 TESTCASE(45,TestPropertySet); 156 TESTCASE(46,TestNewEngine); 157 TESTCASE(47,TestQuantifiedSegment); 158 TESTCASE(48,TestDevanagariLatinRT); 159 TESTCASE(49,TestTeluguLatinRT); 160 TESTCASE(50,TestCompoundLatinRT); 161 TESTCASE(51,TestSanskritLatinRT); 162 TESTCASE(52,TestLocaleInstantiation); 163 TESTCASE(53,TestTitleAccents); 164 TESTCASE(54,TestLocaleResource); 165 TESTCASE(55,TestParseError); 166 TESTCASE(56,TestOutputSet); 167 TESTCASE(57,TestVariableRange); 168 TESTCASE(58,TestInvalidPostContext); 169 TESTCASE(59,TestIDForms); 170 TESTCASE(60,TestToRulesMark); 171 TESTCASE(61,TestEscape); 172 TESTCASE(62,TestAnchorMasking); 173 TESTCASE(63,TestDisplayName); 174 TESTCASE(64,TestSpecialCases); 175#if !UCONFIG_NO_FILE_IO 176 TESTCASE(65,TestIncrementalProgress); 177#endif 178 TESTCASE(66,TestSurrogateCasing); 179 TESTCASE(67,TestFunction); 180 TESTCASE(68,TestInvalidBackRef); 181 TESTCASE(69,TestMulticharStringSet); 182 TESTCASE(70,TestUserFunction); 183 TESTCASE(71,TestAnyX); 184 TESTCASE(72,TestSourceTargetSet); 185 TESTCASE(73,TestGurmukhiDevanagari); 186 TESTCASE(74,TestPatternWhiteSpace); 187 TESTCASE(75,TestAllCodepoints); 188 TESTCASE(76,TestBoilerplate); 189 TESTCASE(77,TestAlternateSyntax); 190 TESTCASE(78,TestBeginEnd); 191 TESTCASE(79,TestBeginEndToRules); 192 TESTCASE(80,TestRegisterAlias); 193 TESTCASE(81,TestRuleStripping); 194 TESTCASE(82,TestHalfwidthFullwidth); 195 TESTCASE(83,TestThai); 196 TESTCASE(84,TestAny); 197 default: name = ""; break; 198 } 199} 200 201static const UVersionInfo ICU_39 = {3,9,4,0}; 202/** 203 * Make sure every system transliterator can be instantiated. 204 * 205 * ALSO test that the result of toRules() for each rule is a valid 206 * rule. Do this here so we don't have to have another test that 207 * instantiates everything as well. 208 */ 209void TransliteratorTest::TestInstantiation() { 210 UErrorCode ec = U_ZERO_ERROR; 211 StringEnumeration* avail = Transliterator::getAvailableIDs(ec); 212 assertSuccess("getAvailableIDs()", ec); 213 assertTrue("getAvailableIDs()!=NULL", avail!=NULL); 214 int32_t n = Transliterator::countAvailableIDs(); 215 assertTrue("getAvailableIDs().count()==countAvailableIDs()", 216 avail->count(ec) == n); 217 assertSuccess("count()", ec); 218 UnicodeString name; 219 for (int32_t i=0; i<n; ++i) { 220 const UnicodeString& id = *avail->snext(ec); 221 if (!assertSuccess("snext()", ec) || 222 !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) { 223 break; 224 } 225 UnicodeString id2 = Transliterator::getAvailableID(i); 226 if (id.length() < 1) { 227 errln(UnicodeString("FAIL: getAvailableID(") + 228 i + ") returned empty string"); 229 continue; 230 } 231 if (id != id2) { 232 errln(UnicodeString("FAIL: getAvailableID(") + 233 i + ") != getAvailableIDs().snext()"); 234 continue; 235 } 236 UParseError parseError; 237 UErrorCode status = U_ZERO_ERROR; 238 Transliterator* t = Transliterator::createInstance(id, 239 UTRANS_FORWARD, parseError,status); 240 name.truncate(0); 241 Transliterator::getDisplayName(id, name); 242 if (t == 0) { 243#if UCONFIG_NO_BREAK_ITERATION 244 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. 245 if (id.compare((UnicodeString)"Thai-Latin") != 0) 246#endif 247 dataerrln(UnicodeString("FAIL: Couldn't create ") + id + 248 /*", parse error " + parseError.code +*/ 249 ", line " + parseError.line + 250 ", offset " + parseError.offset + 251 ", pre-context " + prettify(parseError.preContext, TRUE) + 252 ", post-context " +prettify(parseError.postContext,TRUE) + 253 ", Error: " + u_errorName(status)); 254 // When createInstance fails, it deletes the failing 255 // entry from the available ID list. We detect this 256 // here by looking for a change in countAvailableIDs. 257 int32_t nn = Transliterator::countAvailableIDs(); 258 if (nn == (n - 1)) { 259 n = nn; 260 --i; // Compensate for deleted entry 261 } 262 } else { 263 logln(UnicodeString("OK: ") + name + " (" + id + ")"); 264 265 // Now test toRules 266 UnicodeString rules; 267 t->toRules(rules, TRUE); 268 Transliterator *u = Transliterator::createFromRules("x", 269 rules, UTRANS_FORWARD, parseError,status); 270 if (u == 0) { 271 errln(UnicodeString("FAIL: ") + id + 272 ".createFromRules() => bad rules" + 273 /*", parse error " + parseError.code +*/ 274 ", line " + parseError.line + 275 ", offset " + parseError.offset + 276 ", context " + prettify(parseError.preContext, TRUE) + 277 ", rules: " + prettify(rules, TRUE)); 278 } else { 279 delete u; 280 } 281 delete t; 282 } 283 } 284 assertTrue("snext()==NULL", avail->snext(ec)==NULL); 285 assertSuccess("snext()", ec); 286 delete avail; 287 288 // Now test the failure path 289 UParseError parseError; 290 UErrorCode status = U_ZERO_ERROR; 291 UnicodeString id("<Not a valid Transliterator ID>"); 292 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status); 293 if (t != 0) { 294 errln("FAIL: " + id + " returned a transliterator"); 295 delete t; 296 } else { 297 logln("OK: Bogus ID handled properly"); 298 } 299} 300 301void TransliteratorTest::TestSimpleRules(void) { 302 /* Example: rules 1. ab>x|y 303 * 2. yc>z 304 * 305 * []|eabcd start - no match, copy e to tranlated buffer 306 * [e]|abcd match rule 1 - copy output & adjust cursor 307 * [ex|y]cd match rule 2 - copy output & adjust cursor 308 * [exz]|d no match, copy d to transliterated buffer 309 * [exzd]| done 310 */ 311 expect(UnicodeString("ab>x|y;", "") + 312 "yc>z", 313 "eabcd", "exzd"); 314 315 /* Another set of rules: 316 * 1. ab>x|yzacw 317 * 2. za>q 318 * 3. qc>r 319 * 4. cw>n 320 * 321 * []|ab Rule 1 322 * [x|yzacw] No match 323 * [xy|zacw] Rule 2 324 * [xyq|cw] Rule 4 325 * [xyqn]| Done 326 */ 327 expect(UnicodeString("ab>x|yzacw;") + 328 "za>q;" + 329 "qc>r;" + 330 "cw>n", 331 "ab", "xyqn"); 332 333 /* Test categories 334 */ 335 UErrorCode status = U_ZERO_ERROR; 336 UParseError parseError; 337 Transliterator *t = Transliterator::createFromRules( 338 "<ID>", 339 UnicodeString("$dummy=").append((UChar)0xE100) + 340 UnicodeString(";" 341 "$vowel=[aeiouAEIOU];" 342 "$lu=[:Lu:];" 343 "$vowel } $lu > '!';" 344 "$vowel > '&';" 345 "'!' { $lu > '^';" 346 "$lu > '*';" 347 "a > ERROR", ""), 348 UTRANS_FORWARD, parseError, 349 status); 350 if (U_FAILURE(status)) { 351 dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status)); 352 return; 353 } 354 expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 355 delete t; 356} 357 358/** 359 * Test inline set syntax and set variable syntax. 360 */ 361void TransliteratorTest::TestInlineSet(void) { 362 expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 363 expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 364 365 expect(UnicodeString( 366 "$digit = [0-9];" 367 "$alpha = [a-zA-Z];" 368 "$alphanumeric = [$digit $alpha];" // *** 369 "$special = [^$alphanumeric];" // *** 370 "$alphanumeric > '-';" 371 "$special > '*';", ""), 372 373 "thx-1138", "---*----"); 374} 375 376/** 377 * Create some inverses and confirm that they work. We have to be 378 * careful how we do this, since the inverses will not be true 379 * inverses -- we can't throw any random string at the composition 380 * of the transliterators and expect the identity function. F x 381 * F' != I. However, if we are careful about the input, we will 382 * get the expected results. 383 */ 384void TransliteratorTest::TestRuleBasedInverse(void) { 385 UnicodeString RULES = 386 UnicodeString("abc>zyx;") + 387 "ab>yz;" + 388 "bc>zx;" + 389 "ca>xy;" + 390 "a>x;" + 391 "b>y;" + 392 "c>z;" + 393 394 "abc<zyx;" + 395 "ab<yz;" + 396 "bc<zx;" + 397 "ca<xy;" + 398 "a<x;" + 399 "b<y;" + 400 "c<z;" + 401 402 ""; 403 404 const char* DATA[] = { 405 // Careful here -- random strings will not work. If we keep 406 // the left side to the domain and the right side to the range 407 // we will be okay though (left, abc; right xyz). 408 "a", "x", 409 "abcacab", "zyxxxyy", 410 "caccb", "xyzzy", 411 }; 412 413 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 414 415 UErrorCode status = U_ZERO_ERROR; 416 UParseError parseError; 417 Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES, 418 UTRANS_FORWARD, parseError, status); 419 Transliterator *rev = Transliterator::createFromRules("<ID>", RULES, 420 UTRANS_REVERSE, parseError, status); 421 if (U_FAILURE(status)) { 422 errln("FAIL: RBT constructor failed"); 423 return; 424 } 425 for (int32_t i=0; i<DATA_length; i+=2) { 426 expect(*fwd, DATA[i], DATA[i+1]); 427 expect(*rev, DATA[i+1], DATA[i]); 428 } 429 delete fwd; 430 delete rev; 431} 432 433/** 434 * Basic test of keyboard. 435 */ 436void TransliteratorTest::TestKeyboard(void) { 437 UParseError parseError; 438 UErrorCode status = U_ZERO_ERROR; 439 Transliterator *t = Transliterator::createFromRules("<ID>", 440 UnicodeString("psch>Y;") 441 +"ps>y;" 442 +"ch>x;" 443 +"a>A;", 444 UTRANS_FORWARD, parseError, 445 status); 446 if (U_FAILURE(status)) { 447 errln("FAIL: RBT constructor failed"); 448 return; 449 } 450 const char* DATA[] = { 451 // insertion, buffer 452 "a", "A", 453 "p", "Ap", 454 "s", "Aps", 455 "c", "Apsc", 456 "a", "AycA", 457 "psch", "AycAY", 458 0, "AycAY", // null means finishKeyboardTransliteration 459 }; 460 461 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); 462 delete t; 463} 464 465/** 466 * Basic test of keyboard with cursor. 467 */ 468void TransliteratorTest::TestKeyboard2(void) { 469 UParseError parseError; 470 UErrorCode status = U_ZERO_ERROR; 471 Transliterator *t = Transliterator::createFromRules("<ID>", 472 UnicodeString("ych>Y;") 473 +"ps>|y;" 474 +"ch>x;" 475 +"a>A;", 476 UTRANS_FORWARD, parseError, 477 status); 478 if (U_FAILURE(status)) { 479 errln("FAIL: RBT constructor failed"); 480 return; 481 } 482 const char* DATA[] = { 483 // insertion, buffer 484 "a", "A", 485 "p", "Ap", 486 "s", "Aps", // modified for rollback - "Ay", 487 "c", "Apsc", // modified for rollback - "Ayc", 488 "a", "AycA", 489 "p", "AycAp", 490 "s", "AycAps", // modified for rollback - "AycAy", 491 "c", "AycApsc", // modified for rollback - "AycAyc", 492 "h", "AycAY", 493 0, "AycAY", // null means finishKeyboardTransliteration 494 }; 495 496 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); 497 delete t; 498} 499 500/** 501 * Test keyboard transliteration with back-replacement. 502 */ 503void TransliteratorTest::TestKeyboard3(void) { 504 // We want th>z but t>y. Furthermore, during keyboard 505 // transliteration we want t>y then yh>z if t, then h are 506 // typed. 507 UnicodeString RULES("t>|y;" 508 "yh>z;"); 509 510 const char* DATA[] = { 511 // Column 1: characters to add to buffer (as if typed) 512 // Column 2: expected appearance of buffer after 513 // keyboard xliteration. 514 "a", "a", 515 "b", "ab", 516 "t", "abt", // modified for rollback - "aby", 517 "c", "abyc", 518 "t", "abyct", // modified for rollback - "abycy", 519 "h", "abycz", 520 0, "abycz", // null means finishKeyboardTransliteration 521 }; 522 523 UParseError parseError; 524 UErrorCode status = U_ZERO_ERROR; 525 Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status); 526 if (U_FAILURE(status)) { 527 errln("FAIL: RBT constructor failed"); 528 return; 529 } 530 keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); 531 delete t; 532} 533 534void TransliteratorTest::keyboardAux(const Transliterator& t, 535 const char* DATA[], int32_t DATA_length) { 536 UErrorCode status = U_ZERO_ERROR; 537 UTransPosition index={0, 0, 0, 0}; 538 UnicodeString s; 539 for (int32_t i=0; i<DATA_length; i+=2) { 540 UnicodeString log; 541 if (DATA[i] != 0) { 542 log = s + " + " 543 + DATA[i] 544 + " -> "; 545 t.transliterate(s, index, DATA[i], status); 546 } else { 547 log = s + " => "; 548 t.finishTransliteration(s, index); 549 } 550 // Show the start index '{' and the cursor '|' 551 UnicodeString a, b, c; 552 s.extractBetween(0, index.contextStart, a); 553 s.extractBetween(index.contextStart, index.start, b); 554 s.extractBetween(index.start, s.length(), c); 555 log.append(a). 556 append((UChar)LEFT_BRACE). 557 append(b). 558 append((UChar)PIPE). 559 append(c); 560 if (s == DATA[i+1] && U_SUCCESS(status)) { 561 logln(log); 562 } else { 563 errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]); 564 } 565 } 566} 567 568void TransliteratorTest::TestArabic(void) { 569// Test disabled for 2.0 until new Arabic transliterator can be written. 570// /* 571// const char* DATA[] = { 572// "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+ 573// "\u0627\u0644\u0644\u063a\u0629\u0020"+ 574// "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+ 575// "\u0628\u0628\u0646\u0638\u0645\u0020"+ 576// "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+ 577// "\u062c\u0645\u064a\u0644\u0629", 578// }; 579// */ 580// 581// UChar ar_raw[] = { 582// 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627, 583// 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644, 584// 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, 585// 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643, 586// 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, 587// 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0 588// }; 589// UnicodeString ar(ar_raw); 590// UErrorCode status=U_ZERO_ERROR; 591// UParseError parseError; 592// Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status); 593// if (t == 0) { 594// errln("FAIL: createInstance failed"); 595// return; 596// } 597// expect(*t, "Arabic", ar); 598// delete t; 599} 600 601/** 602 * Compose the Kana transliterator forward and reverse and try 603 * some strings that should come out unchanged. 604 */ 605void TransliteratorTest::TestCompoundKana(void) { 606 UParseError parseError; 607 UErrorCode status = U_ZERO_ERROR; 608 Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status); 609 if (t == 0) { 610 dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status)); 611 } else { 612 expect(*t, "aaaaa", "aaaaa"); 613 delete t; 614 } 615} 616 617/** 618 * Compose the hex transliterators forward and reverse. 619 */ 620void TransliteratorTest::TestCompoundHex(void) { 621 UParseError parseError; 622 UErrorCode status = U_ZERO_ERROR; 623 Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status); 624 Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status); 625 Transliterator* transab[] = { a, b }; 626 Transliterator* transba[] = { b, a }; 627 if (a == 0 || b == 0) { 628 errln("FAIL: construction failed"); 629 delete a; 630 delete b; 631 return; 632 } 633 // Do some basic tests of a 634 expect(*a, "01", UnicodeString("\\u0030\\u0031", "")); 635 // Do some basic tests of b 636 expect(*b, UnicodeString("\\u0030\\u0031", ""), "01"); 637 638 Transliterator* ab = new CompoundTransliterator(transab, 2); 639 UnicodeString s("abcde", ""); 640 expect(*ab, s, s); 641 642 UnicodeString str(s); 643 a->transliterate(str); 644 Transliterator* ba = new CompoundTransliterator(transba, 2); 645 expect(*ba, str, str); 646 647 delete ab; 648 delete ba; 649 delete a; 650 delete b; 651} 652 653int gTestFilterClassID = 0; 654/** 655 * Used by TestFiltering(). 656 */ 657class TestFilter : public UnicodeFilter { 658 virtual UnicodeFunctor* clone() const { 659 return new TestFilter(*this); 660 } 661 virtual UBool contains(UChar32 c) const { 662 return c != (UChar)0x0063 /*c*/; 663 } 664 // Stubs 665 virtual UnicodeString& toPattern(UnicodeString& result, 666 UBool /*escapeUnprintable*/) const { 667 return result; 668 } 669 virtual UBool matchesIndexValue(uint8_t /*v*/) const { 670 return FALSE; 671 } 672 virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {} 673public: 674 UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; } 675}; 676 677/** 678 * Do some basic tests of filtering. 679 */ 680void TransliteratorTest::TestFiltering(void) { 681 UParseError parseError; 682 UErrorCode status = U_ZERO_ERROR; 683 Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status); 684 if (hex == 0) { 685 errln("FAIL: createInstance(Any-Hex) failed"); 686 return; 687 } 688 hex->adoptFilter(new TestFilter()); 689 UnicodeString s("abcde"); 690 hex->transliterate(s); 691 UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", ""); 692 if (s == exp) { 693 logln(UnicodeString("Ok: \"") + exp + "\""); 694 } else { 695 logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\""); 696 } 697 698 // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J. 699 UnicodeFilter *f = hex->orphanFilter(); 700 if (f == NULL){ 701 errln("FAIL: orphanFilter() should get a UnicodeFilter"); 702 } else { 703 delete f; 704 } 705 delete hex; 706} 707 708/** 709 * Test anchors 710 */ 711void TransliteratorTest::TestAnchors(void) { 712 expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""), 713 "aaa", 714 "012"); 715 expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""), 716 "aaa", 717 "012"); 718 expect(UnicodeString("^ab > 01 ;" 719 " ab > |8 ;" 720 " b > k ;" 721 " 8x$ > 45 ;" 722 " 8x > 77 ;", ""), 723 724 "ababbabxabx", 725 "018k7745"); 726 expect(UnicodeString("$s = [z$] ;" 727 "$s{ab > 01 ;" 728 " ab > |8 ;" 729 " b > k ;" 730 " 8x}$s > 45 ;" 731 " 8x > 77 ;", ""), 732 733 "abzababbabxzabxabx", 734 "01z018k45z01x45"); 735} 736 737/** 738 * Test pattern quoting and escape mechanisms. 739 */ 740void TransliteratorTest::TestPatternQuoting(void) { 741 // Array of 3n items 742 // Each item is <rules>, <input>, <expected output> 743 const UnicodeString DATA[] = { 744 UnicodeString(UChar(0x4E01)) + ">'[male adult]'", 745 UnicodeString(UChar(0x4E01)), 746 "[male adult]" 747 }; 748 749 for (int32_t i=0; i<3; i+=3) { 750 logln(UnicodeString("Pattern: ") + prettify(DATA[i])); 751 UParseError parseError; 752 UErrorCode status = U_ZERO_ERROR; 753 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); 754 if (U_FAILURE(status)) { 755 errln("RBT constructor failed"); 756 } else { 757 expect(*t, DATA[i+1], DATA[i+2]); 758 } 759 delete t; 760 } 761} 762 763/** 764 * Regression test for bugs found in Greek transliteration. 765 */ 766void TransliteratorTest::TestJ277(void) { 767 UErrorCode status = U_ZERO_ERROR; 768 UParseError parseError; 769 Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status); 770 if (gl == NULL) { 771 dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status)); 772 return; 773 } 774 775 UChar sigma = 0x3C3; 776 UChar upsilon = 0x3C5; 777 UChar nu = 0x3BD; 778// UChar PHI = 0x3A6; 779 UChar alpha = 0x3B1; 780// UChar omega = 0x3C9; 781// UChar omicron = 0x3BF; 782// UChar epsilon = 0x3B5; 783 784 // sigma upsilon nu -> syn 785 UnicodeString syn; 786 syn.append(sigma).append(upsilon).append(nu); 787 expect(*gl, syn, "syn"); 788 789 // sigma alpha upsilon nu -> saun 790 UnicodeString sayn; 791 sayn.append(sigma).append(alpha).append(upsilon).append(nu); 792 expect(*gl, sayn, "saun"); 793 794 // Again, using a smaller rule set 795 UnicodeString rules( 796 "$alpha = \\u03B1;" 797 "$nu = \\u03BD;" 798 "$sigma = \\u03C3;" 799 "$ypsilon = \\u03C5;" 800 "$vowel = [aeiouAEIOU$alpha$ypsilon];" 801 "s <> $sigma;" 802 "a <> $alpha;" 803 "u <> $vowel { $ypsilon;" 804 "y <> $ypsilon;" 805 "n <> $nu;", 806 ""); 807 Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status); 808 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 809 expect(*mini, syn, "syn"); 810 expect(*mini, sayn, "saun"); 811 delete mini; 812 mini = NULL; 813 814#if !UCONFIG_NO_FORMATTING 815 // Transliterate the Greek locale data 816 Locale el("el"); 817 DateFormatSymbols syms(el, status); 818 if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } 819 int32_t i, count; 820 const UnicodeString* data = syms.getMonths(count); 821 for (i=0; i<count; ++i) { 822 if (data[i].length() == 0) { 823 continue; 824 } 825 UnicodeString out(data[i]); 826 gl->transliterate(out); 827 UBool ok = TRUE; 828 if (data[i].length() >= 2 && out.length() >= 2 && 829 u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) { 830 if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) { 831 ok = FALSE; 832 } 833 } 834 if (ok) { 835 logln(prettify(data[i] + " -> " + out)); 836 } else { 837 errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out)); 838 } 839 } 840#endif 841 842 delete gl; 843} 844 845/** 846 * Prefix, suffix support in hex transliterators 847 */ 848void TransliteratorTest::TestJ243(void) { 849 UErrorCode ec = U_ZERO_ERROR; 850 851 // Test default Hex-Any, which should handle 852 // \u, \U, u+, and U+ 853 Transliterator *hex = 854 Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec); 855 if (assertSuccess("getInstance", ec)) { 856 expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz"); 857 } 858 delete hex; 859 860// // Try a custom Hex-Unicode 861// // \uXXXX and &#xXXXX; 862// ec = U_ZERO_ERROR; 863// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec); 864// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""), 865// "abcd5fx0123"); 866// // Try custom Any-Hex (default is tested elsewhere) 867// ec = U_ZERO_ERROR; 868// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec); 869// expect(hex3, "012", "012"); 870} 871 872/** 873 * Parsers need better syntax error messages. 874 */ 875void TransliteratorTest::TestJ329(void) { 876 877 struct { UBool containsErrors; const char* rule; } DATA[] = { 878 { FALSE, "a > b; c > d" }, 879 { TRUE, "a > b; no operator; c > d" }, 880 }; 881 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 882 883 for (int32_t i=0; i<DATA_length; ++i) { 884 UErrorCode status = U_ZERO_ERROR; 885 UParseError parseError; 886 Transliterator *rbt = Transliterator::createFromRules("<ID>", 887 DATA[i].rule, 888 UTRANS_FORWARD, 889 parseError, 890 status); 891 UBool gotError = U_FAILURE(status); 892 UnicodeString desc(DATA[i].rule); 893 desc.append(gotError ? " -> error" : " -> no error"); 894 if (gotError) { 895 desc = desc + ", ParseError code=" + u_errorName(status) + 896 " line=" + parseError.line + 897 " offset=" + parseError.offset + 898 " context=" + parseError.preContext; 899 } 900 if (gotError == DATA[i].containsErrors) { 901 logln(UnicodeString("Ok: ") + desc); 902 } else { 903 errln(UnicodeString("FAIL: ") + desc); 904 } 905 delete rbt; 906 } 907} 908 909/** 910 * Test segments and segment references. 911 */ 912void TransliteratorTest::TestSegments(void) { 913 // Array of 3n items 914 // Each item is <rules>, <input>, <expected output> 915 UnicodeString DATA[] = { 916 "([a-z]) '.' ([0-9]) > $2 '-' $1", 917 "abc.123.xyz.456", 918 "ab1-c23.xy4-z56", 919 920 // nested 921 "(([a-z])([0-9])) > $1 '.' $2 '.' $3;", 922 "a1 b2", 923 "a1.a.1 b2.b.2", 924 }; 925 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); 926 927 for (int32_t i=0; i<DATA_length; i+=3) { 928 logln("Pattern: " + prettify(DATA[i])); 929 UParseError parseError; 930 UErrorCode status = U_ZERO_ERROR; 931 Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status); 932 if (U_FAILURE(status)) { 933 errln("FAIL: RBT constructor"); 934 } else { 935 expect(*t, DATA[i+1], DATA[i+2]); 936 } 937 delete t; 938 } 939} 940 941/** 942 * Test cursor positioning outside of the key 943 */ 944void TransliteratorTest::TestCursorOffset(void) { 945 // Array of 3n items 946 // Each item is <rules>, <input>, <expected output> 947 UnicodeString DATA[] = { 948 "pre {alpha} post > | @ ALPHA ;" 949 "eALPHA > beta ;" 950 "pre {beta} post > BETA @@ | ;" 951 "post > xyz", 952 953 "prealphapost prebetapost", 954 955 "prbetaxyz preBETApost", 956 }; 957 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); 958 959 for (int32_t i=0; i<DATA_length; i+=3) { 960 logln("Pattern: " + prettify(DATA[i])); 961 UParseError parseError; 962 UErrorCode status = U_ZERO_ERROR; 963 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); 964 if (U_FAILURE(status)) { 965 errln("FAIL: RBT constructor"); 966 } else { 967 expect(*t, DATA[i+1], DATA[i+2]); 968 } 969 delete t; 970 } 971} 972 973/** 974 * Test zero length and > 1 char length variable values. Test 975 * use of variable refs in UnicodeSets. 976 */ 977void TransliteratorTest::TestArbitraryVariableValues(void) { 978 // Array of 3n items 979 // Each item is <rules>, <input>, <expected output> 980 UnicodeString DATA[] = { 981 "$abe = ab;" 982 "$pat = x[yY]z;" 983 "$ll = 'a-z';" 984 "$llZ = [$ll];" 985 "$llY = [$ll$pat];" 986 "$emp = ;" 987 988 "$abe > ABE;" 989 "$pat > END;" 990 "$llZ > 1;" 991 "$llY > 2;" 992 "7$emp 8 > 9;" 993 "", 994 995 "ab xYzxyz stY78", 996 "ABE ENDEND 1129", 997 }; 998 int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA)); 999 1000 for (int32_t i=0; i<DATA_length; i+=3) { 1001 logln("Pattern: " + prettify(DATA[i])); 1002 UParseError parseError; 1003 UErrorCode status = U_ZERO_ERROR; 1004 Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); 1005 if (U_FAILURE(status)) { 1006 errln("FAIL: RBT constructor"); 1007 } else { 1008 expect(*t, DATA[i+1], DATA[i+2]); 1009 } 1010 delete t; 1011 } 1012} 1013 1014/** 1015 * Confirm that the contextStart, contextLimit, start, and limit 1016 * behave correctly. J474. 1017 */ 1018void TransliteratorTest::TestPositionHandling(void) { 1019 // Array of 3n items 1020 // Each item is <rules>, <input>, <expected output> 1021 const char* DATA[] = { 1022 "a{t} > SS ; {t}b > UU ; {t} > TT ;", 1023 "xtat txtb", // pos 0,9,0,9 1024 "xTTaSS TTxUUb", 1025 1026 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", 1027 "xtat txtb", // pos 2,9,3,8 1028 "xtaSS TTxUUb", 1029 1030 "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", 1031 "xtat txtb", // pos 3,8,3,8 1032 "xtaTT TTxTTb", 1033 }; 1034 1035 // Array of 4n positions -- these go with the DATA array 1036 // They are: contextStart, contextLimit, start, limit 1037 int32_t POS[] = { 1038 0, 9, 0, 9, 1039 2, 9, 3, 8, 1040 3, 8, 3, 8, 1041 }; 1042 1043 int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3; 1044 for (int32_t i=0; i<n; i++) { 1045 UErrorCode status = U_ZERO_ERROR; 1046 UParseError parseError; 1047 Transliterator *t = Transliterator::createFromRules("<ID>", 1048 DATA[3*i], UTRANS_FORWARD, parseError, status); 1049 if (U_FAILURE(status)) { 1050 delete t; 1051 errln("FAIL: RBT constructor"); 1052 return; 1053 } 1054 UTransPosition pos; 1055 pos.contextStart= POS[4*i]; 1056 pos.contextLimit = POS[4*i+1]; 1057 pos.start = POS[4*i+2]; 1058 pos.limit = POS[4*i+3]; 1059 UnicodeString rsource(DATA[3*i+1]); 1060 t->transliterate(rsource, pos, status); 1061 if (U_FAILURE(status)) { 1062 delete t; 1063 errln("FAIL: transliterate"); 1064 return; 1065 } 1066 t->finishTransliteration(rsource, pos); 1067 expectAux(DATA[3*i], 1068 DATA[3*i+1], 1069 rsource, 1070 DATA[3*i+2]); 1071 delete t; 1072 } 1073} 1074 1075/** 1076 * Test the Hiragana-Katakana transliterator. 1077 */ 1078void TransliteratorTest::TestHiraganaKatakana(void) { 1079 UParseError parseError; 1080 UErrorCode status = U_ZERO_ERROR; 1081 Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status); 1082 Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status); 1083 if (hk == 0 || kh == 0) { 1084 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 1085 delete hk; 1086 delete kh; 1087 return; 1088 } 1089 1090 // Array of 3n items 1091 // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 1092 const char* DATA[] = { 1093 "both", 1094 "\\u3042\\u3090\\u3099\\u3092\\u3050", 1095 "\\u30A2\\u30F8\\u30F2\\u30B0", 1096 1097 "kh", 1098 "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC", 1099 "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC", 1100 }; 1101 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 1102 1103 for (int32_t i=0; i<DATA_length; i+=3) { 1104 UnicodeString h = CharsToUnicodeString(DATA[i+1]); 1105 UnicodeString k = CharsToUnicodeString(DATA[i+2]); 1106 switch (*DATA[i]) { 1107 case 0x68: //'h': // Hiragana-Katakana 1108 expect(*hk, h, k); 1109 break; 1110 case 0x6B: //'k': // Katakana-Hiragana 1111 expect(*kh, k, h); 1112 break; 1113 case 0x62: //'b': // both 1114 expect(*hk, h, k); 1115 expect(*kh, k, h); 1116 break; 1117 } 1118 } 1119 delete hk; 1120 delete kh; 1121} 1122 1123/** 1124 * Test cloning / copy constructor of RBT. 1125 */ 1126void TransliteratorTest::TestCopyJ476(void) { 1127 // The real test here is what happens when the destructors are 1128 // called. So we let one object get destructed, and check to 1129 // see that its copy still works. 1130 Transliterator *t2 = 0; 1131 { 1132 UParseError parseError; 1133 UErrorCode status = U_ZERO_ERROR; 1134 Transliterator *t1 = Transliterator::createFromRules("t1", 1135 "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status); 1136 if (U_FAILURE(status)) { 1137 errln("FAIL: RBT constructor"); 1138 return; 1139 } 1140 t2 = t1->clone(); // Call copy constructor under the covers. 1141 expect(*t1, "abcfoofoo", "ABcbar"); 1142 delete t1; 1143 } 1144 expect(*t2, "abcfoofoo", "ABcbar"); 1145 delete t2; 1146} 1147 1148/** 1149 * Test inter-Indic transliterators. These are composed. 1150 * ICU4C Jitterbug 483. 1151 */ 1152void TransliteratorTest::TestInterIndic(void) { 1153 UnicodeString ID("Devanagari-Gujarati", ""); 1154 UErrorCode status = U_ZERO_ERROR; 1155 UParseError parseError; 1156 Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status); 1157 if (dg == 0) { 1158 dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status)); 1159 return; 1160 } 1161 UnicodeString id = dg->getID(); 1162 if (id != ID) { 1163 errln("FAIL: createInstance(" + ID + ")->getID() => " + id); 1164 } 1165 UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925"); 1166 UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5"); 1167 expect(*dg, dev, guj); 1168 delete dg; 1169} 1170 1171/** 1172 * Test filter syntax in IDs. (J918) 1173 */ 1174void TransliteratorTest::TestFilterIDs(void) { 1175 // Array of 3n strings: 1176 // <id>, <inverse id>, <input>, <expected output> 1177 const char* DATA[] = { 1178 "[aeiou]Any-Hex", // ID 1179 "[aeiou]Hex-Any", // expected inverse ID 1180 "quizzical", // src 1181 "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 1182 1183 "[aeiou]Any-Hex;[^5]Hex-Any", 1184 "[^5]Any-Hex;[aeiou]Hex-Any", 1185 "quizzical", 1186 "q\\u0075izzical", 1187 1188 "[abc]Null", 1189 "[abc]Null", 1190 "xyz", 1191 "xyz", 1192 }; 1193 enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) }; 1194 1195 for (int i=0; i<DATA_length; i+=4) { 1196 UnicodeString ID(DATA[i], ""); 1197 UnicodeString uID(DATA[i+1], ""); 1198 UnicodeString data2(DATA[i+2], ""); 1199 UnicodeString data3(DATA[i+3], ""); 1200 UParseError parseError; 1201 UErrorCode status = U_ZERO_ERROR; 1202 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status); 1203 if (t == 0) { 1204 errln("FAIL: createInstance(" + ID + ") returned NULL"); 1205 return; 1206 } 1207 expect(*t, data2, data3); 1208 1209 // Check the ID 1210 if (ID != t->getID()) { 1211 errln("FAIL: createInstance(" + ID + ").getID() => " + 1212 t->getID()); 1213 } 1214 1215 // Check the inverse 1216 Transliterator *u = t->createInverse(status); 1217 if (u == 0) { 1218 errln("FAIL: " + ID + ".createInverse() returned NULL"); 1219 } else if (u->getID() != uID) { 1220 errln("FAIL: " + ID + ".createInverse().getID() => " + 1221 u->getID() + ", expected " + uID); 1222 } 1223 1224 delete t; 1225 delete u; 1226 } 1227} 1228 1229/** 1230 * Test the case mapping transliterators. 1231 */ 1232void TransliteratorTest::TestCaseMap(void) { 1233 UParseError parseError; 1234 UErrorCode status = U_ZERO_ERROR; 1235 Transliterator* toUpper = 1236 Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status); 1237 Transliterator* toLower = 1238 Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status); 1239 Transliterator* toTitle = 1240 Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status); 1241 if (toUpper==0 || toLower==0 || toTitle==0) { 1242 errln("FAIL: createInstance returned NULL"); 1243 delete toUpper; 1244 delete toLower; 1245 delete toTitle; 1246 return; 1247 } 1248 1249 expect(*toUpper, "The quick brown fox jumped over the lazy dogs.", 1250 "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 1251 expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 1252 "the quick brown foX jumped over the lazY dogs."); 1253 expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.", 1254 "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 1255 1256 delete toUpper; 1257 delete toLower; 1258 delete toTitle; 1259} 1260 1261/** 1262 * Test the name mapping transliterators. 1263 */ 1264void TransliteratorTest::TestNameMap(void) { 1265 UParseError parseError; 1266 UErrorCode status = U_ZERO_ERROR; 1267 Transliterator* uni2name = 1268 Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status); 1269 Transliterator* name2uni = 1270 Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status); 1271 if (uni2name==0 || name2uni==0) { 1272 errln("FAIL: createInstance returned NULL"); 1273 delete uni2name; 1274 delete name2uni; 1275 return; 1276 } 1277 1278 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N 1279 expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), 1280 CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}")); 1281 expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"), 1282 CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); 1283 1284 delete uni2name; 1285 delete name2uni; 1286 1287 // round trip 1288 Transliterator* t = 1289 Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status); 1290 if (t==0) { 1291 errln("FAIL: createInstance returned NULL"); 1292 delete t; 1293 return; 1294 } 1295 1296 // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N 1297 UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"); 1298 expect(*t, s, s); 1299 delete t; 1300} 1301 1302/** 1303 * Test liberalized ID syntax. 1006c 1304 */ 1305void TransliteratorTest::TestLiberalizedID(void) { 1306 // Some test cases have an expected getID() value of NULL. This 1307 // means I have disabled the test case for now. This stuff is 1308 // still under development, and I haven't decided whether to make 1309 // getID() return canonical case yet. It will all get rewritten 1310 // with the move to Source-Target/Variant IDs anyway. [aliu] 1311 const char* DATA[] = { 1312 "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity", 1313 " Null ", "Null", "whitespace", 1314 " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", 1315 " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace", 1316 }; 1317 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); 1318 UParseError parseError; 1319 UErrorCode status= U_ZERO_ERROR; 1320 for (int32_t i=0; i<DATA_length; i+=3) { 1321 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status); 1322 if (t == 0) { 1323 dataerrln(UnicodeString("FAIL: ") + DATA[i+2] + 1324 " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status)); 1325 } else { 1326 UnicodeString exp; 1327 if (DATA[i+1]) { 1328 exp = UnicodeString(DATA[i+1], ""); 1329 } 1330 // Don't worry about getID() if the expected char* 1331 // is NULL -- see above. 1332 if (exp.length() == 0 || exp == t->getID()) { 1333 logln(UnicodeString("Ok: ") + DATA[i+2] + 1334 " create ID \"" + DATA[i] + "\" => \"" + 1335 exp + "\""); 1336 } else { 1337 errln(UnicodeString("FAIL: ") + DATA[i+2] + 1338 " create ID \"" + DATA[i] + "\" => \"" + 1339 t->getID() + "\", exp \"" + exp + "\""); 1340 } 1341 delete t; 1342 } 1343 } 1344} 1345 1346/* test for Jitterbug 912 */ 1347void TransliteratorTest::TestCreateInstance(){ 1348 const char* FORWARD = "F"; 1349 const char* REVERSE = "R"; 1350 const char* DATA[] = { 1351 // Column 1: id 1352 // Column 2: direction 1353 // Column 3: expected ID, or "" if expect failure 1354 "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912 1355 1356 // JB#2689: bad compound causes crash 1357 "InvalidSource-InvalidTarget", FORWARD, "", 1358 "InvalidSource-InvalidTarget", REVERSE, "", 1359 "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "", 1360 "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "", 1361 "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "", 1362 "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "", 1363 1364 NULL 1365 }; 1366 1367 for (int32_t i=0; DATA[i]; i+=3) { 1368 UParseError err; 1369 UErrorCode ec = U_ZERO_ERROR; 1370 UnicodeString id(DATA[i]); 1371 UTransDirection dir = (DATA[i+1]==FORWARD)? 1372 UTRANS_FORWARD:UTRANS_REVERSE; 1373 UnicodeString expID(DATA[i+2]); 1374 Transliterator* t = 1375 Transliterator::createInstance(id,dir,err,ec); 1376 UnicodeString newID; 1377 if (t) { 1378 newID = t->getID(); 1379 } 1380 UBool ok = (newID == expID); 1381 if (!t) { 1382 newID = u_errorName(ec); 1383 } 1384 if (ok) { 1385 logln((UnicodeString)"Ok: createInstance(" + 1386 id + "," + DATA[i+1] + ") => " + newID); 1387 } else { 1388 dataerrln((UnicodeString)"FAIL: createInstance(" + 1389 id + "," + DATA[i+1] + ") => " + newID + 1390 ", expected " + expID); 1391 } 1392 delete t; 1393 } 1394} 1395 1396/** 1397 * Test the normalization transliterator. 1398 */ 1399void TransliteratorTest::TestNormalizationTransliterator() { 1400 // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest 1401 // PLEASE KEEP THEM IN SYNC WITH BasicTest. 1402 const char* CANON[] = { 1403 // Input Decomposed Composed 1404 "cat", "cat", "cat" , 1405 "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" , 1406 1407 "\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above 1408 "D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above 1409 1410 "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above 1411 "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below 1412 "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above 1413 1414 "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above 1415 "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below 1416 1417 "\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave 1418 "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave 1419 "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron 1420 1421 "\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign 1422 "\\u00c5", "A\\u030a", "\\u00c5" , // A-ring 1423 1424 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0 1425 "\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0 1426 1427 "Henry IV", "Henry IV", "Henry IV" , 1428 "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" , 1429 1430 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana) 1431 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten 1432 "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten 1433 "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten 1434 "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten 1435 1436 "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" , 1437 0 // end 1438 }; 1439 1440 const char* COMPAT[] = { 1441 // Input Decomposed Composed 1442 "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed 1443 1444 "\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0 1445 "\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i 1446 1447 "Henry IV", "Henry IV", "Henry IV" , 1448 "Henry \\u2163", "Henry IV", "Henry IV" , 1449 1450 "\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana) 1451 "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten 1452 1453 "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten 1454 0 // end 1455 }; 1456 1457 int32_t i; 1458 UParseError parseError; 1459 UErrorCode status = U_ZERO_ERROR; 1460 Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status); 1461 Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status); 1462 if (!NFD || !NFC) { 1463 dataerrln("FAIL: createInstance failed: %s", u_errorName(status)); 1464 delete NFD; 1465 delete NFC; 1466 return; 1467 } 1468 for (i=0; CANON[i]; i+=3) { 1469 UnicodeString in = CharsToUnicodeString(CANON[i]); 1470 UnicodeString expd = CharsToUnicodeString(CANON[i+1]); 1471 UnicodeString expc = CharsToUnicodeString(CANON[i+2]); 1472 expect(*NFD, in, expd); 1473 expect(*NFC, in, expc); 1474 } 1475 delete NFD; 1476 delete NFC; 1477 1478 Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status); 1479 Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status); 1480 if (!NFKD || !NFKC) { 1481 errln("FAIL: createInstance failed"); 1482 delete NFKD; 1483 delete NFKC; 1484 return; 1485 } 1486 for (i=0; COMPAT[i]; i+=3) { 1487 UnicodeString in = CharsToUnicodeString(COMPAT[i]); 1488 UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]); 1489 UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]); 1490 expect(*NFKD, in, expkd); 1491 expect(*NFKC, in, expkc); 1492 } 1493 delete NFKD; 1494 delete NFKC; 1495 1496 UParseError pe; 1497 status = U_ZERO_ERROR; 1498 Transliterator *t = Transliterator::createInstance("NFD; [x]Remove", 1499 UTRANS_FORWARD, 1500 pe, status); 1501 if (t == 0) { 1502 errln("FAIL: createInstance failed"); 1503 } 1504 expect(*t, CharsToUnicodeString("\\u010dx"), 1505 CharsToUnicodeString("c\\u030C")); 1506 delete t; 1507} 1508 1509/** 1510 * Test compound RBT rules. 1511 */ 1512void TransliteratorTest::TestCompoundRBT(void) { 1513 // Careful with spacing and ';' here: Phrase this exactly 1514 // as toRules() is going to return it. If toRules() changes 1515 // with regard to spacing or ';', then adjust this string. 1516 UnicodeString rule("::Hex-Any;\n" 1517 "::Any-Lower;\n" 1518 "a > '.A.';\n" 1519 "b > '.B.';\n" 1520 "::[^t]Any-Upper;", ""); 1521 UParseError parseError; 1522 UErrorCode status = U_ZERO_ERROR; 1523 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status); 1524 if (t == 0) { 1525 errln("FAIL: createFromRules failed"); 1526 return; 1527 } 1528 expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"), 1529 "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 1530 UnicodeString r; 1531 t->toRules(r, TRUE); 1532 if (r == rule) { 1533 logln((UnicodeString)"OK: toRules() => " + r); 1534 } else { 1535 errln((UnicodeString)"FAIL: toRules() => " + r + 1536 ", expected " + rule); 1537 } 1538 delete t; 1539 1540 // Now test toRules 1541 t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status); 1542 if (t == 0) { 1543 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 1544 return; 1545 } 1546 UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;"); 1547 t->toRules(r, TRUE); 1548 if (r != exp) { 1549 errln((UnicodeString)"FAIL: toRules() => " + r + 1550 ", expected " + exp); 1551 } else { 1552 logln((UnicodeString)"OK: toRules() => " + r); 1553 } 1554 delete t; 1555 1556 // Round trip the result of toRules 1557 t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status); 1558 if (t == 0) { 1559 errln("FAIL: createFromRules #2 failed"); 1560 return; 1561 } else { 1562 logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded"); 1563 } 1564 1565 // Test toRules again 1566 t->toRules(r, TRUE); 1567 if (r != exp) { 1568 errln((UnicodeString)"FAIL: toRules() => " + r + 1569 ", expected " + exp); 1570 } else { 1571 logln((UnicodeString)"OK: toRules() => " + r); 1572 } 1573 1574 delete t; 1575 1576 // Test Foo(Bar) IDs. Careful with spacing in id; make it conform 1577 // to what the regenerated ID will look like. 1578 UnicodeString id("Upper(Lower);(NFKC)", ""); 1579 t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status); 1580 if (t == 0) { 1581 errln("FAIL: createInstance #2 failed"); 1582 return; 1583 } 1584 if (t->getID() == id) { 1585 logln((UnicodeString)"OK: created " + id); 1586 } else { 1587 errln((UnicodeString)"FAIL: createInstance(" + id + 1588 ").getID() => " + t->getID()); 1589 } 1590 1591 Transliterator *u = t->createInverse(status); 1592 if (u == 0) { 1593 errln("FAIL: createInverse failed"); 1594 delete t; 1595 return; 1596 } 1597 exp = "NFKC();Lower(Upper)"; 1598 if (u->getID() == exp) { 1599 logln((UnicodeString)"OK: createInverse(" + id + ") => " + 1600 u->getID()); 1601 } else { 1602 errln((UnicodeString)"FAIL: createInverse(" + id + ") => " + 1603 u->getID()); 1604 } 1605 delete t; 1606 delete u; 1607} 1608 1609/** 1610 * Compound filter semantics were orginially not implemented 1611 * correctly. Originally, each component filter f(i) is replaced by 1612 * f'(i) = f(i) && g, where g is the filter for the compound 1613 * transliterator. 1614 * 1615 * From Mark: 1616 * 1617 * Suppose and I have a transliterator X. Internally X is 1618 * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 1619 * 1620 * The compound should convert all greek characters (through latin) to 1621 * cyrillic, then lowercase the result. The filter should say "don't 1622 * touch 'A' in the original". But because an intermediate result 1623 * happens to go through "A", the Greek Alpha gets hung up. 1624 */ 1625void TransliteratorTest::TestCompoundFilter(void) { 1626 UParseError parseError; 1627 UErrorCode status = U_ZERO_ERROR; 1628 Transliterator *t = Transliterator::createInstance 1629 ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status); 1630 if (t == 0) { 1631 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 1632 return; 1633 } 1634 t->adoptFilter(new UnicodeSet("[^A]", status)); 1635 if (U_FAILURE(status)) { 1636 errln("FAIL: UnicodeSet ct failed"); 1637 delete t; 1638 return; 1639 } 1640 1641 // Only the 'A' at index 1 should remain unchanged 1642 expect(*t, 1643 CharsToUnicodeString("BA\\u039A\\u0391"), 1644 CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 1645 delete t; 1646} 1647 1648void TransliteratorTest::TestRemove(void) { 1649 UParseError parseError; 1650 UErrorCode status = U_ZERO_ERROR; 1651 Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status); 1652 if (t == 0) { 1653 errln("FAIL: createInstance failed"); 1654 return; 1655 } 1656 1657 expect(*t, "Able bodied baker's cats", "Ale odied ker's ts"); 1658 1659 // extra test for RemoveTransliterator::clone(), which at one point wasn't 1660 // duplicating the filter 1661 Transliterator* t2 = t->clone(); 1662 expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts"); 1663 1664 delete t; 1665 delete t2; 1666} 1667 1668void TransliteratorTest::TestToRules(void) { 1669 const char* RBT = "rbt"; 1670 const char* SET = "set"; 1671 static const char* DATA[] = { 1672 RBT, 1673 "$a=\\u4E61; [$a] > A;", 1674 "[\\u4E61] > A;", 1675 1676 RBT, 1677 "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 1678 "[[:Zs:][:Zl:]]{a} > A;", 1679 1680 SET, 1681 "[[:Zs:][:Zl:]]", 1682 "[[:Zs:][:Zl:]]", 1683 1684 SET, 1685 "[:Ps:]", 1686 "[:Ps:]", 1687 1688 SET, 1689 "[:L:]", 1690 "[:L:]", 1691 1692 SET, 1693 "[[:L:]-[A]]", 1694 "[[:L:]-[A]]", 1695 1696 SET, 1697 "[~[:Lu:][:Ll:]]", 1698 "[~[:Lu:][:Ll:]]", 1699 1700 SET, 1701 "[~[a-z]]", 1702 "[~[a-z]]", 1703 1704 RBT, 1705 "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 1706 "[^[:Zs:]]{a} > A;", 1707 1708 RBT, 1709 "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 1710 "[[a-z]-[:Zs:]]{a} > A;", 1711 1712 RBT, 1713 "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 1714 "[[:Zs:]&[a-z]]{a} > A;", 1715 1716 RBT, 1717 "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 1718 "[x[:Zs:]]{a} > A;", 1719 1720 RBT, 1721 "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;" 1722 "$macron = \\u0304 ;" 1723 "$evowel = [aeiouyAEIOUY] ;" 1724 "$iotasub = \\u0345 ;" 1725 "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 1726 "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 1727 1728 RBT, 1729 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1730 "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 1731 }; 1732 static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 1733 1734 for (int32_t d=0; d < DATA_length; d+=3) { 1735 if (DATA[d] == RBT) { 1736 // Transliterator test 1737 UParseError parseError; 1738 UErrorCode status = U_ZERO_ERROR; 1739 Transliterator *t = Transliterator::createFromRules("ID", 1740 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status); 1741 if (t == 0) { 1742 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status)); 1743 return; 1744 } 1745 UnicodeString rules, escapedRules; 1746 t->toRules(rules, FALSE); 1747 t->toRules(escapedRules, TRUE); 1748 UnicodeString expRules = CharsToUnicodeString(DATA[d+2]); 1749 UnicodeString expEscapedRules(DATA[d+2], -1, US_INV); 1750 if (rules == expRules) { 1751 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) + 1752 " => " + rules); 1753 } else { 1754 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) + 1755 " => " + rules + ", exp " + expRules); 1756 } 1757 if (escapedRules == expEscapedRules) { 1758 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) + 1759 " => " + escapedRules); 1760 } else { 1761 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) + 1762 " => " + escapedRules + ", exp " + expEscapedRules); 1763 } 1764 delete t; 1765 1766 } else { 1767 // UnicodeSet test 1768 UErrorCode status = U_ZERO_ERROR; 1769 UnicodeString pat(DATA[d+1], -1, US_INV); 1770 UnicodeString expToPat(DATA[d+2], -1, US_INV); 1771 UnicodeSet set(pat, status); 1772 if (U_FAILURE(status)) { 1773 errln("FAIL: UnicodeSet ct failed"); 1774 return; 1775 } 1776 // Adjust spacing etc. as necessary. 1777 UnicodeString toPat; 1778 set.toPattern(toPat); 1779 if (expToPat == toPat) { 1780 logln((UnicodeString)"Ok: " + pat + 1781 " => " + toPat); 1782 } else { 1783 errln((UnicodeString)"FAIL: " + pat + 1784 " => " + prettify(toPat, TRUE) + 1785 ", exp " + prettify(pat, TRUE)); 1786 } 1787 } 1788 } 1789} 1790 1791void TransliteratorTest::TestContext() { 1792 UTransPosition pos = {0, 2, 0, 1}; // cs cl s l 1793 expect("de > x; {d}e > y;", 1794 "de", 1795 "ye", 1796 &pos); 1797 1798 expect("ab{c} > z;", 1799 "xadabdabcy", 1800 "xadabdabzy"); 1801} 1802 1803void TransliteratorTest::TestSupplemental() { 1804 1805 expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" 1806 "a > $a; $s > i;"), 1807 CharsToUnicodeString("ab\\U0001030Fx"), 1808 CharsToUnicodeString("\\U00010300bix")); 1809 1810 expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" 1811 "$b=[A-Z\\U00010400-\\U0001044D];" 1812 "($a)($b) > $2 $1;"), 1813 CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 1814 CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 1815 1816 // k|ax\\U00010300xm 1817 1818 // k|a\\U00010400\\U00010300xm 1819 // ky|\\U00010400\\U00010300xm 1820 // ky\\U00010400|\\U00010300xm 1821 1822 // ky\\U00010400|\\U00010300\\U00010400m 1823 // ky\\U00010400y|\\U00010400m 1824 expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" 1825 "$a {x} > | @ \\U00010400;" 1826 "{$a} [^\\u0000-\\uFFFF] > y;"), 1827 CharsToUnicodeString("kax\\U00010300xm"), 1828 CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 1829 1830 expectT("Any-Name", 1831 CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 1832 UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}")); 1833 1834 expectT("Any-Hex/Unicode", 1835 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1836 UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0")); 1837 1838 expectT("Any-Hex/C", 1839 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1840 UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0")); 1841 1842 expectT("Any-Hex/Perl", 1843 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1844 UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}")); 1845 1846 expectT("Any-Hex/Java", 1847 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1848 UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0")); 1849 1850 expectT("Any-Hex/XML", 1851 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1852 "𐌰􏼀󠁡 "); 1853 1854 expectT("Any-Hex/XML10", 1855 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1856 "𐌰􏼀󠁡 "); 1857 1858 expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"), 1859 CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 1860 CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 1861} 1862 1863void TransliteratorTest::TestQuantifier() { 1864 1865 // Make sure @ in a quantified anteContext works 1866 expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 1867 "AAAAAb", 1868 "aaa(aac)"); 1869 1870 // Make sure @ in a quantified postContext works 1871 expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 1872 "baaaaa", 1873 "caa(aaa)"); 1874 1875 // Make sure @ in a quantified postContext with seg ref works 1876 expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 1877 "baaaaa", 1878 "baa(aaa)"); 1879 1880 // Make sure @ past ante context doesn't enter ante context 1881 UTransPosition pos = {0, 5, 3, 5}; 1882 expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 1883 "xxxab", 1884 "xxx(ac)", 1885 &pos); 1886 1887 // Make sure @ past post context doesn't pass limit 1888 UTransPosition pos2 = {0, 4, 0, 2}; 1889 expect("{b} a+ > c @@ |; x > y; a > A;", 1890 "baxx", 1891 "caxx", 1892 &pos2); 1893 1894 // Make sure @ past post context doesn't enter post context 1895 expect("{b} a+ > c @@ |; x > y; a > A;", 1896 "baxx", 1897 "cayy"); 1898 1899 expect("(ab)? c > d;", 1900 "c abc ababc", 1901 "d d abd"); 1902 1903 // NOTE: The (ab)+ when referenced just yields a single "ab", 1904 // not the full sequence of them. This accords with perl behavior. 1905 expect("(ab)+ {x} > '(' $1 ')';", 1906 "x abx ababxy", 1907 "x ab(ab) abab(ab)y"); 1908 1909 expect("b+ > x;", 1910 "ac abc abbc abbbc", 1911 "ac axc axc axc"); 1912 1913 expect("[abc]+ > x;", 1914 "qac abrc abbcs abtbbc", 1915 "qx xrx xs xtx"); 1916 1917 expect("q{(ab)+} > x;", 1918 "qa qab qaba qababc qaba", 1919 "qa qx qxa qxc qxa"); 1920 1921 expect("q(ab)* > x;", 1922 "qa qab qaba qababc", 1923 "xa x xa xc"); 1924 1925 // NOTE: The (ab)+ when referenced just yields a single "ab", 1926 // not the full sequence of them. This accords with perl behavior. 1927 expect("q(ab)* > '(' $1 ')';", 1928 "qa qab qaba qababc", 1929 "()a (ab) (ab)a (ab)c"); 1930 1931 // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 1932 // quoted string 1933 expect("'ab'+ > x;", 1934 "bb ab ababb", 1935 "bb x xb"); 1936 1937 // $foo+ and $foo* -- the quantifier should apply to the entire 1938 // variable reference 1939 expect("$var = ab; $var+ > x;", 1940 "bb ab ababb", 1941 "bb x xb"); 1942} 1943 1944class TestTrans : public Transliterator { 1945public: 1946 TestTrans(const UnicodeString& id) : Transliterator(id, 0) { 1947 } 1948 virtual Transliterator* clone(void) const { 1949 return new TestTrans(getID()); 1950 } 1951 virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets, 1952 UBool /*isIncremental*/) const 1953 { 1954 offsets.start = offsets.limit; 1955 } 1956 virtual UClassID getDynamicClassID() const; 1957 static UClassID U_EXPORT2 getStaticClassID(); 1958}; 1959UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans) 1960 1961/** 1962 * Test Source-Target/Variant. 1963 */ 1964void TransliteratorTest::TestSTV(void) { 1965 int32_t ns = Transliterator::countAvailableSources(); 1966 if (ns < 0 || ns > 255) { 1967 errln((UnicodeString)"FAIL: Bad source count: " + ns); 1968 return; 1969 } 1970 int32_t i, j; 1971 for (i=0; i<ns; ++i) { 1972 UnicodeString source; 1973 Transliterator::getAvailableSource(i, source); 1974 logln((UnicodeString)"" + i + ": " + source); 1975 if (source.length() == 0) { 1976 errln("FAIL: empty source"); 1977 continue; 1978 } 1979 int32_t nt = Transliterator::countAvailableTargets(source); 1980 if (nt < 0 || nt > 255) { 1981 errln((UnicodeString)"FAIL: Bad target count: " + nt); 1982 continue; 1983 } 1984 for (int32_t j=0; j<nt; ++j) { 1985 UnicodeString target; 1986 Transliterator::getAvailableTarget(j, source, target); 1987 logln((UnicodeString)" " + j + ": " + target); 1988 if (target.length() == 0) { 1989 errln("FAIL: empty target"); 1990 continue; 1991 } 1992 int32_t nv = Transliterator::countAvailableVariants(source, target); 1993 if (nv < 0 || nv > 255) { 1994 errln((UnicodeString)"FAIL: Bad variant count: " + nv); 1995 continue; 1996 } 1997 for (int32_t k=0; k<nv; ++k) { 1998 UnicodeString variant; 1999 Transliterator::getAvailableVariant(k, source, target, variant); 2000 if (variant.length() == 0) { 2001 logln((UnicodeString)" " + k + ": <empty>"); 2002 } else { 2003 logln((UnicodeString)" " + k + ": " + variant); 2004 } 2005 } 2006 } 2007 } 2008 2009 // Test registration 2010 const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 2011 const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 2012 const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" }; 2013 for (i=0; i<3; ++i) { 2014 Transliterator *t = new TestTrans(IDS[i]); 2015 if (t == 0) { 2016 errln("FAIL: out of memory"); 2017 return; 2018 } 2019 if (t->getID() != IDS[i]) { 2020 errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]); 2021 delete t; 2022 return; 2023 } 2024 Transliterator::registerInstance(t); 2025 UErrorCode status = U_ZERO_ERROR; 2026 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status); 2027 if (t == NULL) { 2028 errln((UnicodeString)"FAIL: Registration/creation failed for ID " + 2029 IDS[i]); 2030 } else { 2031 logln((UnicodeString)"Ok: Registration/creation succeeded for ID " + 2032 IDS[i]); 2033 delete t; 2034 } 2035 Transliterator::unregister(IDS[i]); 2036 t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status); 2037 if (t != NULL) { 2038 errln((UnicodeString)"FAIL: Unregistration failed for ID " + 2039 IDS[i]); 2040 delete t; 2041 } 2042 } 2043 2044 // Make sure getAvailable API reflects removal 2045 int32_t n = Transliterator::countAvailableIDs(); 2046 for (i=0; i<n; ++i) { 2047 UnicodeString id = Transliterator::getAvailableID(i); 2048 for (j=0; j<3; ++j) { 2049 if (id.caseCompare(FULL_IDS[j],0)==0) { 2050 errln((UnicodeString)"FAIL: unregister(" + id + ") failed"); 2051 } 2052 } 2053 } 2054 n = Transliterator::countAvailableTargets("Any"); 2055 for (i=0; i<n; ++i) { 2056 UnicodeString t; 2057 Transliterator::getAvailableTarget(i, "Any", t); 2058 if (t.caseCompare(IDS[0],0)==0) { 2059 errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed"); 2060 } 2061 } 2062 n = Transliterator::countAvailableSources(); 2063 for (i=0; i<n; ++i) { 2064 UnicodeString s; 2065 Transliterator::getAvailableSource(i, s); 2066 for (j=0; j<3; ++j) { 2067 if (SOURCES[j] == NULL) continue; 2068 if (s.caseCompare(SOURCES[j],0)==0) { 2069 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed"); 2070 } 2071 } 2072 } 2073} 2074 2075/** 2076 * Test inverse of Greek-Latin; Title() 2077 */ 2078void TransliteratorTest::TestCompoundInverse(void) { 2079 UParseError parseError; 2080 UErrorCode status = U_ZERO_ERROR; 2081 Transliterator *t = Transliterator::createInstance 2082 ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status); 2083 if (t == 0) { 2084 dataerrln("FAIL: createInstance - %s", u_errorName(status)); 2085 return; 2086 } 2087 UnicodeString exp("(Title);Latin-Greek"); 2088 if (t->getID() == exp) { 2089 logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 2090 t->getID()); 2091 } else { 2092 errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 2093 t->getID() + "\", expected \"" + exp + "\""); 2094 } 2095 delete t; 2096} 2097 2098/** 2099 * Test NFD chaining with RBT 2100 */ 2101void TransliteratorTest::TestNFDChainRBT() { 2102 UParseError pe; 2103 UErrorCode ec = U_ZERO_ERROR; 2104 Transliterator* t = Transliterator::createFromRules( 2105 "TEST", "::NFD; aa > Q; a > q;", 2106 UTRANS_FORWARD, pe, ec); 2107 if (t == NULL || U_FAILURE(ec)) { 2108 dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec)); 2109 return; 2110 } 2111 expect(*t, "aa", "Q"); 2112 delete t; 2113 2114 // TEMPORARY TESTS -- BEING DEBUGGED 2115//=- UnicodeString s, s2; 2116//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec); 2117//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); 2118//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D"); 2119//=- expect(*t, s, s2); 2120//=- delete t; 2121//=- 2122//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec); 2123//=- expect(*t, s2, s); 2124//=- delete t; 2125//=- 2126//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec); 2127//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); 2128//=- expect(*t, s, s); 2129//=- delete t; 2130 2131// const char* source[] = { 2132// /* 2133// "\\u015Br\\u012Bmad", 2134// "bhagavadg\\u012Bt\\u0101", 2135// "adhy\\u0101ya", 2136// "arjuna", 2137// "vi\\u1E63\\u0101da", 2138// "y\\u014Dga", 2139// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 2140// "uv\\u0101cr\\u0325", 2141// */ 2142// "rmk\\u1E63\\u0113t", 2143// //"dharmak\\u1E63\\u0113tr\\u0113", 2144// /* 2145// "kuruk\\u1E63\\u0113tr\\u0113", 2146// "samav\\u0113t\\u0101", 2147// "yuyutsava-\\u1E25", 2148// "m\\u0101mak\\u0101-\\u1E25", 2149// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 2150// "kimakurvata", 2151// "san\\u0304java", 2152// */ 2153// 2154// 0 2155// }; 2156// const char* expected[] = { 2157// /* 2158// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 2159// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 2160// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 2161// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 2162// "\\u0935\\u093f\\u0937\\u093e\\u0926", 2163// "\\u092f\\u094b\\u0917", 2164// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 2165// "\\u0909\\u0935\\u093E\\u091A\\u0943", 2166// */ 2167// "\\u0927", 2168// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2169// /* 2170// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2171// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 2172// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 2173// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 2174// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 2175// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 2176// "\\u0938\\u0902\\u091c\\u0935", 2177// */ 2178// 0 2179// }; 2180// UErrorCode status = U_ZERO_ERROR; 2181// UParseError parseError; 2182// UnicodeString message; 2183// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2184// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2185// if(U_FAILURE(status)){ 2186// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2187// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) ); 2188// delete latinToDevToLatin; 2189// delete devToLatinToDev; 2190// return; 2191// } 2192// UnicodeString gotResult; 2193// for(int i= 0; source[i] != 0; i++){ 2194// gotResult = source[i]; 2195// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 2196// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i])); 2197// } 2198// delete latinToDevToLatin; 2199// delete devToLatinToDev; 2200} 2201 2202/** 2203 * Inverse of "Null" should be "Null". (J21) 2204 */ 2205void TransliteratorTest::TestNullInverse() { 2206 UParseError pe; 2207 UErrorCode ec = U_ZERO_ERROR; 2208 Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec); 2209 if (t == 0 || U_FAILURE(ec)) { 2210 errln("FAIL: createInstance"); 2211 return; 2212 } 2213 Transliterator *u = t->createInverse(ec); 2214 if (u == 0 || U_FAILURE(ec)) { 2215 errln("FAIL: createInverse"); 2216 delete t; 2217 return; 2218 } 2219 if (u->getID() != "Null") { 2220 errln("FAIL: Inverse of Null should be Null"); 2221 } 2222 delete t; 2223 delete u; 2224} 2225 2226/** 2227 * Check ID of inverse of alias. (J22) 2228 */ 2229void TransliteratorTest::TestAliasInverseID() { 2230 UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse 2231 UParseError pe; 2232 UErrorCode ec = U_ZERO_ERROR; 2233 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); 2234 if (t == 0 || U_FAILURE(ec)) { 2235 dataerrln("FAIL: createInstance - %s", u_errorName(ec)); 2236 return; 2237 } 2238 Transliterator *u = t->createInverse(ec); 2239 if (u == 0 || U_FAILURE(ec)) { 2240 errln("FAIL: createInverse"); 2241 delete t; 2242 return; 2243 } 2244 UnicodeString exp = "Hangul-Latin"; 2245 UnicodeString got = u->getID(); 2246 if (got != exp) { 2247 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got + 2248 ", expected " + exp); 2249 } 2250 delete t; 2251 delete u; 2252} 2253 2254/** 2255 * Test IDs of inverses of compound transliterators. (J20) 2256 */ 2257void TransliteratorTest::TestCompoundInverseID() { 2258 UnicodeString ID = "Latin-Jamo;NFC(NFD)"; 2259 UParseError pe; 2260 UErrorCode ec = U_ZERO_ERROR; 2261 Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); 2262 if (t == 0 || U_FAILURE(ec)) { 2263 dataerrln("FAIL: createInstance - %s", u_errorName(ec)); 2264 return; 2265 } 2266 Transliterator *u = t->createInverse(ec); 2267 if (u == 0 || U_FAILURE(ec)) { 2268 errln("FAIL: createInverse"); 2269 delete t; 2270 return; 2271 } 2272 UnicodeString exp = "NFD(NFC);Jamo-Latin"; 2273 UnicodeString got = u->getID(); 2274 if (got != exp) { 2275 errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got + 2276 ", expected " + exp); 2277 } 2278 delete t; 2279 delete u; 2280} 2281 2282/** 2283 * Test undefined variable. 2284 2285 */ 2286void TransliteratorTest::TestUndefinedVariable() { 2287 UnicodeString rule = "$initial } a <> \\u1161;"; 2288 UParseError pe; 2289 UErrorCode ec = U_ZERO_ERROR; 2290 Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec); 2291 delete t; 2292 if (U_FAILURE(ec)) { 2293 logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " + 2294 u_errorName(ec)); 2295 return; 2296 } 2297 errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " + 2298 u_errorName(ec)); 2299} 2300 2301/** 2302 * Test empty context. 2303 */ 2304void TransliteratorTest::TestEmptyContext() { 2305 expect(" { a } > b;", "xay a ", "xby b "); 2306} 2307 2308/** 2309* Test compound filter ID syntax 2310*/ 2311void TransliteratorTest::TestCompoundFilterID(void) { 2312 static const char* DATA[] = { 2313 // Col. 1 = ID or rule set (latter must start with #) 2314 2315 // = columns > 1 are null if expect col. 1 to be illegal = 2316 2317 // Col. 2 = direction, "F..." or "R..." 2318 // Col. 3 = source string 2319 // Col. 4 = exp result 2320 2321 "[abc]; [abc]", NULL, NULL, NULL, // multiple filters 2322 "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter 2323 "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c", 2324 "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393", 2325 "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c", 2326 "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393", 2327 NULL, 2328 }; 2329 2330 for (int32_t i=0; DATA[i]; i+=4) { 2331 UnicodeString id = CharsToUnicodeString(DATA[i]); 2332 UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ? 2333 UTRANS_REVERSE : UTRANS_FORWARD; 2334 UnicodeString source; 2335 UnicodeString exp; 2336 if (DATA[i+2] != NULL) { 2337 source = CharsToUnicodeString(DATA[i+2]); 2338 exp = CharsToUnicodeString(DATA[i+3]); 2339 } 2340 UBool expOk = (DATA[i+1] != NULL); 2341 Transliterator* t = NULL; 2342 UParseError pe; 2343 UErrorCode ec = U_ZERO_ERROR; 2344 if (id.charAt(0) == 0x23/*#*/) { 2345 t = Transliterator::createFromRules("ID", id, direction, pe, ec); 2346 } else { 2347 t = Transliterator::createInstance(id, direction, pe, ec); 2348 } 2349 UBool ok = (t != NULL && U_SUCCESS(ec)); 2350 UnicodeString transID; 2351 if (t!=0) { 2352 transID = t->getID(); 2353 } 2354 else { 2355 transID = UnicodeString("NULL", ""); 2356 } 2357 if (ok == expOk) { 2358 logln((UnicodeString)"Ok: " + id + " => " + transID + ", " + 2359 u_errorName(ec)); 2360 if (source.length() != 0) { 2361 expect(*t, source, exp); 2362 } 2363 delete t; 2364 } else { 2365 dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " + 2366 u_errorName(ec)); 2367 } 2368 } 2369} 2370 2371/** 2372 * Test new property set syntax 2373 */ 2374void TransliteratorTest::TestPropertySet() { 2375 expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx"); 2376 expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 2377 "[ a stitch ]\n[ in time ]\r[ saves 9]"); 2378} 2379 2380/** 2381 * Test various failure points of the new 2.0 engine. 2382 */ 2383void TransliteratorTest::TestNewEngine() { 2384 UParseError pe; 2385 UErrorCode ec = U_ZERO_ERROR; 2386 Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec); 2387 if (t == 0 || U_FAILURE(ec)) { 2388 dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec)); 2389 return; 2390 } 2391 // Katakana should be untouched 2392 expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"), 2393 CharsToUnicodeString("\\u3042\\u3042\\u30A2")); 2394 2395 delete t; 2396 2397#if 1 2398 // This test will only work if Transliterator.ROLLBACK is 2399 // true. Otherwise, this test will fail, revealing a 2400 // limitation of global filters in incremental mode. 2401 Transliterator *a = 2402 Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec); 2403 Transliterator *A = 2404 Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec); 2405 if (U_FAILURE(ec)) { 2406 delete a; 2407 delete A; 2408 return; 2409 } 2410 2411 Transliterator* array[3]; 2412 array[0] = a; 2413 array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec); 2414 array[2] = A; 2415 if (U_FAILURE(ec)) { 2416 errln("FAIL: createInstance NFD"); 2417 delete a; 2418 delete A; 2419 delete array[1]; 2420 return; 2421 } 2422 2423 t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec)); 2424 if (U_FAILURE(ec)) { 2425 errln("FAIL: UnicodeSet constructor"); 2426 delete a; 2427 delete A; 2428 delete array[1]; 2429 delete t; 2430 return; 2431 } 2432 2433 expect(*t, "aAaA", "bAbA"); 2434 2435 assertTrue("countElements", t->countElements() == 3); 2436 assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A"); 2437 assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD"); 2438 assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b"); 2439 assertSuccess("getElement", ec); 2440 2441 delete a; 2442 delete A; 2443 delete array[1]; 2444 delete t; 2445#endif 2446 2447 expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 2448 "a", 2449 "ax"); 2450 2451 UnicodeString gr = CharsToUnicodeString( 2452 "$ddot = \\u0308 ;" 2453 "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;" 2454 "$rough = \\u0314 ;" 2455 "($lcgvowel+ $ddot?) $rough > h | $1 ;" 2456 "\\u03b1 <> a ;" 2457 "$rough <> h ;"); 2458 2459 expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha"); 2460} 2461 2462/** 2463 * Test quantified segment behavior. We want: 2464 * ([abc])+ > x $1 x; applied to "cba" produces "xax" 2465 */ 2466void TransliteratorTest::TestQuantifiedSegment(void) { 2467 // The normal case 2468 expect("([abc]+) > x $1 x;", "cba", "xcbax"); 2469 2470 // The tricky case; the quantifier is around the segment 2471 expect("([abc])+ > x $1 x;", "cba", "xax"); 2472 2473 // Tricky case in reverse direction 2474 expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 2475 2476 // Check post-context segment 2477 expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 2478 2479 // Test toRule/toPattern for non-quantified segment. 2480 // Careful with spacing here. 2481 UnicodeString r("([a-c]){q} > x $1 x;"); 2482 UParseError pe; 2483 UErrorCode ec = U_ZERO_ERROR; 2484 Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); 2485 if (U_FAILURE(ec)) { 2486 errln("FAIL: createFromRules"); 2487 delete t; 2488 return; 2489 } 2490 UnicodeString rr; 2491 t->toRules(rr, TRUE); 2492 if (r != rr) { 2493 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 2494 } else { 2495 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 2496 } 2497 delete t; 2498 2499 // Test toRule/toPattern for quantified segment. 2500 // Careful with spacing here. 2501 r = "([a-c])+{q} > x $1 x;"; 2502 t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec); 2503 if (U_FAILURE(ec)) { 2504 errln("FAIL: createFromRules"); 2505 delete t; 2506 return; 2507 } 2508 t->toRules(rr, TRUE); 2509 if (r != rr) { 2510 errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\""); 2511 } else { 2512 logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\""); 2513 } 2514 delete t; 2515} 2516 2517//====================================================================== 2518// Ram's tests 2519//====================================================================== 2520void TransliteratorTest::TestDevanagariLatinRT(){ 2521 const int MAX_LEN= 52; 2522 const char* const source[MAX_LEN] = { 2523 "bh\\u0101rata", 2524 "kra", 2525 "k\\u1E63a", 2526 "khra", 2527 "gra", 2528 "\\u1E45ra", 2529 "cra", 2530 "chra", 2531 "j\\u00F1a", 2532 "jhra", 2533 "\\u00F1ra", 2534 "\\u1E6Dya", 2535 "\\u1E6Dhra", 2536 "\\u1E0Dya", 2537 //"r\\u0323ya", // \u095c is not valid in Devanagari 2538 "\\u1E0Dhya", 2539 "\\u1E5Bhra", 2540 "\\u1E47ra", 2541 "tta", 2542 "thra", 2543 "dda", 2544 "dhra", 2545 "nna", 2546 "pra", 2547 "phra", 2548 "bra", 2549 "bhra", 2550 "mra", 2551 "\\u1E49ra", 2552 //"l\\u0331ra", 2553 "yra", 2554 "\\u1E8Fra", 2555 //"l-", 2556 "vra", 2557 "\\u015Bra", 2558 "\\u1E63ra", 2559 "sra", 2560 "hma", 2561 "\\u1E6D\\u1E6Da", 2562 "\\u1E6D\\u1E6Dha", 2563 "\\u1E6Dh\\u1E6Dha", 2564 "\\u1E0D\\u1E0Da", 2565 "\\u1E0D\\u1E0Dha", 2566 "\\u1E6Dya", 2567 "\\u1E6Dhya", 2568 "\\u1E0Dya", 2569 "\\u1E0Dhya", 2570 // Not roundtrippable -- 2571 // \\u0939\\u094d\\u094d\\u092E - hma 2572 // \\u0939\\u094d\\u092E - hma 2573 // CharsToUnicodeString("hma"), 2574 "hya", 2575 "\\u015Br\\u0325", 2576 "\\u015Bca", 2577 "\\u0115", 2578 "san\\u0304j\\u012Bb s\\u0113nagupta", 2579 "\\u0101nand vaddir\\u0101ju", 2580 "\\u0101", 2581 "a" 2582 }; 2583 const char* const expected[MAX_LEN] = { 2584 "\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */ 2585 "\\u0915\\u094D\\u0930", /* kra */ 2586 "\\u0915\\u094D\\u0937", /* ks\\u0323a */ 2587 "\\u0916\\u094D\\u0930", /* khra */ 2588 "\\u0917\\u094D\\u0930", /* gra */ 2589 "\\u0919\\u094D\\u0930", /* n\\u0307ra */ 2590 "\\u091A\\u094D\\u0930", /* cra */ 2591 "\\u091B\\u094D\\u0930", /* chra */ 2592 "\\u091C\\u094D\\u091E", /* jn\\u0303a */ 2593 "\\u091D\\u094D\\u0930", /* jhra */ 2594 "\\u091E\\u094D\\u0930", /* n\\u0303ra */ 2595 "\\u091F\\u094D\\u092F", /* t\\u0323ya */ 2596 "\\u0920\\u094D\\u0930", /* t\\u0323hra */ 2597 "\\u0921\\u094D\\u092F", /* d\\u0323ya */ 2598 //"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari 2599 "\\u0922\\u094D\\u092F", /* d\\u0323hya */ 2600 "\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */ 2601 "\\u0923\\u094D\\u0930", /* n\\u0323ra */ 2602 "\\u0924\\u094D\\u0924", /* tta */ 2603 "\\u0925\\u094D\\u0930", /* thra */ 2604 "\\u0926\\u094D\\u0926", /* dda */ 2605 "\\u0927\\u094D\\u0930", /* dhra */ 2606 "\\u0928\\u094D\\u0928", /* nna */ 2607 "\\u092A\\u094D\\u0930", /* pra */ 2608 "\\u092B\\u094D\\u0930", /* phra */ 2609 "\\u092C\\u094D\\u0930", /* bra */ 2610 "\\u092D\\u094D\\u0930", /* bhra */ 2611 "\\u092E\\u094D\\u0930", /* mra */ 2612 "\\u0929\\u094D\\u0930", /* n\\u0331ra */ 2613 //"\\u0934\\u094D\\u0930", /* l\\u0331ra */ 2614 "\\u092F\\u094D\\u0930", /* yra */ 2615 "\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */ 2616 //"l-", 2617 "\\u0935\\u094D\\u0930", /* vra */ 2618 "\\u0936\\u094D\\u0930", /* s\\u0301ra */ 2619 "\\u0937\\u094D\\u0930", /* s\\u0323ra */ 2620 "\\u0938\\u094D\\u0930", /* sra */ 2621 "\\u0939\\u094d\\u092E", /* hma */ 2622 "\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */ 2623 "\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */ 2624 "\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/ 2625 "\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */ 2626 "\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */ 2627 "\\u091F\\u094D\\u092F", /* t\\u0323ya */ 2628 "\\u0920\\u094D\\u092F", /* t\\u0323hya */ 2629 "\\u0921\\u094D\\u092F", /* d\\u0323ya */ 2630 "\\u0922\\u094D\\u092F", /* d\\u0323hya */ 2631 // "hma", /* hma */ 2632 "\\u0939\\u094D\\u092F", /* hya */ 2633 "\\u0936\\u0943", /* s\\u0301r\\u0325a */ 2634 "\\u0936\\u094D\\u091A", /* s\\u0301ca */ 2635 "\\u090d", /* e\\u0306 */ 2636 "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924", 2637 "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941", 2638 "\\u0906", 2639 "\\u0905", 2640 }; 2641 UErrorCode status = U_ZERO_ERROR; 2642 UParseError parseError; 2643 UnicodeString message; 2644 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2645 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2646 if(U_FAILURE(status)){ 2647 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2648 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2649 return; 2650 } 2651 UnicodeString gotResult; 2652 for(int i= 0; i<MAX_LEN; i++){ 2653 gotResult = source[i]; 2654 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i])); 2655 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i])); 2656 } 2657 delete latinToDev; 2658 delete devToLatin; 2659} 2660 2661void TransliteratorTest::TestTeluguLatinRT(){ 2662 const int MAX_LEN=10; 2663 const char* const source[MAX_LEN] = { 2664 "raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */ 2665 "\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */ 2666 "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */ 2667 "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */ 2668 "san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */ 2669 "amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */ 2670 "ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */ 2671 "\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */ 2672 "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */ 2673 "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */ 2674 }; 2675 2676 const char* const expected[MAX_LEN] = { 2677 "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", 2678 "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41", 2679 "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26", 2680 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26", 2681 "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24", 2682 "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32", 2683 "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", 2684 "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32", 2685 "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", 2686 "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", 2687 }; 2688 2689 UErrorCode status = U_ZERO_ERROR; 2690 UParseError parseError; 2691 UnicodeString message; 2692 Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status); 2693 Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status); 2694 if(U_FAILURE(status)){ 2695 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2696 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2697 return; 2698 } 2699 UnicodeString gotResult; 2700 for(int i= 0; i<MAX_LEN; i++){ 2701 gotResult = source[i]; 2702 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i])); 2703 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i])); 2704 } 2705 delete latinToDev; 2706 delete devToLatin; 2707} 2708 2709void TransliteratorTest::TestSanskritLatinRT(){ 2710 const int MAX_LEN =16; 2711 const char* const source[MAX_LEN] = { 2712 "rmk\\u1E63\\u0113t", 2713 "\\u015Br\\u012Bmad", 2714 "bhagavadg\\u012Bt\\u0101", 2715 "adhy\\u0101ya", 2716 "arjuna", 2717 "vi\\u1E63\\u0101da", 2718 "y\\u014Dga", 2719 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 2720 "uv\\u0101cr\\u0325", 2721 "dharmak\\u1E63\\u0113tr\\u0113", 2722 "kuruk\\u1E63\\u0113tr\\u0113", 2723 "samav\\u0113t\\u0101", 2724 "yuyutsava\\u1E25", 2725 "m\\u0101mak\\u0101\\u1E25", 2726 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 2727 "kimakurvata", 2728 "san\\u0304java", 2729 }; 2730 const char* const expected[MAX_LEN] = { 2731 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", 2732 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 2733 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 2734 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 2735 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 2736 "\\u0935\\u093f\\u0937\\u093e\\u0926", 2737 "\\u092f\\u094b\\u0917", 2738 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 2739 "\\u0909\\u0935\\u093E\\u091A\\u0943", 2740 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2741 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2742 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 2743 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 2744 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 2745 //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 2746 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 2747 "\\u0938\\u0902\\u091c\\u0935", 2748 }; 2749 UErrorCode status = U_ZERO_ERROR; 2750 UParseError parseError; 2751 UnicodeString message; 2752 Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2753 Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2754 if(U_FAILURE(status)){ 2755 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2756 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2757 return; 2758 } 2759 UnicodeString gotResult; 2760 for(int i= 0; i<MAX_LEN; i++){ 2761 gotResult = source[i]; 2762 expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i])); 2763 expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i])); 2764 } 2765 delete latinToDev; 2766 delete devToLatin; 2767} 2768 2769 2770void TransliteratorTest::TestCompoundLatinRT(){ 2771 const char* const source[] = { 2772 "rmk\\u1E63\\u0113t", 2773 "\\u015Br\\u012Bmad", 2774 "bhagavadg\\u012Bt\\u0101", 2775 "adhy\\u0101ya", 2776 "arjuna", 2777 "vi\\u1E63\\u0101da", 2778 "y\\u014Dga", 2779 "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 2780 "uv\\u0101cr\\u0325", 2781 "dharmak\\u1E63\\u0113tr\\u0113", 2782 "kuruk\\u1E63\\u0113tr\\u0113", 2783 "samav\\u0113t\\u0101", 2784 "yuyutsava\\u1E25", 2785 "m\\u0101mak\\u0101\\u1E25", 2786 // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 2787 "kimakurvata", 2788 "san\\u0304java" 2789 }; 2790 const int MAX_LEN = sizeof(source)/sizeof(source[0]); 2791 const char* const expected[MAX_LEN] = { 2792 "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", 2793 "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 2794 "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 2795 "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 2796 "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 2797 "\\u0935\\u093f\\u0937\\u093e\\u0926", 2798 "\\u092f\\u094b\\u0917", 2799 "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 2800 "\\u0909\\u0935\\u093E\\u091A\\u0943", 2801 "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2802 "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 2803 "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 2804 "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 2805 "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 2806 // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 2807 "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 2808 "\\u0938\\u0902\\u091c\\u0935" 2809 }; 2810 if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) { 2811 errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!"); 2812 return; 2813 } 2814 2815 UErrorCode status = U_ZERO_ERROR; 2816 UParseError parseError; 2817 UnicodeString message; 2818 Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); 2819 Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); 2820 Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status); 2821 Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status); 2822 2823 if(U_FAILURE(status)){ 2824 dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status)); 2825 dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) ); 2826 return; 2827 } 2828 UnicodeString gotResult; 2829 for(int i= 0; i<MAX_LEN; i++){ 2830 gotResult = source[i]; 2831 expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i])); 2832 expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 2833 expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 2834 2835 } 2836 delete(latinToDevToLatin); 2837 delete(devToLatinToDev); 2838 delete(devToTelToDev); 2839 delete(latinToTelToLatin); 2840} 2841 2842/** 2843 * Test Gurmukhi-Devanagari Tippi and Bindi 2844 */ 2845void TransliteratorTest::TestGurmukhiDevanagari(){ 2846 // the rule says: 2847 // (\u0902) (when preceded by vowel) ---> (\u0A02) 2848 // (\u0902) (when preceded by consonant) ---> (\u0A70) 2849 UErrorCode status = U_ZERO_ERROR; 2850 UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status); 2851 UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status); 2852 UParseError parseError; 2853 2854 UnicodeSetIterator vIter(vowel); 2855 UnicodeSetIterator nvIter(non_vowel); 2856 Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status); 2857 if(U_FAILURE(status)) { 2858 dataerrln("Error creating transliterator %s", u_errorName(status)); 2859 delete trans; 2860 return; 2861 } 2862 UnicodeString src (" \\u0902", -1, US_INV); 2863 UnicodeString expected(" \\u0A02", -1, US_INV); 2864 src = src.unescape(); 2865 expected= expected.unescape(); 2866 2867 while(vIter.next()){ 2868 src.setCharAt(0,(UChar) vIter.getCodepoint()); 2869 expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100)); 2870 expect(*trans,src,expected); 2871 } 2872 2873 expected.setCharAt(1,0x0A70); 2874 while(nvIter.next()){ 2875 //src.setCharAt(0,(char) nvIter.codepoint); 2876 src.setCharAt(0,(UChar)nvIter.getCodepoint()); 2877 expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100)); 2878 expect(*trans,src,expected); 2879 } 2880 delete trans; 2881} 2882/** 2883 * Test instantiation from a locale. 2884 */ 2885void TransliteratorTest::TestLocaleInstantiation(void) { 2886 UParseError pe; 2887 UErrorCode ec = U_ZERO_ERROR; 2888 Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec); 2889 if (U_FAILURE(ec)) { 2890 dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec)); 2891 delete t; 2892 return; 2893 } 2894 expect(*t, CharsToUnicodeString("\\u0430"), "a"); 2895 delete t; 2896 2897 t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec); 2898 if (U_FAILURE(ec)) { 2899 errln("FAIL: createInstance(en-el)"); 2900 delete t; 2901 return; 2902 } 2903 expect(*t, "a", CharsToUnicodeString("\\u03B1")); 2904 delete t; 2905} 2906 2907/** 2908 * Test title case handling of accent (should ignore accents) 2909 */ 2910void TransliteratorTest::TestTitleAccents(void) { 2911 UParseError pe; 2912 UErrorCode ec = U_ZERO_ERROR; 2913 Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec); 2914 if (U_FAILURE(ec)) { 2915 errln("FAIL: createInstance(Title)"); 2916 delete t; 2917 return; 2918 } 2919 expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe")); 2920 delete t; 2921} 2922 2923/** 2924 * Basic test of a locale resource based rule. 2925 */ 2926void TransliteratorTest::TestLocaleResource() { 2927 const char* DATA[] = { 2928 // id from to 2929 //"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0", 2930 "Latin-el", "b", "\\u03bc\\u03c0", 2931 "Latin-Greek", "b", "\\u03B2", 2932 "Greek-Latin/UNGEGN", "\\u03B2", "v", 2933 "el-Latin", "\\u03B2", "v", 2934 "Greek-Latin", "\\u03B2", "b", 2935 }; 2936 const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); 2937 for (int32_t i=0; i<DATA_length; i+=3) { 2938 UParseError pe; 2939 UErrorCode ec = U_ZERO_ERROR; 2940 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec); 2941 if (U_FAILURE(ec)) { 2942 dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec)); 2943 delete t; 2944 continue; 2945 } 2946 expect(*t, CharsToUnicodeString(DATA[i+1]), 2947 CharsToUnicodeString(DATA[i+2])); 2948 delete t; 2949 } 2950} 2951 2952/** 2953 * Make sure parse errors reference the right line. 2954 */ 2955void TransliteratorTest::TestParseError() { 2956 static const char* rule = 2957 "a > b;\n" 2958 "# more stuff\n" 2959 "d << b;"; 2960 UErrorCode ec = U_ZERO_ERROR; 2961 UParseError pe; 2962 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 2963 delete t; 2964 if (U_FAILURE(ec)) { 2965 UnicodeString err(pe.preContext); 2966 err.append((UChar)124/*|*/).append(pe.postContext); 2967 if (err.indexOf("d << b") >= 0) { 2968 logln("Ok: " + err); 2969 } else { 2970 errln("FAIL: " + err); 2971 } 2972 } 2973 else { 2974 errln("FAIL: no syntax error"); 2975 } 2976 static const char* maskingRule = 2977 "a>x;\n" 2978 "# more stuff\n" 2979 "ab>y;"; 2980 ec = U_ZERO_ERROR; 2981 delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec); 2982 if (ec != U_RULE_MASK_ERROR) { 2983 errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec)); 2984 } 2985 else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) { 2986 errln("FAIL: did not get expected precontext"); 2987 } 2988 else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) { 2989 errln("FAIL: did not get expected postcontext"); 2990 } 2991} 2992 2993/** 2994 * Make sure sets on output are disallowed. 2995 */ 2996void TransliteratorTest::TestOutputSet() { 2997 UnicodeString rule = "$set = [a-cm-n]; b > $set;"; 2998 UErrorCode ec = U_ZERO_ERROR; 2999 UParseError pe; 3000 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 3001 delete t; 3002 if (U_FAILURE(ec)) { 3003 UnicodeString err(pe.preContext); 3004 err.append((UChar)124/*|*/).append(pe.postContext); 3005 logln("Ok: " + err); 3006 return; 3007 } 3008 errln("FAIL: No syntax error"); 3009} 3010 3011/** 3012 * Test the use variable range pragma, making sure that use of 3013 * variable range characters is detected and flagged as an error. 3014 */ 3015void TransliteratorTest::TestVariableRange() { 3016 UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 3017 UErrorCode ec = U_ZERO_ERROR; 3018 UParseError pe; 3019 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 3020 delete t; 3021 if (U_FAILURE(ec)) { 3022 UnicodeString err(pe.preContext); 3023 err.append((UChar)124/*|*/).append(pe.postContext); 3024 logln("Ok: " + err); 3025 return; 3026 } 3027 errln("FAIL: No syntax error"); 3028} 3029 3030/** 3031 * Test invalid post context error handling 3032 */ 3033void TransliteratorTest::TestInvalidPostContext() { 3034 UnicodeString rule = "a}b{c>d;"; 3035 UErrorCode ec = U_ZERO_ERROR; 3036 UParseError pe; 3037 Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec); 3038 delete t; 3039 if (U_FAILURE(ec)) { 3040 UnicodeString err(pe.preContext); 3041 err.append((UChar)124/*|*/).append(pe.postContext); 3042 if (err.indexOf("a}b{c") >= 0) { 3043 logln("Ok: " + err); 3044 } else { 3045 errln("FAIL: " + err); 3046 } 3047 return; 3048 } 3049 errln("FAIL: No syntax error"); 3050} 3051 3052/** 3053 * Test ID form variants 3054 */ 3055void TransliteratorTest::TestIDForms() { 3056 const char* DATA[] = { 3057 "NFC", NULL, "NFD", 3058 "nfd", NULL, "NFC", // make sure case is ignored 3059 "Any-NFKD", NULL, "Any-NFKC", 3060 "Null", NULL, "Null", 3061 "-nfkc", "nfkc", "NFKD", 3062 "-nfkc/", "nfkc", "NFKD", 3063 "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN", 3064 "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 3065 "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 3066 "Source-", NULL, NULL, 3067 "Source/Variant-", NULL, NULL, 3068 "Source-/Variant", NULL, NULL, 3069 "/Variant", NULL, NULL, 3070 "/Variant-", NULL, NULL, 3071 "-/Variant", NULL, NULL, 3072 "-/", NULL, NULL, 3073 "-", NULL, NULL, 3074 "/", NULL, NULL, 3075 }; 3076 const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); 3077 3078 for (int32_t i=0; i<DATA_length; i+=3) { 3079 const char* ID = DATA[i]; 3080 const char* expID = DATA[i+1]; 3081 const char* expInvID = DATA[i+2]; 3082 UBool expValid = (expInvID != NULL); 3083 if (expID == NULL) { 3084 expID = ID; 3085 } 3086 UParseError pe; 3087 UErrorCode ec = U_ZERO_ERROR; 3088 Transliterator *t = 3089 Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec); 3090 if (U_FAILURE(ec)) { 3091 if (!expValid) { 3092 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec)); 3093 } else { 3094 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec)); 3095 } 3096 delete t; 3097 continue; 3098 } 3099 Transliterator *u = t->createInverse(ec); 3100 if (U_FAILURE(ec)) { 3101 errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID); 3102 delete t; 3103 delete u; 3104 continue; 3105 } 3106 if (t->getID() == expID && 3107 u->getID() == expInvID) { 3108 logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID); 3109 } else { 3110 errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " + 3111 t->getID() + " x getInverse() => " + u->getID() + 3112 ", expected " + expInvID); 3113 } 3114 delete t; 3115 delete u; 3116 } 3117} 3118 3119static const UChar SPACE[] = {32,0}; 3120static const UChar NEWLINE[] = {10,0}; 3121static const UChar RETURN[] = {13,0}; 3122static const UChar EMPTY[] = {0}; 3123 3124void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2, 3125 const UnicodeString& testRulesForward) { 3126 UnicodeString rules2; t2.toRules(rules2, TRUE); 3127 //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 3128 rules2.findAndReplace(SPACE, EMPTY); 3129 rules2.findAndReplace(NEWLINE, EMPTY); 3130 rules2.findAndReplace(RETURN, EMPTY); 3131 3132 UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY); 3133 3134 if (rules2 != testRules) { 3135 errln(label); 3136 logln((UnicodeString)"GENERATED RULES: " + rules2); 3137 logln((UnicodeString)"SHOULD BE: " + testRulesForward); 3138 } 3139} 3140 3141/** 3142 * Mark's toRules test. 3143 */ 3144void TransliteratorTest::TestToRulesMark() { 3145 const char* testRules = 3146 "::[[:Latin:][:Mark:]];" 3147 "::NFKD (NFC);" 3148 "::Lower (Lower);" 3149 "a <> \\u03B1;" // alpha 3150 "::NFKC (NFD);" 3151 "::Upper (Lower);" 3152 "::Lower ();" 3153 "::([[:Greek:][:Mark:]]);" 3154 ; 3155 const char* testRulesForward = 3156 "::[[:Latin:][:Mark:]];" 3157 "::NFKD(NFC);" 3158 "::Lower(Lower);" 3159 "a > \\u03B1;" 3160 "::NFKC(NFD);" 3161 "::Upper (Lower);" 3162 "::Lower ();" 3163 ; 3164 const char* testRulesBackward = 3165 "::[[:Greek:][:Mark:]];" 3166 "::Lower (Upper);" 3167 "::NFD(NFKC);" 3168 "\\u03B1 > a;" 3169 "::Lower(Lower);" 3170 "::NFC(NFKD);" 3171 ; 3172 UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute 3173 UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute 3174 3175 UParseError pe; 3176 UErrorCode ec = U_ZERO_ERROR; 3177 Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec); 3178 Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec); 3179 3180 if (U_FAILURE(ec)) { 3181 delete t2; 3182 delete t3; 3183 dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec)); 3184 return; 3185 } 3186 3187 expect(*t2, source, target); 3188 expect(*t3, target, source); 3189 3190 checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV)); 3191 checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV)); 3192 3193 delete t2; 3194 delete t3; 3195} 3196 3197/** 3198 * Test Escape and Unescape transliterators. 3199 */ 3200void TransliteratorTest::TestEscape() { 3201 UParseError pe; 3202 UErrorCode ec; 3203 Transliterator *t; 3204 3205 ec = U_ZERO_ERROR; 3206 t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec); 3207 if (U_FAILURE(ec)) { 3208 errln((UnicodeString)"FAIL: createInstance"); 3209 } else { 3210 expect(*t, 3211 UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"), 3212 "@12Q"); 3213 } 3214 delete t; 3215 3216 ec = U_ZERO_ERROR; 3217 t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec); 3218 if (U_FAILURE(ec)) { 3219 errln((UnicodeString)"FAIL: createInstance"); 3220 } else { 3221 expect(*t, 3222 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 3223 UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED")); 3224 } 3225 delete t; 3226 3227 ec = U_ZERO_ERROR; 3228 t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec); 3229 if (U_FAILURE(ec)) { 3230 errln((UnicodeString)"FAIL: createInstance"); 3231 } else { 3232 expect(*t, 3233 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 3234 UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED")); 3235 } 3236 delete t; 3237 3238 ec = U_ZERO_ERROR; 3239 t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec); 3240 if (U_FAILURE(ec)) { 3241 errln((UnicodeString)"FAIL: createInstance"); 3242 } else { 3243 expect(*t, 3244 CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 3245 UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}")); 3246 } 3247 delete t; 3248} 3249 3250 3251void TransliteratorTest::TestAnchorMasking(){ 3252 UnicodeString rule ("^a > Q; a > q;"); 3253 UErrorCode status= U_ZERO_ERROR; 3254 UParseError parseError; 3255 3256 Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status); 3257 if(U_FAILURE(status)){ 3258 errln(UnicodeString("FAIL: ") + "ID" + 3259 ".createFromRules() => bad rules" + 3260 /*", parse error " + parseError.code +*/ 3261 ", line " + parseError.line + 3262 ", offset " + parseError.offset + 3263 ", context " + prettify(parseError.preContext, TRUE) + 3264 ", rules: " + prettify(rule, TRUE)); 3265 } 3266 delete t; 3267} 3268 3269/** 3270 * Make sure display names of variants look reasonable. 3271 */ 3272void TransliteratorTest::TestDisplayName() { 3273#if UCONFIG_NO_FORMATTING 3274 logln("Skipping, UCONFIG_NO_FORMATTING is set\n"); 3275 return; 3276#else 3277 static const char* DATA[] = { 3278 // ID, forward name, reverse name 3279 // Update the text as necessary -- the important thing is 3280 // not the text itself, but how various cases are handled. 3281 3282 // Basic test 3283 "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 3284 3285 // Variants 3286 "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 3287 3288 // Target-only IDs 3289 "NFC", "Any to NFC", "Any to NFD", 3290 }; 3291 3292 int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); 3293 3294 Locale US("en", "US"); 3295 3296 for (int32_t i=0; i<DATA_length; i+=3) { 3297 UnicodeString name; 3298 Transliterator::getDisplayName(DATA[i], US, name); 3299 if (name != DATA[i+1]) { 3300 dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " + 3301 name + ", expected " + DATA[i+1]); 3302 } else { 3303 logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name); 3304 } 3305 UErrorCode ec = U_ZERO_ERROR; 3306 UParseError pe; 3307 Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec); 3308 if (U_FAILURE(ec)) { 3309 delete t; 3310 dataerrln("FAIL: createInstance failed - %s", u_errorName(ec)); 3311 continue; 3312 } 3313 name = Transliterator::getDisplayName(t->getID(), US, name); 3314 if (name != DATA[i+2]) { 3315 dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " + 3316 name + ", expected " + DATA[i+2]); 3317 } else { 3318 logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name); 3319 } 3320 delete t; 3321 } 3322#endif 3323} 3324 3325void TransliteratorTest::TestSpecialCases(void) { 3326 const UnicodeString registerRules[] = { 3327 "Any-Dev1", "x > X; y > Y;", 3328 "Any-Dev2", "XY > Z", 3329 "Greek-Latin/FAKE", 3330 CharsToUnicodeString 3331 ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"), 3332 "" // END MARKER 3333 }; 3334 3335 const UnicodeString testCases[] = { 3336 // NORMALIZATION 3337 // should add more test cases 3338 "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3339 "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3340 "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3341 "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 3342 3343 // mp -> b BUG 3344 "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", 3345 "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", 3346 3347 // check for devanagari bug 3348 "nfd;Dev1;Dev2;nfc", "xy", "Z", 3349 3350 // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 3351 "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3352 CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee, 3353 3354 //TODO: enable this test once Titlecase works right 3355 /* 3356 "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3357 CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee, 3358 */ 3359 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3360 CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE, 3361 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, 3362 CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee, 3363 3364 "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "", 3365 "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "", 3366 3367 // FORMS OF S 3368 "Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 3369 CharsToUnicodeString("s ss s\\u0331s\\u0331") , 3370 "Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"), 3371 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") , 3372 "Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 3373 CharsToUnicodeString("s ss s\\u0331s\\u0331") , 3374 "Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"), 3375 CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 3376 // Tatiana bug 3377 // Upper: TAT\\u02B9\\u00C2NA 3378 // Lower: tat\\u02B9\\u00E2na 3379 // Title: Tat\\u02B9\\u00E2na 3380 "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"), 3381 CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), 3382 "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), 3383 CharsToUnicodeString("tat\\u02B9\\u00E2na"), 3384 "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"), 3385 CharsToUnicodeString("Tat\\u02B9\\u00E2na"), 3386 3387 "" // END MARKER 3388 }; 3389 3390 UParseError pos; 3391 int32_t i; 3392 for (i = 0; registerRules[i].length()!=0; i+=2) { 3393 UErrorCode status = U_ZERO_ERROR; 3394 3395 Transliterator *t = Transliterator::createFromRules(registerRules[0+i], 3396 registerRules[i+1], UTRANS_FORWARD, pos, status); 3397 if (U_FAILURE(status)) { 3398 dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status)); 3399 } else { 3400 Transliterator::registerInstance(t); 3401 } 3402 } 3403 for (i = 0; testCases[i].length()!=0; i+=3) { 3404 UErrorCode ec = U_ZERO_ERROR; 3405 UParseError pe; 3406 const UnicodeString& name = testCases[i]; 3407 Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec); 3408 if (U_FAILURE(ec)) { 3409 dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec)); 3410 delete t; 3411 continue; 3412 } 3413 const UnicodeString& id = t->getID(); 3414 const UnicodeString& source = testCases[i+1]; 3415 UnicodeString target; 3416 3417 // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 3418 3419 if (testCases[i+2].length() > 0) { 3420 target = testCases[i+2]; 3421 } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) { 3422 Normalizer::normalize(source, UNORM_NFD, 0, target, ec); 3423 } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) { 3424 Normalizer::normalize(source, UNORM_NFC, 0, target, ec); 3425 } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) { 3426 Normalizer::normalize(source, UNORM_NFKD, 0, target, ec); 3427 } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) { 3428 Normalizer::normalize(source, UNORM_NFKC, 0, target, ec); 3429 } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) { 3430 target = source; 3431 target.toLower(Locale::getUS()); 3432 } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) { 3433 target = source; 3434 target.toUpper(Locale::getUS()); 3435 } 3436 if (U_FAILURE(ec)) { 3437 errln((UnicodeString)"FAIL: Internal error normalizing " + source); 3438 continue; 3439 } 3440 3441 expect(*t, source, target); 3442 delete t; 3443 } 3444 for (i = 0; registerRules[i].length()!=0; i+=2) { 3445 Transliterator::unregister(registerRules[i]); 3446 } 3447} 3448 3449char* Char32ToEscapedChars(UChar32 ch, char* buffer) { 3450 if (ch <= 0xFFFF) { 3451 sprintf(buffer, "\\u%04x", (int)ch); 3452 } else { 3453 sprintf(buffer, "\\U%08x", (int)ch); 3454 } 3455 return buffer; 3456} 3457 3458void TransliteratorTest::TestSurrogateCasing (void) { 3459 // check that casing handles surrogates 3460 // titlecase is currently defective 3461 char buffer[20]; 3462 UChar buffer2[20]; 3463 UChar32 dee; 3464 U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee); 3465 UnicodeString DEE(u_totitle(dee)); 3466 if (DEE != DESERET_DEE) { 3467 err("Fails titlecase of surrogates"); 3468 err(Char32ToEscapedChars(dee, buffer)); 3469 err(", "); 3470 errln(Char32ToEscapedChars(DEE.char32At(0), buffer)); 3471 } 3472 3473 UnicodeString deeDEETest=DESERET_dee + DESERET_DEE; 3474 UnicodeString deedeeTest = DESERET_dee + DESERET_dee; 3475 UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE; 3476 UErrorCode status= U_ZERO_ERROR; 3477 3478 u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status); 3479 if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) { 3480 errln("Fails: Can't uppercase surrogates."); 3481 } 3482 3483 status= U_ZERO_ERROR; 3484 u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status); 3485 if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) { 3486 errln("Fails: Can't lowercase surrogates."); 3487 } 3488} 3489 3490static void _trans(Transliterator& t, const UnicodeString& src, 3491 UnicodeString& result) { 3492 result = src; 3493 t.transliterate(result); 3494} 3495 3496static void _trans(const UnicodeString& id, const UnicodeString& src, 3497 UnicodeString& result, UErrorCode ec) { 3498 UParseError pe; 3499 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec); 3500 if (U_SUCCESS(ec)) { 3501 _trans(*t, src, result); 3502 } 3503 delete t; 3504} 3505 3506static UnicodeString _findMatch(const UnicodeString& source, 3507 const UnicodeString* pairs) { 3508 UnicodeString empty; 3509 for (int32_t i=0; pairs[i].length() > 0; i+=2) { 3510 if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) { 3511 return pairs[i+1]; 3512 } 3513 } 3514 return empty; 3515} 3516 3517// Check to see that incremental gets at least part way through a reasonable string. 3518 3519void TransliteratorTest::TestIncrementalProgress(void) { 3520 UErrorCode ec = U_ZERO_ERROR; 3521 UnicodeString latinTest = "The Quick Brown Fox."; 3522 UnicodeString devaTest; 3523 _trans("Latin-Devanagari", latinTest, devaTest, ec); 3524 UnicodeString kataTest; 3525 _trans("Latin-Katakana", latinTest, kataTest, ec); 3526 if (U_FAILURE(ec)) { 3527 errln("FAIL: Internal error"); 3528 return; 3529 } 3530 const UnicodeString tests[] = { 3531 "Any", latinTest, 3532 "Latin", latinTest, 3533 "Halfwidth", latinTest, 3534 "Devanagari", devaTest, 3535 "Katakana", kataTest, 3536 "" // END MARKER 3537 }; 3538 3539 UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog."); 3540 int32_t i = 0, j=0, k=0; 3541 int32_t sources = Transliterator::countAvailableSources(); 3542 for (i = 0; i < sources; i++) { 3543 UnicodeString source; 3544 Transliterator::getAvailableSource(i, source); 3545 UnicodeString test = _findMatch(source, tests); 3546 if (test.length() == 0) { 3547 logln((UnicodeString)"Skipping " + source + "-X"); 3548 continue; 3549 } 3550 int32_t targets = Transliterator::countAvailableTargets(source); 3551 for (j = 0; j < targets; j++) { 3552 UnicodeString target; 3553 Transliterator::getAvailableTarget(j, source, target); 3554 int32_t variants = Transliterator::countAvailableVariants(source, target); 3555 for (k =0; k< variants; k++) { 3556 UnicodeString variant; 3557 UParseError err; 3558 UErrorCode status = U_ZERO_ERROR; 3559 3560 Transliterator::getAvailableVariant(k, source, target, variant); 3561 UnicodeString id = source + "-" + target + "/" + variant; 3562 3563 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status); 3564 if (U_FAILURE(status)) { 3565 dataerrln((UnicodeString)"FAIL: Could not create " + id); 3566 delete t; 3567 continue; 3568 } 3569 status = U_ZERO_ERROR; 3570 CheckIncrementalAux(t, test); 3571 3572 UnicodeString rev; 3573 _trans(*t, test, rev); 3574 Transliterator *inv = t->createInverse(status); 3575 if (U_FAILURE(status)) { 3576#if UCONFIG_NO_BREAK_ITERATION 3577 // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. 3578 if (id.compare((UnicodeString)"Latin-Thai/") != 0) 3579#endif 3580 errln((UnicodeString)"FAIL: Could not create inverse of " + id); 3581 3582 delete t; 3583 delete inv; 3584 continue; 3585 } 3586 CheckIncrementalAux(inv, rev); 3587 delete t; 3588 delete inv; 3589 } 3590 } 3591 } 3592} 3593 3594void TransliteratorTest::CheckIncrementalAux(const Transliterator* t, 3595 const UnicodeString& input) { 3596 UErrorCode ec = U_ZERO_ERROR; 3597 UTransPosition pos; 3598 UnicodeString test = input; 3599 3600 pos.contextStart = 0; 3601 pos.contextLimit = input.length(); 3602 pos.start = 0; 3603 pos.limit = input.length(); 3604 3605 t->transliterate(test, pos, ec); 3606 if (U_FAILURE(ec)) { 3607 errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec)); 3608 return; 3609 } 3610 UBool gotError = FALSE; 3611 (void)gotError; // Suppress set but not used warning. 3612 3613 // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 3614 3615 if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") { 3616 errln((UnicodeString)"No Progress, " + 3617 t->getID() + ": " + formatInput(test, input, pos)); 3618 gotError = TRUE; 3619 } else { 3620 logln((UnicodeString)"PASS Progress, " + 3621 t->getID() + ": " + formatInput(test, input, pos)); 3622 } 3623 t->finishTransliteration(test, pos); 3624 if (pos.start != pos.limit) { 3625 errln((UnicodeString)"Incomplete, " + 3626 t->getID() + ": " + formatInput(test, input, pos)); 3627 gotError = TRUE; 3628 } 3629} 3630 3631void TransliteratorTest::TestFunction() { 3632 // Careful with spacing and ';' here: Phrase this exactly 3633 // as toRules() is going to return it. If toRules() changes 3634 // with regard to spacing or ';', then adjust this string. 3635 UnicodeString rule = 3636 "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 3637 3638 UParseError pe; 3639 UErrorCode ec = U_ZERO_ERROR; 3640 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3641 if (t == NULL) { 3642 dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec)); 3643 return; 3644 } 3645 3646 UnicodeString r; 3647 t->toRules(r, TRUE); 3648 if (r == rule) { 3649 logln((UnicodeString)"OK: toRules() => " + r); 3650 } else { 3651 errln((UnicodeString)"FAIL: toRules() => " + r + 3652 ", expected " + rule); 3653 } 3654 3655 expect(*t, "The Quick Brown Fox", 3656 UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox")); 3657 3658 delete t; 3659} 3660 3661void TransliteratorTest::TestInvalidBackRef(void) { 3662 UnicodeString rule = ". > $1;"; 3663 UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;"); 3664 UParseError pe; 3665 UErrorCode ec = U_ZERO_ERROR; 3666 Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3667 Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec); 3668 3669 if (t != NULL) { 3670 errln("FAIL: createFromRules should have returned NULL"); 3671 delete t; 3672 } 3673 3674 if (t2 != NULL) { 3675 errln("FAIL: createFromRules should have returned NULL"); 3676 delete t2; 3677 } 3678 3679 if (U_SUCCESS(ec)) { 3680 errln("FAIL: Ok: . > $1; => no error"); 3681 } else { 3682 logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec)); 3683 } 3684} 3685 3686void TransliteratorTest::TestMulticharStringSet() { 3687 // Basic testing 3688 const char* rule = 3689 " [{aa}] > x;" 3690 " a > y;" 3691 " [b{bc}] > z;" 3692 "[{gd}] { e > q;" 3693 " e } [{fg}] > r;" ; 3694 3695 UParseError pe; 3696 UErrorCode ec = U_ZERO_ERROR; 3697 Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3698 if (t == NULL || U_FAILURE(ec)) { 3699 delete t; 3700 errln("FAIL: createFromRules failed"); 3701 return; 3702 } 3703 3704 expect(*t, "a aa ab bc d gd de gde gdefg ddefg", 3705 "y x yz z d gd de gdq gdqfg ddrfg"); 3706 delete t; 3707 3708 // Overlapped string test. Make sure that when multiple 3709 // strings can match that the longest one is matched. 3710 rule = 3711 " [a {ab} {abc}] > x;" 3712 " b > y;" 3713 " c > z;" 3714 " q [t {st} {rst}] { e > p;" ; 3715 3716 t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec); 3717 if (t == NULL || U_FAILURE(ec)) { 3718 delete t; 3719 errln("FAIL: createFromRules failed"); 3720 return; 3721 } 3722 3723 expect(*t, "a ab abc qte qste qrste", 3724 "x x x qtp qstp qrstp"); 3725 delete t; 3726} 3727 3728// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 3729// BEGIN TestUserFunction support factory 3730 3731Transliterator* _TUFF[4]; 3732UnicodeString* _TUFID[4]; 3733 3734static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/, 3735 Transliterator::Token context) { 3736 return _TUFF[context.integer]->clone(); 3737} 3738 3739static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) { 3740 _TUFF[n] = t; 3741 _TUFID[n] = new UnicodeString(ID); 3742 Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n)); 3743} 3744 3745static void _TUFUnreg(int32_t n) { 3746 if (_TUFF[n] != NULL) { 3747 Transliterator::unregister(*_TUFID[n]); 3748 delete _TUFF[n]; 3749 delete _TUFID[n]; 3750 } 3751} 3752 3753// END TestUserFunction support factory 3754// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 3755 3756/** 3757 * Test that user-registered transliterators can be used under function 3758 * syntax. 3759 */ 3760void TransliteratorTest::TestUserFunction() { 3761 3762 Transliterator* t; 3763 UParseError pe; 3764 UErrorCode ec = U_ZERO_ERROR; 3765 3766 // Setup our factory 3767 int32_t i; 3768 for (i=0; i<4; ++i) { 3769 _TUFF[i] = NULL; 3770 } 3771 3772 // There's no need to register inverses if we don't use them 3773 t = Transliterator::createFromRules("gif", 3774 UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"), 3775 UTRANS_FORWARD, pe, ec); 3776 if (t == NULL || U_FAILURE(ec)) { 3777 dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec)); 3778 return; 3779 } 3780 _TUFReg("Any-gif", t, 0); 3781 3782 t = Transliterator::createFromRules("RemoveCurly", 3783 UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"), 3784 UTRANS_FORWARD, pe, ec); 3785 if (t == NULL || U_FAILURE(ec)) { 3786 errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec)); 3787 goto FAIL; 3788 } 3789 expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name"); 3790 _TUFReg("Any-RemoveCurly", t, 1); 3791 3792 logln("Trying &hex"); 3793 t = Transliterator::createFromRules("hex2", 3794 "(.) > &hex($1);", 3795 UTRANS_FORWARD, pe, ec); 3796 if (t == NULL || U_FAILURE(ec)) { 3797 errln("FAIL: createFromRules"); 3798 goto FAIL; 3799 } 3800 logln("Registering"); 3801 _TUFReg("Any-hex2", t, 2); 3802 t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec); 3803 if (t == NULL || U_FAILURE(ec)) { 3804 errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec)); 3805 goto FAIL; 3806 } 3807 expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063")); 3808 delete t; 3809 3810 logln("Trying &gif"); 3811 t = Transliterator::createFromRules("gif2", 3812 "(.) > &Gif(&Hex2($1));", 3813 UTRANS_FORWARD, pe, ec); 3814 if (t == NULL || U_FAILURE(ec)) { 3815 errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec)); 3816 goto FAIL; 3817 } 3818 logln("Registering"); 3819 _TUFReg("Any-gif2", t, 3); 3820 t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec); 3821 if (t == NULL || U_FAILURE(ec)) { 3822 errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec)); 3823 goto FAIL; 3824 } 3825 expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" 3826 "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 3827 delete t; 3828 3829 // Test that filters are allowed after & 3830 t = Transliterator::createFromRules("test", 3831 "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';", 3832 UTRANS_FORWARD, pe, ec); 3833 if (t == NULL || U_FAILURE(ec)) { 3834 errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec)); 3835 goto FAIL; 3836 } 3837 expect(*t, "abc", 3838 UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ")); 3839 delete t; 3840 3841 FAIL: 3842 for (i=0; i<4; ++i) { 3843 _TUFUnreg(i); 3844 } 3845} 3846 3847/** 3848 * Test the Any-X transliterators. 3849 */ 3850void TransliteratorTest::TestAnyX(void) { 3851 UParseError parseError; 3852 UErrorCode status = U_ZERO_ERROR; 3853 Transliterator* anyLatin = 3854 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); 3855 if (anyLatin==0) { 3856 dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status)); 3857 delete anyLatin; 3858 return; 3859 } 3860 3861 expect(*anyLatin, 3862 CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"), 3863 CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc")); 3864 3865 delete anyLatin; 3866} 3867 3868/** 3869 * Test Any-X transliterators with sample letters from all scripts. 3870 */ 3871void TransliteratorTest::TestAny(void) { 3872 UErrorCode status = U_ZERO_ERROR; 3873 // Note: there is a lot of implict construction of UnicodeStrings from (char *) in 3874 // function call parameters going on in this test. 3875 UnicodeSet alphabetic("[:alphabetic:]", status); 3876 if (U_FAILURE(status)) { 3877 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 3878 return; 3879 } 3880 alphabetic.freeze(); 3881 3882 UnicodeString testString; 3883 for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) { 3884 const char *scriptName = uscript_getShortName((UScriptCode)i); 3885 if (scriptName == NULL) { 3886 errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i); 3887 return; 3888 } 3889 3890 UnicodeSet sample; 3891 sample.applyPropertyAlias("script", scriptName, status); 3892 if (U_FAILURE(status)) { 3893 errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 3894 return; 3895 } 3896 sample.retainAll(alphabetic); 3897 for (int32_t count=0; count<5; count++) { 3898 UChar32 c = sample.charAt(count); 3899 if (c == -1) { 3900 break; 3901 } 3902 testString.append(c); 3903 } 3904 } 3905 3906 UParseError parseError; 3907 Transliterator* anyLatin = 3908 Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); 3909 if (U_FAILURE(status)) { 3910 dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); 3911 return; 3912 } 3913 3914 logln(UnicodeString("Sample set for Any-Latin: ") + testString); 3915 anyLatin->transliterate(testString); 3916 logln(UnicodeString("Sample result for Any-Latin: ") + testString); 3917 delete anyLatin; 3918} 3919 3920 3921/** 3922 * Test the source and target set API. These are only implemented 3923 * for RBT and CompoundTransliterator at this time. 3924 */ 3925void TransliteratorTest::TestSourceTargetSet() { 3926 UErrorCode ec = U_ZERO_ERROR; 3927 3928 // Rules 3929 const char* r = 3930 "a > b; " 3931 "r [x{lu}] > q;"; 3932 3933 // Expected source 3934 UnicodeSet expSrc("[arx{lu}]", ec); 3935 3936 // Expected target 3937 UnicodeSet expTrg("[bq]", ec); 3938 3939 UParseError pe; 3940 Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec); 3941 3942 if (U_FAILURE(ec)) { 3943 delete t; 3944 errln("FAIL: Couldn't set up test"); 3945 return; 3946 } 3947 3948 UnicodeSet src; t->getSourceSet(src); 3949 UnicodeSet trg; t->getTargetSet(trg); 3950 3951 if (src == expSrc && trg == expTrg) { 3952 UnicodeString a, b; 3953 logln((UnicodeString)"Ok: " + 3954 r + " => source = " + src.toPattern(a, TRUE) + 3955 ", target = " + trg.toPattern(b, TRUE)); 3956 } else { 3957 UnicodeString a, b, c, d; 3958 errln((UnicodeString)"FAIL: " + 3959 r + " => source = " + src.toPattern(a, TRUE) + 3960 ", expected " + expSrc.toPattern(b, TRUE) + 3961 "; target = " + trg.toPattern(c, TRUE) + 3962 ", expected " + expTrg.toPattern(d, TRUE)); 3963 } 3964 3965 delete t; 3966} 3967 3968/** 3969 * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 3970 */ 3971void TransliteratorTest::TestPatternWhiteSpace() { 3972 // Rules 3973 const char* r = "a > \\u200E b;"; 3974 3975 UErrorCode ec = U_ZERO_ERROR; 3976 UParseError pe; 3977 Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec); 3978 3979 if (U_FAILURE(ec)) { 3980 errln("FAIL: Couldn't set up test"); 3981 } else { 3982 expect(*t, "a", "b"); 3983 } 3984 delete t; 3985 3986 // UnicodeSet 3987 ec = U_ZERO_ERROR; 3988 UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec); 3989 3990 if (U_FAILURE(ec)) { 3991 errln("FAIL: Couldn't set up test"); 3992 } else { 3993 if (set.contains(0x200E)) { 3994 errln("FAIL: U+200E not being ignored by UnicodeSet"); 3995 } 3996 } 3997} 3998//====================================================================== 3999// this method is in TestUScript.java 4000//====================================================================== 4001void TransliteratorTest::TestAllCodepoints(){ 4002 UScriptCode code= USCRIPT_INVALID_CODE; 4003 char id[256]={'\0'}; 4004 char abbr[256]={'\0'}; 4005 char newId[256]={'\0'}; 4006 char newAbbrId[256]={'\0'}; 4007 char oldId[256]={'\0'}; 4008 char oldAbbrId[256]={'\0'}; 4009 4010 UErrorCode status =U_ZERO_ERROR; 4011 UParseError pe; 4012 4013 for(uint32_t i = 0; i<=0x10ffff; i++){ 4014 code = uscript_getScript(i,&status); 4015 if(code == USCRIPT_INVALID_CODE){ 4016 dataerrln("uscript_getScript for codepoint \\U%08X failed.", i); 4017 } 4018 const char* myId = uscript_getName(code); 4019 if(!myId) { 4020 dataerrln("Valid script code returned NULL name. Check your data!"); 4021 return; 4022 } 4023 uprv_strcpy(id,myId); 4024 uprv_strcpy(abbr,uscript_getShortName(code)); 4025 4026 uprv_strcpy(newId,"[:"); 4027 uprv_strcat(newId,id); 4028 uprv_strcat(newId,":];NFD"); 4029 4030 uprv_strcpy(newAbbrId,"[:"); 4031 uprv_strcat(newAbbrId,abbr); 4032 uprv_strcat(newAbbrId,":];NFD"); 4033 4034 if(uprv_strcmp(newId,oldId)!=0){ 4035 Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status); 4036 if(t==NULL || U_FAILURE(status)){ 4037 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status)); 4038 } 4039 delete t; 4040 } 4041 if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){ 4042 Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status); 4043 if(t==NULL || U_FAILURE(status)){ 4044 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status)); 4045 } 4046 delete t; 4047 } 4048 uprv_strcpy(oldId,newId); 4049 uprv_strcpy(oldAbbrId, newAbbrId); 4050 4051 } 4052 4053} 4054 4055#define TEST_TRANSLIT_ID(id, cls) { \ 4056 UErrorCode ec = U_ZERO_ERROR; \ 4057 Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \ 4058 if (U_FAILURE(ec)) { \ 4059 dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \ 4060 } else { \ 4061 if (t->getDynamicClassID() != cls::getStaticClassID()) { \ 4062 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \ 4063 } \ 4064 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \ 4065 } \ 4066 delete t; \ 4067} 4068 4069#define TEST_TRANSLIT_RULE(rule, cls) { \ 4070 UErrorCode ec = U_ZERO_ERROR; \ 4071 UParseError pe; \ 4072 Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \ 4073 if (U_FAILURE(ec)) { \ 4074 errln("FAIL: Couldn't create " rule); \ 4075 } else { \ 4076 if (t->getDynamicClassID() != cls ::getStaticClassID()) { \ 4077 errln("FAIL: " #cls " dynamic and static class ID mismatch"); \ 4078 } \ 4079 /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \ 4080 } \ 4081 delete t; \ 4082} 4083 4084void TransliteratorTest::TestBoilerplate() { 4085 TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator); 4086 TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator); 4087 TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator); 4088 TEST_TRANSLIT_ID("Lower", LowercaseTransliterator); 4089 TEST_TRANSLIT_ID("Upper", UppercaseTransliterator); 4090 TEST_TRANSLIT_ID("Title", TitlecaseTransliterator); 4091 TEST_TRANSLIT_ID("Null", NullTransliterator); 4092 TEST_TRANSLIT_ID("Remove", RemoveTransliterator); 4093 TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator); 4094 TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator); 4095 TEST_TRANSLIT_ID("NFD", NormalizationTransliterator); 4096 TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator); 4097 TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator); 4098} 4099 4100void TransliteratorTest::TestAlternateSyntax() { 4101 // U+2206 == & 4102 // U+2190 == < 4103 // U+2192 == > 4104 // U+2194 == <> 4105 expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"), 4106 "abc", 4107 "xbz"); 4108 expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), 4109 CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), 4110 UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}")); 4111} 4112 4113static const char* BEGIN_END_RULES[] = { 4114 // [0] 4115 "abc > xy;" 4116 "aba > z;", 4117 4118 // [1] 4119/* 4120 "::BEGIN;" 4121 "abc > xy;" 4122 "::END;" 4123 "::BEGIN;" 4124 "aba > z;" 4125 "::END;", 4126*/ 4127 "", // test case commented out below, this is here to keep from messing up the indexes 4128 4129 // [2] 4130/* 4131 "abc > xy;" 4132 "::BEGIN;" 4133 "aba > z;" 4134 "::END;", 4135*/ 4136 "", // test case commented out below, this is here to keep from messing up the indexes 4137 4138 // [3] 4139/* 4140 "::BEGIN;" 4141 "abc > xy;" 4142 "::END;" 4143 "aba > z;", 4144*/ 4145 "", // test case commented out below, this is here to keep from messing up the indexes 4146 4147 // [4] 4148 "abc > xy;" 4149 "::Null;" 4150 "aba > z;", 4151 4152 // [5] 4153 "::Upper;" 4154 "ABC > xy;" 4155 "AB > x;" 4156 "C > z;" 4157 "::Upper;" 4158 "XYZ > p;" 4159 "XY > q;" 4160 "Z > r;" 4161 "::Upper;", 4162 4163 // [6] 4164 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4165 "$delim = [\\-$ws];" 4166 "$ws $delim* > ' ';" 4167 "'-' $delim* > '-';", 4168 4169 // [7] 4170 "::Null;" 4171 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4172 "$delim = [\\-$ws];" 4173 "$ws $delim* > ' ';" 4174 "'-' $delim* > '-';", 4175 4176 // [8] 4177 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4178 "$delim = [\\-$ws];" 4179 "$ws $delim* > ' ';" 4180 "'-' $delim* > '-';" 4181 "::Null;", 4182 4183 // [9] 4184 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4185 "$delim = [\\-$ws];" 4186 "::Null;" 4187 "$ws $delim* > ' ';" 4188 "'-' $delim* > '-';", 4189 4190 // [10] 4191/* 4192 "::BEGIN;" 4193 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4194 "$delim = [\\-$ws];" 4195 "::END;" 4196 "$ws $delim* > ' ';" 4197 "'-' $delim* > '-';", 4198*/ 4199 "", // test case commented out below, this is here to keep from messing up the indexes 4200 4201 // [11] 4202/* 4203 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4204 "$delim = [\\-$ws];" 4205 "::BEGIN;" 4206 "$ws $delim* > ' ';" 4207 "'-' $delim* > '-';" 4208 "::END;", 4209*/ 4210 "", // test case commented out below, this is here to keep from messing up the indexes 4211 4212 // [12] 4213/* 4214 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4215 "$delim = [\\-$ws];" 4216 "$ab = [ab];" 4217 "::BEGIN;" 4218 "$ws $delim* > ' ';" 4219 "'-' $delim* > '-';" 4220 "::END;" 4221 "::BEGIN;" 4222 "$ab { ' ' } $ab > '-';" 4223 "c { ' ' > ;" 4224 "::END;" 4225 "::BEGIN;" 4226 "'a-a' > a\\%|a;" 4227 "::END;", 4228*/ 4229 "", // test case commented out below, this is here to keep from messing up the indexes 4230 4231 // [13] 4232 "$ws = [[:Separator:][\\u0009-\\u000C]$];" 4233 "$delim = [\\-$ws];" 4234 "$ab = [ab];" 4235 "::Null;" 4236 "$ws $delim* > ' ';" 4237 "'-' $delim* > '-';" 4238 "::Null;" 4239 "$ab { ' ' } $ab > '-';" 4240 "c { ' ' > ;" 4241 "::Null;" 4242 "'a-a' > a\\%|a;", 4243 4244 // [14] 4245/* 4246 "::[abc];" 4247 "::BEGIN;" 4248 "abc > xy;" 4249 "::END;" 4250 "::BEGIN;" 4251 "aba > yz;" 4252 "::END;" 4253 "::Upper;", 4254*/ 4255 "", // test case commented out below, this is here to keep from messing up the indexes 4256 4257 // [15] 4258 "::[abc];" 4259 "abc > xy;" 4260 "::Null;" 4261 "aba > yz;" 4262 "::Upper;", 4263 4264 // [16] 4265/* 4266 "::[abc];" 4267 "::BEGIN;" 4268 "abc <> xy;" 4269 "::END;" 4270 "::BEGIN;" 4271 "aba <> yz;" 4272 "::END;" 4273 "::Upper(Lower);" 4274 "::([XYZ]);" 4275*/ 4276 "", // test case commented out below, this is here to keep from messing up the indexes 4277 4278 // [17] 4279 "::[abc];" 4280 "abc <> xy;" 4281 "::Null;" 4282 "aba <> yz;" 4283 "::Upper(Lower);" 4284 "::([XYZ]);" 4285}; 4286static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0])); 4287 4288/* 4289(This entire test is commented out below and will need some heavy revision when we re-add 4290the ::BEGIN/::END stuff) 4291static const char* BOGUS_BEGIN_END_RULES[] = { 4292 // [7] 4293 "::BEGIN;" 4294 "abc > xy;" 4295 "::BEGIN;" 4296 "aba > z;" 4297 "::END;" 4298 "::END;", 4299 4300 // [8] 4301 "abc > xy;" 4302 " aba > z;" 4303 "::END;", 4304 4305 // [9] 4306 "::BEGIN;" 4307 "::Upper;" 4308 "::END;" 4309}; 4310static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0])); 4311*/ 4312 4313static const char* BEGIN_END_TEST_CASES[] = { 4314 // rules input expected output 4315 BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", 4316// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", 4317// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", 4318// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", 4319 BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", 4320 BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", 4321 4322 BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", 4323 BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", 4324 BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", 4325 BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", 4326// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", 4327// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", 4328// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", 4329// BEGIN_END_RULES[12], "a a a a", "a%a%a%a", 4330// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", 4331 BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", 4332 BEGIN_END_RULES[13], "a a a a", "a%a%a%a", 4333 BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", 4334 4335// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 4336 BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 4337// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 4338 BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 4339}; 4340static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0])); 4341 4342void TransliteratorTest::TestBeginEnd() { 4343 // run through the list of test cases above 4344 int32_t i = 0; 4345 for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { 4346 expect((UnicodeString)"Test case #" + (i / 3), 4347 UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), 4348 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), 4349 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); 4350 } 4351 4352 // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 4353 UParseError parseError; 4354 UErrorCode status = U_ZERO_ERROR; 4355 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]), 4356 UTRANS_REVERSE, parseError, status); 4357 if (reversed == 0 || U_FAILURE(status)) { 4358 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status); 4359 } else { 4360 expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba")); 4361 } 4362 delete reversed; 4363 4364 // finally, run through the list of syntactically-ill-formed rule sets above and make sure 4365 // that all of them cause errors 4366/* 4367(commented out until we have the real ::BEGIN/::END stuff in place 4368 for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) { 4369 UParseError parseError; 4370 UErrorCode status = U_ZERO_ERROR; 4371 Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]), 4372 UTRANS_FORWARD, parseError, status); 4373 if (!U_FAILURE(status)) { 4374 delete t; 4375 errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 4376 } 4377 } 4378*/ 4379} 4380 4381void TransliteratorTest::TestBeginEndToRules() { 4382 // run through the same list of test cases we used above, but this time, instead of just 4383 // instantiating a Transliterator from the rules and running the test against it, we instantiate 4384 // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 4385 // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 4386 // to (i.e., does the same thing as) the original rule set 4387 for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { 4388 UParseError parseError; 4389 UErrorCode status = U_ZERO_ERROR; 4390 Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), 4391 UTRANS_FORWARD, parseError, status); 4392 if (U_FAILURE(status)) { 4393 reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status); 4394 } else { 4395 UnicodeString rules; 4396 t->toRules(rules, TRUE); 4397 Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules, 4398 UTRANS_FORWARD, parseError, status); 4399 if (U_FAILURE(status)) { 4400 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"), 4401 parseError, status); 4402 delete t; 4403 } else { 4404 expect(*t2, 4405 UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), 4406 UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); 4407 delete t; 4408 delete t2; 4409 } 4410 } 4411 } 4412 4413 // do the same thing for the reversible test case 4414 UParseError parseError; 4415 UErrorCode status = U_ZERO_ERROR; 4416 Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]), 4417 UTRANS_REVERSE, parseError, status); 4418 if (U_FAILURE(status)) { 4419 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status); 4420 } else { 4421 UnicodeString rules; 4422 reversed->toRules(rules, FALSE); 4423 Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD, 4424 parseError, status); 4425 if (U_FAILURE(status)) { 4426 reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"), 4427 parseError, status); 4428 delete reversed; 4429 } else { 4430 expect(*reversed2, 4431 UnicodeString("xy XY XYZ yz YZ"), 4432 UnicodeString("xy abc xaba yz aba")); 4433 delete reversed; 4434 delete reversed2; 4435 } 4436 } 4437} 4438 4439void TransliteratorTest::TestRegisterAlias() { 4440 UnicodeString longID("Lower;[aeiou]Upper"); 4441 UnicodeString shortID("Any-CapVowels"); 4442 UnicodeString reallyShortID("CapVowels"); 4443 4444 Transliterator::registerAlias(shortID, longID); 4445 4446 UErrorCode err = U_ZERO_ERROR; 4447 Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err); 4448 if (U_FAILURE(err)) { 4449 errln("Failed to instantiate transliterator with long ID"); 4450 Transliterator::unregister(shortID); 4451 return; 4452 } 4453 Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err); 4454 if (U_FAILURE(err)) { 4455 errln("Failed to instantiate transliterator with short ID"); 4456 delete t1; 4457 Transliterator::unregister(shortID); 4458 return; 4459 } 4460 4461 if (t1->getID() != longID) 4462 errln("Transliterator instantiated with long ID doesn't have long ID"); 4463 if (t2->getID() != reallyShortID) 4464 errln("Transliterator instantiated with short ID doesn't have short ID"); 4465 4466 UnicodeString rules1; 4467 UnicodeString rules2; 4468 4469 t1->toRules(rules1, TRUE); 4470 t2->toRules(rules2, TRUE); 4471 if (rules1 != rules2) 4472 errln("Alias transliterators aren't the same"); 4473 4474 delete t1; 4475 delete t2; 4476 Transliterator::unregister(shortID); 4477 4478 t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err); 4479 if (U_SUCCESS(err)) { 4480 errln("Instantiation with short ID succeeded after short ID was unregistered"); 4481 delete t1; 4482 } 4483 4484 // try the same thing again, but this time with something other than 4485 // an instance of CompoundTransliterator 4486 UnicodeString realID("Latin-Greek"); 4487 UnicodeString fakeID("Latin-dlgkjdflkjdl"); 4488 Transliterator::registerAlias(fakeID, realID); 4489 4490 err = U_ZERO_ERROR; 4491 t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err); 4492 if (U_FAILURE(err)) { 4493 dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err)); 4494 Transliterator::unregister(realID); 4495 return; 4496 } 4497 t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err); 4498 if (U_FAILURE(err)) { 4499 errln("Failed to instantiate transliterator with fake ID"); 4500 delete t1; 4501 Transliterator::unregister(realID); 4502 return; 4503 } 4504 4505 t1->toRules(rules1, TRUE); 4506 t2->toRules(rules2, TRUE); 4507 if (rules1 != rules2) 4508 errln("Alias transliterators aren't the same"); 4509 4510 delete t1; 4511 delete t2; 4512 Transliterator::unregister(fakeID); 4513} 4514 4515void TransliteratorTest::TestRuleStripping() { 4516 /* 4517# 4518\uE001>\u0C01; # SIGN 4519 */ 4520 static const UChar rule[] = { 4521 0x0023,0x0020,0x000D,0x000A, 4522 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0 4523 }; 4524 static const UChar expectedRule[] = { 4525 0xE001,0x003E,0x0C01,0x003B,0 4526 }; 4527 UChar result[sizeof(rule)/sizeof(rule[0])]; 4528 UErrorCode status = U_ZERO_ERROR; 4529 int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status); 4530 if (len != u_strlen(expectedRule)) { 4531 errln("utrans_stripRules return len = %d", len); 4532 } 4533 if (u_strncmp(expectedRule, result, len) != 0) { 4534 errln("utrans_stripRules did not return expected string"); 4535 } 4536} 4537 4538/** 4539 * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 4540 */ 4541void TransliteratorTest::TestHalfwidthFullwidth(void) { 4542 UParseError parseError; 4543 UErrorCode status = U_ZERO_ERROR; 4544 Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status); 4545 Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status); 4546 if (hf == 0 || fh == 0) { 4547 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 4548 delete hf; 4549 delete fh; 4550 return; 4551 } 4552 4553 // Array of 2n items 4554 // Each item is 4555 // "hf"|"fh"|"both", 4556 // <Halfwidth>, 4557 // <Fullwidth> 4558 const char* DATA[] = { 4559 "both", 4560 "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020", 4561 "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000", 4562 }; 4563 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 4564 4565 for (int32_t i=0; i<DATA_length; i+=3) { 4566 UnicodeString h = CharsToUnicodeString(DATA[i+1]); 4567 UnicodeString f = CharsToUnicodeString(DATA[i+2]); 4568 switch (*DATA[i]) { 4569 case 0x68: //'h': // Halfwidth-Fullwidth only 4570 expect(*hf, h, f); 4571 break; 4572 case 0x66: //'f': // Fullwidth-Halfwidth only 4573 expect(*fh, f, h); 4574 break; 4575 case 0x62: //'b': // both directions 4576 expect(*hf, h, f); 4577 expect(*fh, f, h); 4578 break; 4579 } 4580 } 4581 delete hf; 4582 delete fh; 4583} 4584 4585 4586 /** 4587 * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 4588 * TODO: confirm that the expected results are correct. 4589 * For now, test just confirms that C++ and Java give identical results. 4590 */ 4591void TransliteratorTest::TestThai(void) { 4592#if !UCONFIG_NO_BREAK_ITERATION 4593 UParseError parseError; 4594 UErrorCode status = U_ZERO_ERROR; 4595 Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); 4596 if (tr == 0) { 4597 dataerrln("FAIL: createInstance failed - %s", u_errorName(status)); 4598 return; 4599 } 4600 if (U_FAILURE(status)) { 4601 errln("FAIL: createInstance failed with %s", u_errorName(status)); 4602 return; 4603 } 4604 const char *thaiText = 4605 "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d" 4606 "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22" 4607 "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d" 4608 "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d" 4609 "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29" 4610 "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42" 4611 "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25" 4612 "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15" 4613 "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08" 4614 "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49" 4615 "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23" 4616 "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23" 4617 "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48" 4618 "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48" 4619 "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30" 4620 "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d" 4621 "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01" 4622 "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e" 4623 "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49" 4624 "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04" 4625 "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19" 4626 "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43" 4627 "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20" 4628 "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35" 4629 " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b" 4630 "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04" 4631 "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19" 4632 " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40" 4633 "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22" 4634 "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b."; 4635 4636 const char *latinText = 4637 "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300" 4638 "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr" 4639 "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304" 4640 "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304" 4641 "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301" 4642 " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b " 4643 "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101" 4644 "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m" 4645 "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p" 4646 "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304" 4647 "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101" 4648 "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131" 4649 " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9" 4650 "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306" 4651 " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1" 4652 "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6" 4653 "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131" 4654 "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb."; 4655 4656 4657 UnicodeString xlitText(thaiText); 4658 xlitText = xlitText.unescape(); 4659 tr->transliterate(xlitText); 4660 4661 UnicodeString expectedText(latinText); 4662 expectedText = expectedText.unescape(); 4663 expect(*tr, xlitText, expectedText); 4664 4665 delete tr; 4666#endif 4667} 4668 4669 4670//====================================================================== 4671// Support methods 4672//====================================================================== 4673void TransliteratorTest::expectT(const UnicodeString& id, 4674 const UnicodeString& source, 4675 const UnicodeString& expectedResult) { 4676 UErrorCode ec = U_ZERO_ERROR; 4677 UParseError pe; 4678 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec); 4679 if (U_FAILURE(ec)) { 4680 errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec)); 4681 delete t; 4682 return; 4683 } 4684 expect(*t, source, expectedResult); 4685 delete t; 4686} 4687 4688void TransliteratorTest::reportParseError(const UnicodeString& message, 4689 const UParseError& parseError, 4690 const UErrorCode& status) { 4691 dataerrln(message + 4692 /*", parse error " + parseError.code +*/ 4693 ", line " + parseError.line + 4694 ", offset " + parseError.offset + 4695 ", pre-context " + prettify(parseError.preContext, TRUE) + 4696 ", post-context " + prettify(parseError.postContext,TRUE) + 4697 ", Error: " + u_errorName(status)); 4698} 4699 4700void TransliteratorTest::expect(const UnicodeString& rules, 4701 const UnicodeString& source, 4702 const UnicodeString& expectedResult, 4703 UTransPosition *pos) { 4704 expect("<ID>", rules, source, expectedResult, pos); 4705} 4706 4707void TransliteratorTest::expect(const UnicodeString& id, 4708 const UnicodeString& rules, 4709 const UnicodeString& source, 4710 const UnicodeString& expectedResult, 4711 UTransPosition *pos) { 4712 UErrorCode status = U_ZERO_ERROR; 4713 UParseError parseError; 4714 Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status); 4715 if (U_FAILURE(status)) { 4716 reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status); 4717 } else { 4718 expect(*t, source, expectedResult, pos); 4719 } 4720 delete t; 4721} 4722 4723void TransliteratorTest::expect(const Transliterator& t, 4724 const UnicodeString& source, 4725 const UnicodeString& expectedResult, 4726 const Transliterator& reverseTransliterator) { 4727 expect(t, source, expectedResult); 4728 expect(reverseTransliterator, expectedResult, source); 4729} 4730 4731void TransliteratorTest::expect(const Transliterator& t, 4732 const UnicodeString& source, 4733 const UnicodeString& expectedResult, 4734 UTransPosition *pos) { 4735 if (pos == 0) { 4736 UnicodeString result(source); 4737 t.transliterate(result); 4738 expectAux(t.getID() + ":String", source, result, expectedResult); 4739 } 4740 UTransPosition index={0, 0, 0, 0}; 4741 if (pos != 0) { 4742 index = *pos; 4743 } 4744 4745 UnicodeString rsource(source); 4746 if (pos == 0) { 4747 t.transliterate(rsource); 4748 } else { 4749 // Do it all at once -- below we do it incrementally 4750 t.finishTransliteration(rsource, *pos); 4751 } 4752 expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult); 4753 4754 // Test keyboard (incremental) transliteration -- this result 4755 // must be the same after we finalize (see below). 4756 UnicodeString log; 4757 rsource.remove(); 4758 if (pos != 0) { 4759 rsource = source; 4760 formatInput(log, rsource, index); 4761 log.append(" -> "); 4762 UErrorCode status = U_ZERO_ERROR; 4763 t.transliterate(rsource, index, status); 4764 formatInput(log, rsource, index); 4765 } else { 4766 for (int32_t i=0; i<source.length(); ++i) { 4767 if (i != 0) { 4768 log.append(" + "); 4769 } 4770 log.append(source.charAt(i)).append(" -> "); 4771 UErrorCode status = U_ZERO_ERROR; 4772 t.transliterate(rsource, index, source.charAt(i), status); 4773 formatInput(log, rsource, index); 4774 } 4775 } 4776 4777 // As a final step in keyboard transliteration, we must call 4778 // transliterate to finish off any pending partial matches that 4779 // were waiting for more input. 4780 t.finishTransliteration(rsource, index); 4781 log.append(" => ").append(rsource); 4782 4783 expectAux(t.getID() + ":Keyboard", log, 4784 rsource == expectedResult, 4785 expectedResult); 4786} 4787 4788 4789/** 4790 * @param appendTo result is appended to this param. 4791 * @param input the string being transliterated 4792 * @param pos the index struct 4793 */ 4794UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo, 4795 const UnicodeString& input, 4796 const UTransPosition& pos) { 4797 // Output a string of the form aaa{bbb|ccc|ddd}eee, where 4798 // the {} indicate the context start and limit, and the || 4799 // indicate the start and limit. 4800 if (0 <= pos.contextStart && 4801 pos.contextStart <= pos.start && 4802 pos.start <= pos.limit && 4803 pos.limit <= pos.contextLimit && 4804 pos.contextLimit <= input.length()) { 4805 4806 UnicodeString a, b, c, d, e; 4807 input.extractBetween(0, pos.contextStart, a); 4808 input.extractBetween(pos.contextStart, pos.start, b); 4809 input.extractBetween(pos.start, pos.limit, c); 4810 input.extractBetween(pos.limit, pos.contextLimit, d); 4811 input.extractBetween(pos.contextLimit, input.length(), e); 4812 appendTo.append(a).append((UChar)123/*{*/).append(b). 4813 append((UChar)PIPE).append(c).append((UChar)PIPE).append(d). 4814 append((UChar)125/*}*/).append(e); 4815 } else { 4816 appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" + 4817 pos.contextStart + ", s=" + pos.start + ", l=" + 4818 pos.limit + ", cl=" + pos.contextLimit + "} on " + 4819 input); 4820 } 4821 return appendTo; 4822} 4823 4824void TransliteratorTest::expectAux(const UnicodeString& tag, 4825 const UnicodeString& source, 4826 const UnicodeString& result, 4827 const UnicodeString& expectedResult) { 4828 expectAux(tag, source + " -> " + result, 4829 result == expectedResult, 4830 expectedResult); 4831} 4832 4833void TransliteratorTest::expectAux(const UnicodeString& tag, 4834 const UnicodeString& summary, UBool pass, 4835 const UnicodeString& expectedResult) { 4836 if (pass) { 4837 logln(UnicodeString("(")+tag+") " + prettify(summary)); 4838 } else { 4839 dataerrln(UnicodeString("FAIL: (")+tag+") " 4840 + prettify(summary) 4841 + ", expected " + prettify(expectedResult)); 4842 } 4843} 4844 4845#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 4846