regcoll.cpp revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7#include "unicode/utypes.h" 8 9#if !UCONFIG_NO_COLLATION 10 11#include "unicode/coll.h" 12#include "unicode/localpointer.h" 13#include "unicode/tblcoll.h" 14#include "unicode/unistr.h" 15#include "unicode/sortkey.h" 16#include "regcoll.h" 17#include "sfwdchit.h" 18#include "testutil.h" 19#include "cmemory.h" 20 21#define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0])) 22 23CollationRegressionTest::CollationRegressionTest() 24{ 25 UErrorCode status = U_ZERO_ERROR; 26 27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); 28 if(U_FAILURE(status)) { 29 delete en_us; 30 en_us = 0; 31 errcheckln(status, "Collator creation failed with %s", u_errorName(status)); 32 return; 33 } 34} 35 36CollationRegressionTest::~CollationRegressionTest() 37{ 38 delete en_us; 39} 40 41 42 // @bug 4048446 43// 44// CollationElementIterator.reset() doesn't work 45// 46void CollationRegressionTest::Test4048446(/* char* par */) 47{ 48 const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?"; 49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?"; 50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); 51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); 52 UErrorCode status = U_ZERO_ERROR; 53 54 if (i1 == NULL|| i2 == NULL) 55 { 56 errln("Could not create CollationElementIterator's"); 57 delete i1; 58 delete i2; 59 return; 60 } 61 62 while (i1->next(status) != CollationElementIterator::NULLORDER) 63 { 64 if (U_FAILURE(status)) 65 { 66 errln("error calling next()"); 67 68 delete i1; 69 delete i2; 70 return; 71 } 72 } 73 74 i1->reset(); 75 76 assertEqual(*i1, *i2); 77 78 delete i1; 79 delete i2; 80} 81 82// @bug 4051866 83// 84// Collator -> rules -> Collator round-trip broken for expanding characters 85// 86void CollationRegressionTest::Test4051866(/* char* par */) 87{ 88 UnicodeString rules; 89 UErrorCode status = U_ZERO_ERROR; 90 91 rules += "&n < o "; 92 rules += "& oe ,o"; 93 rules += (UChar)0x3080; 94 rules += "& oe ,"; 95 rules += (UChar)0x1530; 96 rules += " ,O"; 97 rules += "& OE ,O"; 98 rules += (UChar)0x3080; 99 rules += "& OE ,"; 100 rules += (UChar)0x1520; 101 rules += "< p ,P"; 102 103 // Build a collator containing expanding characters 104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status)); 105 106 // Build another using the rules from the first 107 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status)); 108 if (U_FAILURE(status)) { 109 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status)); 110 return; 111 } 112 113 // Make sure they're the same 114 if (!(c1->getRules() == c2->getRules())) 115 { 116 errln("Rules are not equal"); 117 } 118} 119 120// @bug 4053636 121// 122// Collator thinks "black-bird" == "black" 123// 124void CollationRegressionTest::Test4053636(/* char* par */) 125{ 126 if (en_us->equals("black_bird", "black")) 127 { 128 errln("black-bird == black"); 129 } 130} 131 132// @bug 4054238 133// 134// CollationElementIterator will not work correctly if the associated 135// Collator object's mode is changed 136// 137void CollationRegressionTest::Test4054238(/* char* par */) 138{ 139 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; 140 const UnicodeString test3(chars3); 141 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 142 143 // NOTE: The Java code uses en_us to create the CollationElementIterators 144 // but I'm pretty sure that's wrong, so I've changed this to use c. 145 UErrorCode status = U_ZERO_ERROR; 146 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 147 CollationElementIterator *i1 = c->createCollationElementIterator(test3); 148 delete i1; 149 delete c; 150} 151 152// @bug 4054734 153// 154// Collator::IDENTICAL documented but not implemented 155// 156void CollationRegressionTest::Test4054734(/* char* par */) 157{ 158 /* 159 Here's the original Java: 160 161 String[] decomp = { 162 "\u0001", "<", "\u0002", 163 "\u0001", "=", "\u0001", 164 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise 165 "\u00C0", "=", "A\u0300" // Decomp should make these equal 166 }; 167 168 String[] nodecomp = { 169 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave 170 }; 171 */ 172 173 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = 174 { 175 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, 176 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, 177 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, 178 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} 179 }; 180 181 182 UErrorCode status = U_ZERO_ERROR; 183 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 184 185 c->setStrength(Collator::IDENTICAL); 186 187 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 188 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); 189 190 delete c; 191} 192 193// @bug 4054736 194// 195// Full Decomposition mode not implemented 196// 197void CollationRegressionTest::Test4054736(/* char* par */) 198{ 199 UErrorCode status = U_ZERO_ERROR; 200 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 201 202 c->setStrength(Collator::SECONDARY); 203 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 204 205 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 206 { 207 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed 208 }; 209 210 compareArray(*c, tests, ARRAY_LENGTH(tests)); 211 212 delete c; 213} 214 215// @bug 4058613 216// 217// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean 218// 219void CollationRegressionTest::Test4058613(/* char* par */) 220{ 221 // Creating a default collator doesn't work when Korean is the default 222 // locale 223 224 Locale oldDefault = Locale::getDefault(); 225 UErrorCode status = U_ZERO_ERROR; 226 227 Locale::setDefault(Locale::getKorean(), status); 228 229 if (U_FAILURE(status)) 230 { 231 errln("Could not set default locale to Locale::KOREAN"); 232 return; 233 } 234 235 Collator *c = NULL; 236 237 c = Collator::createInstance("en_US", status); 238 239 if (c == NULL || U_FAILURE(status)) 240 { 241 errln("Could not create a Korean collator"); 242 Locale::setDefault(oldDefault, status); 243 delete c; 244 return; 245 } 246 247 // Since the fix to this bug was to turn off decomposition for Korean collators, 248 // ensure that's what we got 249 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) 250 { 251 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); 252 } 253 254 delete c; 255 256 Locale::setDefault(oldDefault, status); 257} 258 259// @bug 4059820 260// 261// RuleBasedCollator.getRules does not return the exact pattern as input 262// for expanding character sequences 263// 264void CollationRegressionTest::Test4059820(/* char* par */) 265{ 266 UErrorCode status = U_ZERO_ERROR; 267 268 RuleBasedCollator *c = NULL; 269 UnicodeString rules = "&9 < a < b , c/a < d < z"; 270 271 c = new RuleBasedCollator(rules, status); 272 273 if (c == NULL || U_FAILURE(status)) 274 { 275 errln("Failure building a collator."); 276 delete c; 277 return; 278 } 279 280 if ( c->getRules().indexOf("c/a") == -1) 281 { 282 errln("returned rules do not contain 'c/a'"); 283 } 284 285 delete c; 286} 287 288// @bug 4060154 289// 290// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" 291// 292void CollationRegressionTest::Test4060154(/* char* par */) 293{ 294 UErrorCode status = U_ZERO_ERROR; 295 UnicodeString rules; 296 297 rules += "&f < g, G < h, H < i, I < j, J"; 298 rules += " & H < "; 299 rules += (UChar)0x0131; 300 rules += ", "; 301 rules += (UChar)0x0130; 302 rules += ", i, I"; 303 304 RuleBasedCollator *c = NULL; 305 306 c = new RuleBasedCollator(rules, status); 307 308 if (c == NULL || U_FAILURE(status)) 309 { 310 errln("failure building collator."); 311 delete c; 312 return; 313 } 314 315 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 316 317 /* 318 String[] tertiary = { 319 "A", "<", "B", 320 "H", "<", "\u0131", 321 "H", "<", "I", 322 "\u0131", "<", "\u0130", 323 "\u0130", "<", "i", 324 "\u0130", ">", "H", 325 }; 326*/ 327 328 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = 329 { 330 {0x41, 0}, {0x3c, 0}, {0x42, 0}, 331 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, 332 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 333 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, 334 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, 335 {0x0130, 0}, {0x3e, 0}, {0x48, 0} 336 }; 337 338 c->setStrength(Collator::TERTIARY); 339 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); 340 341 /* 342 String[] secondary = { 343 "H", "<", "I", 344 "\u0131", "=", "\u0130", 345 }; 346*/ 347 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = 348 { 349 {0x48, 0}, {0x3c, 0}, {0x49, 0}, 350 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} 351 }; 352 353 c->setStrength(Collator::PRIMARY); 354 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); 355 356 delete c; 357} 358 359// @bug 4062418 360// 361// Secondary/Tertiary comparison incorrect in French Secondary 362// 363void CollationRegressionTest::Test4062418(/* char* par */) 364{ 365 UErrorCode status = U_ZERO_ERROR; 366 367 RuleBasedCollator *c = NULL; 368 369 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status); 370 371 if (c == NULL || U_FAILURE(status)) 372 { 373 errln("Failed to create collator for Locale::getCanadaFrench()"); 374 delete c; 375 return; 376 } 377 378 c->setStrength(Collator::SECONDARY); 379 380/* 381 String[] tests = { 382 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents from end, p\u00e9ch\u00e9 is greater 383 }; 384*/ 385 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 386 { 387 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0} 388 }; 389 390 compareArray(*c, tests, ARRAY_LENGTH(tests)); 391 392 delete c; 393} 394 395// @bug 4065540 396// 397// Collator::compare() method broken if either string contains spaces 398// 399void CollationRegressionTest::Test4065540(/* char* par */) 400{ 401 if (en_us->compare("abcd e", "abcd f") == 0) 402 { 403 errln("'abcd e' == 'abcd f'"); 404 } 405} 406 407// @bug 4066189 408// 409// Unicode characters need to be recursively decomposed to get the 410// correct result. For example, 411// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. 412// 413void CollationRegressionTest::Test4066189(/* char* par */) 414{ 415 static const UChar chars1[] = {0x1EB1, 0}; 416 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; 417 const UnicodeString test1(chars1); 418 const UnicodeString test2(chars2); 419 UErrorCode status = U_ZERO_ERROR; 420 421 // NOTE: The java code used en_us to create the 422 // CollationElementIterator's. I'm pretty sure that 423 // was wrong, so I've change the code to use c1 and c2 424 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); 425 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 426 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); 427 428 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); 429 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 430 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); 431 432 assertEqual(*i1, *i2); 433 434 delete i2; 435 delete c2; 436 delete i1; 437 delete c1; 438} 439 440// @bug 4066696 441// 442// French secondary collation checking at the end of compare iteration fails 443// 444void CollationRegressionTest::Test4066696(/* char* par */) 445{ 446 UErrorCode status = U_ZERO_ERROR; 447 RuleBasedCollator *c = NULL; 448 449 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status); 450 451 if (c == NULL || U_FAILURE(status)) 452 { 453 errln("Failure creating collator for Locale::getCanadaFrench()"); 454 delete c; 455 return; 456 } 457 458 c->setStrength(Collator::SECONDARY); 459 460/* 461 String[] tests = { 462 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute 463 }; 464 465 should be: 466 467 String[] tests = { 468 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute 469 }; 470 471*/ 472 473 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 474 { 475 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} 476 }; 477 478 compareArray(*c, tests, ARRAY_LENGTH(tests)); 479 480 delete c; 481} 482 483// @bug 4076676 484// 485// Bad canonicalization of same-class combining characters 486// 487void CollationRegressionTest::Test4076676(/* char* par */) 488{ 489 // These combining characters are all in the same class, so they should not 490 // be reordered, and they should compare as unequal. 491 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; 492 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; 493 494 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 495 c->setStrength(Collator::TERTIARY); 496 497 if (c->compare(s1,s2) == 0) 498 { 499 errln("Same-class combining chars were reordered"); 500 } 501 502 delete c; 503} 504 505// @bug 4079231 506// 507// RuleBasedCollator::operator==(NULL) throws NullPointerException 508// 509void CollationRegressionTest::Test4079231(/* char* par */) 510{ 511 // I don't think there's any way to write this test 512 // in C++. The following is equivalent to the Java, 513 // but doesn't compile 'cause NULL can't be converted 514 // to Collator& 515 // 516 // if (en_us->operator==(NULL)) 517 // { 518 // errln("en_us->operator==(NULL) returned TRUE"); 519 // } 520 521 /* 522 try { 523 if (en_us->equals(null)) { 524 errln("en_us->equals(null) returned true"); 525 } 526 } 527 catch (Exception e) { 528 errln("en_us->equals(null) threw " + e.toString()); 529 } 530*/ 531} 532 533// @bug 4078588 534// 535// RuleBasedCollator breaks on "< a < bb" rule 536// 537void CollationRegressionTest::Test4078588(/* char *par */) 538{ 539 UErrorCode status = U_ZERO_ERROR; 540 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); 541 542 if (rbc == NULL || U_FAILURE(status)) 543 { 544 errln("Failed to create RuleBasedCollator."); 545 delete rbc; 546 return; 547 } 548 549 Collator::EComparisonResult result = rbc->compare("a","bb"); 550 551 if (result != Collator::LESS) 552 { 553 errln((UnicodeString)"Compare(a,bb) returned " + (int)result 554 + (UnicodeString)"; expected -1"); 555 } 556 557 delete rbc; 558} 559 560// @bug 4081866 561// 562// Combining characters in different classes not reordered properly. 563// 564void CollationRegressionTest::Test4081866(/* char* par */) 565{ 566 // These combining characters are all in different classes, 567 // so they should be reordered and the strings should compare as equal. 568 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; 569 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; 570 571 UErrorCode status = U_ZERO_ERROR; 572 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 573 c->setStrength(Collator::TERTIARY); 574 575 // Now that the default collators are set to NO_DECOMPOSITION 576 // (as a result of fixing bug 4114077), we must set it explicitly 577 // when we're testing reordering behavior. -- lwerner, 5/5/98 578 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 579 580 if (c->compare(s1,s2) != 0) 581 { 582 errln("Combining chars were not reordered"); 583 } 584 585 delete c; 586} 587 588// @bug 4087241 589// 590// string comparison errors in Scandinavian collators 591// 592void CollationRegressionTest::Test4087241(/* char* par */) 593{ 594 UErrorCode status = U_ZERO_ERROR; 595 Locale da_DK("da", "DK"); 596 RuleBasedCollator *c = NULL; 597 598 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); 599 600 if (c == NULL || U_FAILURE(status)) 601 { 602 errln("Failed to create collator for da_DK locale"); 603 delete c; 604 return; 605 } 606 607 c->setStrength(Collator::SECONDARY); 608 609 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 610 { 611 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae 612 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-ring 613 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-umlaut 614 }; 615 616 compareArray(*c, tests, ARRAY_LENGTH(tests)); 617 618 delete c; 619} 620 621// @bug 4087243 622// 623// CollationKey takes ignorable strings into account when it shouldn't 624// 625void CollationRegressionTest::Test4087243(/* char* par */) 626{ 627 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 628 c->setStrength(Collator::TERTIARY); 629 630 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 631 { 632 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1 2 3 = 1 2 3 ctrl-A 633 }; 634 635 compareArray(*c, tests, ARRAY_LENGTH(tests)); 636 637 delete c; 638} 639 640// @bug 4092260 641// 642// Mu/micro conflict 643// Micro symbol and greek lowercase letter Mu should sort identically 644// 645void CollationRegressionTest::Test4092260(/* char* par */) 646{ 647 UErrorCode status = U_ZERO_ERROR; 648 Locale el("el", ""); 649 Collator *c = NULL; 650 651 c = Collator::createInstance(el, status); 652 653 if (c == NULL || U_FAILURE(status)) 654 { 655 errln("Failed to create collator for el locale."); 656 delete c; 657 return; 658 } 659 660 // These now have tertiary differences in UCA 661 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 662 663 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 664 { 665 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} 666 }; 667 668 compareArray(*c, tests, ARRAY_LENGTH(tests)); 669 670 delete c; 671} 672 673// @bug 4095316 674// 675void CollationRegressionTest::Test4095316(/* char* par */) 676{ 677 UErrorCode status = U_ZERO_ERROR; 678 Locale el_GR("el", "GR"); 679 Collator *c = Collator::createInstance(el_GR, status); 680 681 if (c == NULL || U_FAILURE(status)) 682 { 683 errln("Failed to create collator for el_GR locale"); 684 delete c; 685 return; 686 } 687 // These now have tertiary differences in UCA 688 //c->setStrength(Collator::TERTIARY); 689 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); 690 691 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 692 { 693 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} 694 }; 695 696 compareArray(*c, tests, ARRAY_LENGTH(tests)); 697 698 delete c; 699} 700 701// @bug 4101940 702// 703void CollationRegressionTest::Test4101940(/* char* par */) 704{ 705 UErrorCode status = U_ZERO_ERROR; 706 RuleBasedCollator *c = NULL; 707 UnicodeString rules = "&9 < a < b"; 708 UnicodeString nothing = ""; 709 710 c = new RuleBasedCollator(rules, status); 711 712 if (c == NULL || U_FAILURE(status)) 713 { 714 errln("Failed to create RuleBasedCollator"); 715 delete c; 716 return; 717 } 718 719 CollationElementIterator *i = c->createCollationElementIterator(nothing); 720 i->reset(); 721 722 if (i->next(status) != CollationElementIterator::NULLORDER) 723 { 724 errln("next did not return NULLORDER"); 725 } 726 727 delete i; 728 delete c; 729} 730 731// @bug 4103436 732// 733// Collator::compare not handling spaces properly 734// 735void CollationRegressionTest::Test4103436(/* char* par */) 736{ 737 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 738 c->setStrength(Collator::TERTIARY); 739 740 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = 741 { 742 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, 743 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0} 744 }; 745 746 compareArray(*c, tests, ARRAY_LENGTH(tests)); 747 748 delete c; 749} 750 751// @bug 4114076 752// 753// Collation not Unicode conformant with Hangul syllables 754// 755void CollationRegressionTest::Test4114076(/* char* par */) 756{ 757 UErrorCode status = U_ZERO_ERROR; 758 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 759 c->setStrength(Collator::TERTIARY); 760 761 // 762 // With Canonical decomposition, Hangul syllables should get decomposed 763 // into Jamo, but Jamo characters should not be decomposed into 764 // conjoining Jamo 765 // 766 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 767 { 768 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} 769 }; 770 771 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 772 compareArray(*c, test1, ARRAY_LENGTH(test1)); 773 774 // From UTR #15: 775 // *In earlier versions of Unicode, jamo characters like ksf 776 // had compatibility mappings to kf + sf. These mappings were 777 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.) 778 // That is, the following test is obsolete as of 2.1.9 779 780//obsolete- // With Full decomposition, it should go all the way down to 781//obsolete- // conjoining Jamo characters. 782//obsolete- // 783//obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 784//obsolete- { 785//obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0} 786//obsolete- }; 787//obsolete- 788//obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); 789//obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); 790 791 delete c; 792} 793 794 795// @bug 4124632 796// 797// Collator::getCollationKey was hanging on certain character sequences 798// 799void CollationRegressionTest::Test4124632(/* char* par */) 800{ 801 UErrorCode status = U_ZERO_ERROR; 802 Collator *coll = NULL; 803 804 coll = Collator::createInstance(Locale::getJapan(), status); 805 806 if (coll == NULL || U_FAILURE(status)) 807 { 808 errln("Failed to create collator for Locale::JAPAN"); 809 delete coll; 810 return; 811 } 812 813 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; 814 CollationKey key; 815 816 coll->getCollationKey(test, key, status); 817 818 if (key.isBogus() || U_FAILURE(status)) 819 { 820 errln("CollationKey creation failed."); 821 } 822 823 delete coll; 824} 825 826// @bug 4132736 827// 828// sort order of french words with multiple accents has errors 829// 830void CollationRegressionTest::Test4132736(/* char* par */) 831{ 832 UErrorCode status = U_ZERO_ERROR; 833 834 Collator *c = NULL; 835 836 c = Collator::createInstance(Locale::getCanadaFrench(), status); 837 c->setStrength(Collator::TERTIARY); 838 839 if (c == NULL || U_FAILURE(status)) 840 { 841 errln("Failed to create a collator for Locale::getCanadaFrench()"); 842 delete c; 843 return; 844 } 845 846 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 847 { 848 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0}, 849 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} 850 }; 851 852 compareArray(*c, test1, ARRAY_LENGTH(test1)); 853 854 delete c; 855} 856 857// @bug 4133509 858// 859// The sorting using java.text.CollationKey is not in the exact order 860// 861void CollationRegressionTest::Test4133509(/* char* par */) 862{ 863 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 864 { 865 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0}, 866 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, 867 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} 868 }; 869 870 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); 871} 872 873// @bug 4114077 874// 875// Collation with decomposition off doesn't work for Europe 876// 877void CollationRegressionTest::Test4114077(/* char* par */) 878{ 879 // Ensure that we get the same results with decomposition off 880 // as we do with it on.... 881 882 UErrorCode status = U_ZERO_ERROR; 883 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); 884 c->setStrength(Collator::TERTIARY); 885 886 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = 887 { 888 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent 889 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0}, 890 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, 891 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0}, // a-ring-acute -> a-ring, acute 892 // -> a, ring, acute 893 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0} // No reordering --> unequal 894 }; 895 896 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); 897 compareArray(*c, test1, ARRAY_LENGTH(test1)); 898 899 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = 900 { 901 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} // Reordering --> equal 902 }; 903 904 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); 905 compareArray(*c, test2, ARRAY_LENGTH(test2)); 906 907 delete c; 908} 909 910// @bug 4141640 911// 912// Support for Swedish gone in 1.1.6 (Can't create Swedish collator) 913// 914void CollationRegressionTest::Test4141640(/* char* par */) 915{ 916 // 917 // Rather than just creating a Swedish collator, we might as well 918 // try to instantiate one for every locale available on the system 919 // in order to prevent this sort of bug from cropping up in the future 920 // 921 UErrorCode status = U_ZERO_ERROR; 922 int32_t i, localeCount; 923 const Locale *locales = Locale::getAvailableLocales(localeCount); 924 925 for (i = 0; i < localeCount; i += 1) 926 { 927 Collator *c = NULL; 928 929 status = U_ZERO_ERROR; 930 c = Collator::createInstance(locales[i], status); 931 932 if (c == NULL || U_FAILURE(status)) 933 { 934 UnicodeString msg, localeName; 935 936 msg += "Could not create collator for locale "; 937 msg += locales[i].getName(); 938 939 errln(msg); 940 } 941 942 delete c; 943 } 944} 945 946// @bug 4139572 947// 948// getCollationKey throws exception for spanish text 949// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 950// 951void CollationRegressionTest::Test4139572(/* char* par */) 952{ 953 // 954 // Code pasted straight from the bug report 955 // (and then translated to C++ ;-) 956 // 957 // create spanish locale and collator 958 UErrorCode status = U_ZERO_ERROR; 959 Locale l("es", "es"); 960 Collator *col = NULL; 961 962 col = Collator::createInstance(l, status); 963 964 if (col == NULL || U_FAILURE(status)) 965 { 966 errln("Failed to create a collator for es_es locale."); 967 delete col; 968 return; 969 } 970 971 CollationKey key; 972 973 // this spanish phrase kills it! 974 col->getCollationKey("Nombre De Objeto", key, status); 975 976 if (key.isBogus() || U_FAILURE(status)) 977 { 978 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); 979 } 980 981 delete col; 982} 983/* HSYS : RuleBasedCollator::compare() performance enhancements 984 compare() does not create CollationElementIterator() anymore.*/ 985 986class My4146160Collator : public RuleBasedCollator 987{ 988public: 989 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); 990 ~My4146160Collator(); 991 992 CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const; 993 994 CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const; 995 996 static int32_t count; 997}; 998 999int32_t My4146160Collator::count = 0; 1000 1001My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) 1002 : RuleBasedCollator(rbc.getRules(), status) 1003{ 1004} 1005 1006My4146160Collator::~My4146160Collator() 1007{ 1008} 1009 1010CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const 1011{ 1012 count += 1; 1013 return RuleBasedCollator::createCollationElementIterator(text); 1014} 1015 1016CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const 1017{ 1018 count += 1; 1019 return RuleBasedCollator::createCollationElementIterator(text); 1020} 1021 1022// @bug 4146160 1023// 1024// RuleBasedCollator doesn't use createCollationElementIterator internally 1025// 1026void CollationRegressionTest::Test4146160(/* char* par */) 1027{ 1028#if 0 1029 // 1030 // Use a custom collator class whose createCollationElementIterator 1031 // methods increment a count.... 1032 // 1033 UErrorCode status = U_ZERO_ERROR; 1034 CollationKey key; 1035 1036 My4146160Collator::count = 0; 1037 My4146160Collator *mc = NULL; 1038 1039 mc = new My4146160Collator(*en_us, status); 1040 1041 if (mc == NULL || U_FAILURE(status)) 1042 { 1043 errln("Failed to create a My4146160Collator."); 1044 delete mc; 1045 return; 1046 } 1047 1048 mc->getCollationKey("1", key, status); 1049 1050 if (key.isBogus() || U_FAILURE(status)) 1051 { 1052 errln("Failure to get a CollationKey from a My4146160Collator."); 1053 delete mc; 1054 return; 1055 } 1056 1057 if (My4146160Collator::count < 1) 1058 { 1059 errln("My4146160Collator::createCollationElementIterator not called for getCollationKey"); 1060 } 1061 1062 My4146160Collator::count = 0; 1063 mc->compare("1", "2"); 1064 1065 if (My4146160Collator::count < 1) 1066 { 1067 errln("My4146160Collator::createtCollationElementIterator not called for compare"); 1068 } 1069 1070 delete mc; 1071#endif 1072} 1073 1074void CollationRegressionTest::Test4179216() { 1075 // you can position a CollationElementIterator in the middle of 1076 // a contracting character sequence, yielding a bogus collation 1077 // element 1078 IcuTestErrorCode errorCode(*this, "Test4179216"); 1079 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode); 1080 UnicodeString testText = "church church catcatcher runcrunchynchy"; 1081 CollationElementIterator *iter = coll.createCollationElementIterator(testText); 1082 1083 // test that the "ch" combination works properly 1084 iter->setOffset(4, errorCode); 1085 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1086 1087 iter->reset(); 1088 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1089 1090 iter->setOffset(5, errorCode); 1091 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1092 1093 // Compares and prints only 16-bit primary weights. 1094 if (elt4 != elt0 || elt5 != elt0) { 1095 errln("The collation elements at positions 0 (0x%04x), " 1096 "4 (0x%04x), and 5 (0x%04x) don't match.", 1097 elt0, elt4, elt5); 1098 } 1099 1100 // test that the "cat" combination works properly 1101 iter->setOffset(14, errorCode); 1102 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1103 1104 iter->setOffset(15, errorCode); 1105 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1106 1107 iter->setOffset(16, errorCode); 1108 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1109 1110 iter->setOffset(17, errorCode); 1111 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1112 1113 iter->setOffset(18, errorCode); 1114 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1115 1116 iter->setOffset(19, errorCode); 1117 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)); 1118 1119 // Compares and prints only 16-bit primary weights. 1120 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 1121 || elt14 != elt18 || elt14 != elt19) { 1122 errln("\"cat\" elements don't match: elt14 = 0x%04x, " 1123 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " 1124 "elt18 = 0x%04x, elt19 = 0x%04x", 1125 elt14, elt15, elt16, elt17, elt18, elt19); 1126 } 1127 1128 // now generate a complete list of the collation elements, 1129 // first using next() and then using setOffset(), and 1130 // make sure both interfaces return the same set of elements 1131 iter->reset(); 1132 1133 int32_t elt = iter->next(errorCode); 1134 int32_t count = 0; 1135 while (elt != CollationElementIterator::NULLORDER) { 1136 ++count; 1137 elt = iter->next(errorCode); 1138 } 1139 1140 LocalArray<UnicodeString> nextElements(new UnicodeString[count]); 1141 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); 1142 int32_t lastPos = 0; 1143 1144 iter->reset(); 1145 elt = iter->next(errorCode); 1146 count = 0; 1147 while (elt != CollationElementIterator::NULLORDER) { 1148 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); 1149 lastPos = iter->getOffset(); 1150 elt = iter->next(errorCode); 1151 } 1152 int32_t nextElementsLength = count; 1153 count = 0; 1154 for (int32_t i = 0; i < testText.length(); ) { 1155 iter->setOffset(i, errorCode); 1156 lastPos = iter->getOffset(); 1157 elt = iter->next(errorCode); 1158 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset()); 1159 i = iter->getOffset(); 1160 } 1161 for (int32_t i = 0; i < nextElementsLength; i++) { 1162 if (nextElements[i] == setOffsetElements[i]) { 1163 logln(nextElements[i]); 1164 } else { 1165 errln(UnicodeString("Error: next() yielded ") + nextElements[i] + 1166 ", but setOffset() yielded " + setOffsetElements[i]); 1167 } 1168 } 1169 delete iter; 1170} 1171 1172// Ticket 7189 1173// 1174// nextSortKeyPart incorrect for EO_S1 collation 1175static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { 1176 UCharIterator uiter; 1177 uint32_t state[2] = { 0, 0 }; 1178 int32_t keyLen; 1179 int32_t count = 8; 1180 1181 uiter_setString(&uiter, text, len); 1182 keyLen = 0; 1183 while (TRUE) { 1184 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); 1185 if (U_FAILURE(status)) { 1186 return -1; 1187 } 1188 if (keyPartLen == 0) { 1189 break; 1190 } 1191 keyLen += keyPartLen; 1192 } 1193 return keyLen; 1194} 1195 1196void CollationRegressionTest::TestT7189() { 1197 UErrorCode status = U_ZERO_ERROR; 1198 UCollator *coll; 1199 uint32_t i; 1200 1201 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { 1202 // "Achter De Hoven" 1203 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, 1204 // "ABC" 1205 { 0x41, 0x42, 0x43, 0x00 }, 1206 // "HELLO world!" 1207 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } 1208 }; 1209 1210 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { 1211 // "Achter de Hoven" 1212 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, 1213 // "abc" 1214 { 0x61, 0x62, 0x63, 0x00 }, 1215 // "hello world!" 1216 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } 1217 }; 1218 1219 // Open the collator 1220 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); 1221 if (U_FAILURE(status)) { 1222 errln("Failed to create a collator for short string EO_S1"); 1223 return; 1224 } 1225 1226 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) { 1227 uint8_t key1[100], key2[100]; 1228 int32_t len1, len2; 1229 1230 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); 1231 if (U_FAILURE(status)) { 1232 errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); 1233 break; 1234 } 1235 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); 1236 if (U_FAILURE(status)) { 1237 errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); 1238 break; 1239 } 1240 1241 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { 1242 errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); 1243 } else { 1244 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " 1245 + TestUtility::hex(key2, len2)); 1246 } 1247 } 1248 ucol_close(coll); 1249} 1250 1251void CollationRegressionTest::TestCaseFirstCompression() { 1252 RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); 1253 UErrorCode status = U_ZERO_ERROR; 1254 1255 // default 1256 caseFirstCompressionSub(col, "default"); 1257 1258 // Upper first 1259 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); 1260 if (U_FAILURE(status)) { 1261 errln("Failed to set UCOL_UPPER_FIRST"); 1262 return; 1263 } 1264 caseFirstCompressionSub(col, "upper first"); 1265 1266 // Lower first 1267 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); 1268 if (U_FAILURE(status)) { 1269 errln("Failed to set UCOL_LOWER_FIRST"); 1270 return; 1271 } 1272 caseFirstCompressionSub(col, "lower first"); 1273 1274 delete col; 1275} 1276 1277void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) { 1278 const int32_t maxLength = 50; 1279 1280 UChar str1[maxLength]; 1281 UChar str2[maxLength]; 1282 1283 CollationKey key1, key2; 1284 1285 for (int32_t len = 1; len <= maxLength; len++) { 1286 int32_t i = 0; 1287 for (; i < len - 1; i++) { 1288 str1[i] = str2[i] = (UChar)0x61; // 'a' 1289 } 1290 str1[i] = (UChar)0x41; // 'A' 1291 str2[i] = (UChar)0x61; // 'a' 1292 1293 UErrorCode status = U_ZERO_ERROR; 1294 col->getCollationKey(str1, len, key1, status); 1295 col->getCollationKey(str2, len, key2, status); 1296 1297 UCollationResult cmpKey = key1.compareTo(key2, status); 1298 UCollationResult cmpCol = col->compare(str1, len, str2, len, status); 1299 1300 if (U_FAILURE(status)) { 1301 errln("Error in caseFirstCompressionSub"); 1302 } else if (cmpKey != cmpCol) { 1303 errln((UnicodeString)"Inconsistent comparison(" + opt 1304 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len) 1305 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); 1306 } 1307 } 1308} 1309 1310void CollationRegressionTest::TestTrailingComment() { 1311 // ICU ticket #8070: 1312 // Check that the rule parser handles a comment without terminating end-of-line. 1313 IcuTestErrorCode errorCode(*this, "TestTrailingComment"); 1314 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode); 1315 UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63); 1316 assertTrue("c<b", coll.compare(c, b) < 0); 1317 assertTrue("b<a", coll.compare(b, a) < 0); 1318} 1319 1320void CollationRegressionTest::TestBeforeWithTooStrongAfter() { 1321 // ICU ticket #9959: 1322 // Forbid rules with a before-reset followed by a stronger relation. 1323 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter"); 1324 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode); 1325 if(errorCode.isSuccess()) { 1326 errln("should forbid before-2-reset followed by primary relation"); 1327 } else { 1328 errorCode.reset(); 1329 } 1330 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode); 1331 if(errorCode.isSuccess()) { 1332 errln("should forbid before-3-reset followed by primary or secondary relation"); 1333 } else { 1334 errorCode.reset(); 1335 } 1336} 1337 1338void CollationRegressionTest::compareArray(Collator &c, 1339 const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN], 1340 int32_t testCount) 1341{ 1342 int32_t i; 1343 Collator::EComparisonResult expectedResult = Collator::EQUAL; 1344 1345 for (i = 0; i < testCount; i += 3) 1346 { 1347 UnicodeString source(tests[i]); 1348 UnicodeString comparison(tests[i + 1]); 1349 UnicodeString target(tests[i + 2]); 1350 1351 if (comparison == "<") 1352 { 1353 expectedResult = Collator::LESS; 1354 } 1355 else if (comparison == ">") 1356 { 1357 expectedResult = Collator::GREATER; 1358 } 1359 else if (comparison == "=") 1360 { 1361 expectedResult = Collator::EQUAL; 1362 } 1363 else 1364 { 1365 UnicodeString bogus1("Bogus comparison string \""); 1366 UnicodeString bogus2("\""); 1367 errln(bogus1 + comparison + bogus2); 1368 } 1369 1370 Collator::EComparisonResult compareResult = c.compare(source, target); 1371 1372 CollationKey sourceKey, targetKey; 1373 UErrorCode status = U_ZERO_ERROR; 1374 1375 c.getCollationKey(source, sourceKey, status); 1376 1377 if (U_FAILURE(status)) 1378 { 1379 errln("Couldn't get collationKey for source"); 1380 continue; 1381 } 1382 1383 c.getCollationKey(target, targetKey, status); 1384 1385 if (U_FAILURE(status)) 1386 { 1387 errln("Couldn't get collationKey for target"); 1388 continue; 1389 } 1390 1391 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); 1392 1393 reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult ); 1394 1395 } 1396} 1397 1398void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) 1399{ 1400 int32_t c1, c2, count = 0; 1401 UErrorCode status = U_ZERO_ERROR; 1402 1403 do 1404 { 1405 c1 = i1.next(status); 1406 c2 = i2.next(status); 1407 1408 if (c1 != c2) 1409 { 1410 UnicodeString msg, msg1(" "); 1411 1412 msg += msg1 + count; 1413 msg += ": strength(0x"; 1414 appendHex(c1, 8, msg); 1415 msg += ") != strength(0x"; 1416 appendHex(c2, 8, msg); 1417 msg += ")"; 1418 1419 errln(msg); 1420 break; 1421 } 1422 1423 count += 1; 1424 } 1425 while (c1 != CollationElementIterator::NULLORDER); 1426} 1427 1428void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */) 1429{ 1430 if (exec) 1431 { 1432 logln("Collation Regression Tests: "); 1433 } 1434 1435 if(en_us == NULL) { 1436 dataerrln("Class collator not instantiated"); 1437 name = ""; 1438 return; 1439 } 1440 TESTCASE_AUTO_BEGIN; 1441 TESTCASE_AUTO(Test4048446); 1442 TESTCASE_AUTO(Test4051866); 1443 TESTCASE_AUTO(Test4053636); 1444 TESTCASE_AUTO(Test4054238); 1445 TESTCASE_AUTO(Test4054734); 1446 TESTCASE_AUTO(Test4054736); 1447 TESTCASE_AUTO(Test4058613); 1448 TESTCASE_AUTO(Test4059820); 1449 TESTCASE_AUTO(Test4060154); 1450 TESTCASE_AUTO(Test4062418); 1451 TESTCASE_AUTO(Test4065540); 1452 TESTCASE_AUTO(Test4066189); 1453 TESTCASE_AUTO(Test4066696); 1454 TESTCASE_AUTO(Test4076676); 1455 TESTCASE_AUTO(Test4078588); 1456 TESTCASE_AUTO(Test4079231); 1457 TESTCASE_AUTO(Test4081866); 1458 TESTCASE_AUTO(Test4087241); 1459 TESTCASE_AUTO(Test4087243); 1460 TESTCASE_AUTO(Test4092260); 1461 TESTCASE_AUTO(Test4095316); 1462 TESTCASE_AUTO(Test4101940); 1463 TESTCASE_AUTO(Test4103436); 1464 TESTCASE_AUTO(Test4114076); 1465 TESTCASE_AUTO(Test4114077); 1466 TESTCASE_AUTO(Test4124632); 1467 TESTCASE_AUTO(Test4132736); 1468 TESTCASE_AUTO(Test4133509); 1469 TESTCASE_AUTO(Test4139572); 1470 TESTCASE_AUTO(Test4141640); 1471 TESTCASE_AUTO(Test4146160); 1472 TESTCASE_AUTO(Test4179216); 1473 TESTCASE_AUTO(TestT7189); 1474 TESTCASE_AUTO(TestCaseFirstCompression); 1475 TESTCASE_AUTO(TestTrailingComment); 1476 TESTCASE_AUTO(TestBeforeWithTooStrongAfter); 1477 TESTCASE_AUTO_END; 1478} 1479 1480#endif /* #if !UCONFIG_NO_COLLATION */ 1481