1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 9/** 10 * IntlTestCollator is the medium level test class for everything in the directory "collate". 11 */ 12 13/*********************************************************************** 14* Modification history 15* Date Name Description 16* 02/14/2001 synwee Compare with cintltst and commented away tests 17* that are not run. 18***********************************************************************/ 19 20#include "unicode/utypes.h" 21 22#if !UCONFIG_NO_COLLATION 23 24#include "unicode/localpointer.h" 25#include "unicode/sortkey.h" 26#include "unicode/uchar.h" 27#include "unicode/ustring.h" 28 29#include "encoll.h" 30#include "frcoll.h" 31#include "decoll.h" 32#include "escoll.h" 33#include "ficoll.h" 34#include "jacoll.h" 35#include "trcoll.h" 36#include "allcoll.h" 37#include "g7coll.h" 38#include "mnkytst.h" 39#include "apicoll.h" 40#include "regcoll.h" 41#include "currcoll.h" 42#include "itercoll.h" 43#include "tstnorm.h" 44#include "normconf.h" 45#include "thcoll.h" 46#include "srchtest.h" 47#include "ssearch.h" 48#include "lcukocol.h" 49#include "ucaconf.h" 50#include "svccoll.h" 51#include "cmemory.h" 52#include "alphaindextst.h" 53 54// Set to 1 to test offsets in backAndForth() 55#define TEST_OFFSETS 0 56 57extern IntlTest *createCollationTest(); 58 59void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par ) 60{ 61 if (exec) { 62 logln("TestSuite Collator: "); 63 } 64 65 TESTCASE_AUTO_BEGIN; 66 TESTCASE_AUTO_CLASS(CollationEnglishTest); 67 TESTCASE_AUTO_CLASS(CollationFrenchTest); 68 TESTCASE_AUTO_CLASS(CollationGermanTest); 69 TESTCASE_AUTO_CLASS(CollationSpanishTest); 70 TESTCASE_AUTO_CLASS(CollationKanaTest); 71 TESTCASE_AUTO_CLASS(CollationTurkishTest); 72 TESTCASE_AUTO_CLASS(CollationDummyTest); 73 TESTCASE_AUTO_CLASS(G7CollationTest); 74 TESTCASE_AUTO_CLASS(CollationMonkeyTest); 75 TESTCASE_AUTO_CLASS(CollationAPITest); 76 TESTCASE_AUTO_CLASS(CollationRegressionTest); 77 TESTCASE_AUTO_CLASS(CollationCurrencyTest); 78 TESTCASE_AUTO_CLASS(CollationIteratorTest); 79 TESTCASE_AUTO_CLASS(CollationThaiTest); 80 TESTCASE_AUTO_CLASS(LotusCollationKoreanTest); 81 TESTCASE_AUTO_CLASS(StringSearchTest); 82 TESTCASE_AUTO_CLASS(UCAConformanceTest); 83 TESTCASE_AUTO_CLASS(CollationServiceTest); 84 TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation 85 TESTCASE_AUTO_CLASS(SSearchTest); 86#if !UCONFIG_NO_NORMALIZATION 87 TESTCASE_AUTO_CLASS(AlphabeticIndexTest); 88#endif 89 TESTCASE_AUTO_CREATE_CLASS(CollationTest); 90 TESTCASE_AUTO_END; 91} 92 93UCollationResult 94IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) { 95 int32_t partialSKResult = 0; 96 uint8_t sBuf[512], tBuf[512]; 97 UCharIterator sIter, tIter; 98 uint32_t sState[2], tState[2]; 99 int32_t sSize = pieceSize, tSize = pieceSize; 100 int32_t i = 0; 101 status = U_ZERO_ERROR; 102 sState[0] = 0; sState[1] = 0; 103 tState[0] = 0; tState[1] = 0; 104 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { 105 uiter_setString(&sIter, source, sLen); 106 uiter_setString(&tIter, target, tLen); 107 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status); 108 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status); 109 110 if(sState[0] != 0 || tState[0] != 0) { 111 log("State != 0 : %08X %08X\n", sState[0], tState[0]); 112 } 113 log("%i ", i++); 114 115 partialSKResult = memcmp(sBuf, tBuf, pieceSize); 116 } 117 118 if(partialSKResult < 0) { 119 return UCOL_LESS; 120 } else if(partialSKResult > 0) { 121 return UCOL_GREATER; 122 } else { 123 return UCOL_EQUAL; 124 } 125} 126 127void 128IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 129{ 130 UErrorCode status = U_ZERO_ERROR; 131 132 UCollator *myCollation = col->toUCollator(); 133 134 Collator::EComparisonResult compareResult = col->compare(source, target); 135 136 CollationKey srckey, tgtkey; 137 col->getCollationKey(source, srckey, status); 138 col->getCollationKey(target, tgtkey, status); 139 if (U_FAILURE(status)){ 140 errln("Creation of collation keys failed\n"); 141 } 142 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); 143 144 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); 145 146 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); 147 148 int32_t sLen = source.length(), tLen = target.length(); 149 const UChar* src = source.getBuffer(); 150 const UChar* trg = target.getBuffer(); 151 UCollationResult compareResultIter = (UCollationResult)result; 152 153 { 154 UCharIterator sIter, tIter; 155 uiter_setString(&sIter, src, sLen); 156 uiter_setString(&tIter, trg, tLen); 157 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 158 if(compareResultIter != (UCollationResult)result) { 159 errln("Different result for iterative comparison "+source+" "+target); 160 } 161 } 162 /* convert the strings to UTF-8 and do try comparing with char iterator */ 163 if(!quick) { /*!QUICK*/ 164 char utf8Source[256], utf8Target[256]; 165 int32_t utf8SourceLen = 0, utf8TargetLen = 0; 166 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); 167 if(U_FAILURE(status)) { /* probably buffer is not big enough */ 168 log("Src UTF-8 buffer too small! Will not compare!\n"); 169 } else { 170 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); 171 if(U_SUCCESS(status)) { /* probably buffer is not big enough */ 172 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; 173 UCharIterator sIter, tIter; 174 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ 175 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); 176 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); 177 /*uiter_setString(&sIter, source, sLen); 178 uiter_setString(&tIter, target, tLen);*/ 179 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 180 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 181 sIter.move(&sIter, 0, UITER_START); 182 tIter.move(&tIter, 0, UITER_START); 183 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 184 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 185 if(compareResultUTF8 != compareResultIter) { 186 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); 187 } 188 if(compareResultUTF8 != compareResultUTF8Norm) { 189 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); 190 } 191 } else { 192 log("Target UTF-8 buffer too small! Did not compare!\n"); 193 } 194 if(U_FAILURE(status)) { 195 log("UTF-8 strcoll failed! Ignoring result\n"); 196 } 197 } 198 } 199 200 /* testing the partial sortkeys */ 201 { /*!QUICK*/ 202 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ 203 int32_t partialSizesSize = 1; 204 if(!quick) { 205 partialSizesSize = 7; 206 } 207 int32_t i = 0; 208 log("partial sortkey test piecesize="); 209 for(i = 0; i < partialSizesSize; i++) { 210 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; 211 log("%i ", partialSizes[i]); 212 213 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 214 if(partialSKResult != (UCollationResult)result) { 215 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); 216 } 217 218 if(norm != UCOL_ON && !quick) { 219 log("N "); 220 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 221 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 222 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 223 if(partialSKResult != partialNormalizedSKResult) { 224 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); 225 } 226 } 227 } 228 log("\n"); 229 } 230/* 231 if (compareResult != result) { 232 errln("String comparison failed in variant test\n"); 233 } 234 if (keyResult != result) { 235 errln("Collation key comparison failed in variant test\n"); 236 } 237*/ 238} 239 240void 241IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) { 242 doTest(col, UnicodeString(source), UnicodeString(target), result); 243} 244 245void 246IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 247{ 248 if(col) { 249 doTestVariant(col, source, target, result); 250 if(result == Collator::LESS) { 251 doTestVariant(col, target, source, Collator::GREATER); 252 } else if (result == Collator::GREATER) { 253 doTestVariant(col, target, source, Collator::LESS); 254 } 255 256 UErrorCode status = U_ZERO_ERROR; 257 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source)); 258 logln("Testing iterating source: "+source); 259 backAndForth(*c); 260 c->setText(target, status); 261 logln("Testing iterating target: "+target); 262 backAndForth(*c); 263 } 264} 265 266 267// used for collation result reporting, defined here for convenience 268// (maybe moved later) 269void 270IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target, 271 CollationKey &sourceKey, CollationKey &targetKey, 272 Collator::EComparisonResult compareResult, 273 Collator::EComparisonResult keyResult, 274 Collator::EComparisonResult incResult, 275 Collator::EComparisonResult expectedResult ) 276{ 277 if (expectedResult < -1 || expectedResult > 1) 278 { 279 errln("***** invalid call to reportCResult ****"); 280 return; 281 } 282 283 UBool ok1 = (compareResult == expectedResult); 284 UBool ok2 = (keyResult == expectedResult); 285 UBool ok3 = (incResult == expectedResult); 286 287 288 if (ok1 && ok2 && ok3 && !verbose) { 289 // Keep non-verbose, passing tests fast 290 return; 291 } else { 292 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare("); 293 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected "); 294 UnicodeString prettySource, prettyTarget, sExpect, sResult; 295 296 IntlTest::prettify(source, prettySource); 297 IntlTest::prettify(target, prettyTarget); 298 appendCompareResult(compareResult, sResult); 299 appendCompareResult(expectedResult, sExpect); 300 301 if (ok1) { 302 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 303 } else { 304 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 305 } 306 307 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key("); 308 msg2 = ").compareTo(key("; 309 msg3 = ")) returned "; 310 311 appendCompareResult(keyResult, sResult); 312 313 if (ok2) { 314 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 315 } else { 316 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 317 318 msg1 = " "; 319 msg2 = " vs. "; 320 321 prettify(sourceKey, prettySource); 322 prettify(targetKey, prettyTarget); 323 324 errln(msg1 + prettySource + msg2 + prettyTarget); 325 } 326 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare("); 327 msg2 = ", "; 328 msg3 = ") returned "; 329 330 appendCompareResult(incResult, sResult); 331 332 if (ok3) { 333 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 334 } else { 335 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 336 } 337 } 338} 339 340UnicodeString& 341IntlTestCollator::appendCompareResult(Collator::EComparisonResult result, 342 UnicodeString& target) 343{ 344 if (result == Collator::LESS) 345 { 346 target += "LESS"; 347 } 348 else if (result == Collator::EQUAL) 349 { 350 target += "EQUAL"; 351 } 352 else if (result == Collator::GREATER) 353 { 354 target += "GREATER"; 355 } 356 else 357 { 358 UnicodeString huh = "?"; 359 360 target += (huh + (int32_t)result); 361 } 362 363 return target; 364} 365 366// Produce a printable representation of a CollationKey 367UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target) 368{ 369 int32_t i, byteCount; 370 const uint8_t *bytes = source.getByteArray(byteCount); 371 372 target.remove(); 373 target += "["; 374 375 for (i = 0; i < byteCount; i += 1) 376 { 377 if (i != 0) { 378 target += " "; 379 } 380 appendHex(bytes[i], 2, target); 381 } 382 383 target += "]"; 384 385 return target; 386} 387 388void IntlTestCollator::backAndForth(CollationElementIterator &iter) 389{ 390 // Run through the iterator forwards and stick it into an array 391 int32_t orderLength = 0; 392 LocalArray<Order> orders(getOrders(iter, orderLength)); 393 UErrorCode status = U_ZERO_ERROR; 394 395 // Now go through it backwards and make sure we get the same values 396 int32_t index = orderLength; 397 int32_t o; 398 399 // reset the iterator 400 iter.reset(); 401 402 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 403 { 404 /*int32_t offset = */iter.getOffset(); 405 406 if (index == 0) { 407 if(o == 0) { 408 continue; 409 } else { // this is an error, orders exhausted but there are non-ignorable CEs from 410 // going backwards 411 errln("Backward iteration returned a non ignorable after orders are exhausted"); 412 break; 413 } 414 } 415 416 index -= 1; 417 if (o != orders[index].order) { 418 if (o == 0) 419 index += 1; 420 else { 421 while (index > 0 && orders[--index].order == 0) { 422 // nothing... 423 } 424 425 if (o != orders[index].order) { 426 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index, 427 orders[index].order, o); 428 //break; 429 return; 430 } 431 } 432 } 433 434#if TEST_OFFSETS 435 if (offset != orders[index].offset) { 436 errln("Mismatched offset at index %d: %d vs. %d", index, 437 orders[index].offset, offset); 438 //break; 439 return; 440 } 441#endif 442 443 } 444 445 while (index != 0 && orders[index - 1].order == 0) 446 { 447 index --; 448 } 449 450 if (index != 0) 451 { 452 UnicodeString msg("Didn't get back to beginning - index is "); 453 errln(msg + index); 454 455 iter.reset(); 456 err("next: "); 457 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) 458 { 459 UnicodeString hexString("0x"); 460 461 appendHex(o, 8, hexString); 462 hexString += " "; 463 err(hexString); 464 } 465 errln(""); 466 467 err("prev: "); 468 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 469 { 470 UnicodeString hexString("0x"); 471 472 appendHex(o, 8, hexString); 473 hexString += " "; 474 err(hexString); 475 } 476 errln(""); 477 } 478} 479 480 481/** 482 * Return an integer array containing all of the collation orders 483 * returned by calls to next on the specified iterator 484 */ 485IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength) 486{ 487 int32_t maxSize = 100; 488 int32_t size = 0; 489 LocalArray<Order> orders(new Order[maxSize]); 490 UErrorCode status = U_ZERO_ERROR; 491 int32_t offset = iter.getOffset(); 492 493 int32_t order; 494 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) 495 { 496 if (size == maxSize) 497 { 498 maxSize *= 2; 499 Order *temp = new Order[maxSize]; 500 501 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 502 orders.adoptInstead(temp); 503 } 504 505 orders[size].order = order; 506 orders[size].offset = offset; 507 508 offset = iter.getOffset(); 509 size += 1; 510 } 511 if (U_FAILURE(status)) { 512 errln("CollationElementIterator.next() failed - %s", 513 u_errorName(status)); 514 } 515 516 if (maxSize > size) 517 { 518 Order *temp = new Order[size]; 519 520 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 521 orders.adoptInstead(temp); 522 } 523 524 orderLength = size; 525 return orders.orphan(); 526} 527 528#endif /* #if !UCONFIG_NO_COLLATION */ 529