tscoll.cpp revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7/** 8 * IntlTestCollator is the medium level test class for everything in the directory "collate". 9 */ 10 11/*********************************************************************** 12* Modification history 13* Date Name Description 14* 02/14/2001 synwee Compare with cintltst and commented away tests 15* that are not run. 16***********************************************************************/ 17 18#include "unicode/utypes.h" 19 20#if !UCONFIG_NO_COLLATION 21 22#include "unicode/localpointer.h" 23#include "unicode/sortkey.h" 24#include "unicode/uchar.h" 25#include "unicode/ustring.h" 26 27#include "encoll.h" 28#include "frcoll.h" 29#include "decoll.h" 30#include "escoll.h" 31#include "ficoll.h" 32#include "jacoll.h" 33#include "trcoll.h" 34#include "allcoll.h" 35#include "g7coll.h" 36#include "mnkytst.h" 37#include "apicoll.h" 38#include "regcoll.h" 39#include "currcoll.h" 40#include "itercoll.h" 41#include "tstnorm.h" 42#include "normconf.h" 43#include "thcoll.h" 44#include "srchtest.h" 45#include "ssearch.h" 46#include "lcukocol.h" 47#include "ucaconf.h" 48#include "svccoll.h" 49#include "cmemory.h" 50#include "alphaindextst.h" 51 52// Set to 1 to test offsets in backAndForth() 53#define TEST_OFFSETS 0 54 55extern IntlTest *createCollationTest(); 56 57void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par ) 58{ 59 if (exec) { 60 logln("TestSuite Collator: "); 61 } 62 63 TESTCASE_AUTO_BEGIN; 64 TESTCASE_AUTO_CLASS(CollationEnglishTest); 65 TESTCASE_AUTO_CLASS(CollationFrenchTest); 66 TESTCASE_AUTO_CLASS(CollationGermanTest); 67 TESTCASE_AUTO_CLASS(CollationSpanishTest); 68 TESTCASE_AUTO_CLASS(CollationKanaTest); 69 TESTCASE_AUTO_CLASS(CollationTurkishTest); 70 TESTCASE_AUTO_CLASS(CollationDummyTest); 71 TESTCASE_AUTO_CLASS(G7CollationTest); 72 TESTCASE_AUTO_CLASS(CollationMonkeyTest); 73 TESTCASE_AUTO_CLASS(CollationAPITest); 74 TESTCASE_AUTO_CLASS(CollationRegressionTest); 75 TESTCASE_AUTO_CLASS(CollationCurrencyTest); 76 TESTCASE_AUTO_CLASS(CollationIteratorTest); 77 TESTCASE_AUTO_CLASS(CollationThaiTest); 78 TESTCASE_AUTO_CLASS(LotusCollationKoreanTest); 79 TESTCASE_AUTO_CLASS(StringSearchTest); 80 TESTCASE_AUTO_CLASS(UCAConformanceTest); 81 TESTCASE_AUTO_CLASS(CollationServiceTest); 82 TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation 83 TESTCASE_AUTO_CLASS(SSearchTest); 84#if !UCONFIG_NO_NORMALIZATION 85 TESTCASE_AUTO_CLASS(AlphabeticIndexTest); 86#endif 87 TESTCASE_AUTO_CREATE_CLASS(CollationTest); 88 TESTCASE_AUTO_END; 89} 90 91UCollationResult 92IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) { 93 int32_t partialSKResult = 0; 94 uint8_t sBuf[512], tBuf[512]; 95 UCharIterator sIter, tIter; 96 uint32_t sState[2], tState[2]; 97 int32_t sSize = pieceSize, tSize = pieceSize; 98 int32_t i = 0; 99 status = U_ZERO_ERROR; 100 sState[0] = 0; sState[1] = 0; 101 tState[0] = 0; tState[1] = 0; 102 while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) { 103 uiter_setString(&sIter, source, sLen); 104 uiter_setString(&tIter, target, tLen); 105 sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status); 106 tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status); 107 108 if(sState[0] != 0 || tState[0] != 0) { 109 log("State != 0 : %08X %08X\n", sState[0], tState[0]); 110 } 111 log("%i ", i++); 112 113 partialSKResult = memcmp(sBuf, tBuf, pieceSize); 114 } 115 116 if(partialSKResult < 0) { 117 return UCOL_LESS; 118 } else if(partialSKResult > 0) { 119 return UCOL_GREATER; 120 } else { 121 return UCOL_EQUAL; 122 } 123} 124 125void 126IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 127{ 128 UErrorCode status = U_ZERO_ERROR; 129 130 UCollator *myCollation = col->toUCollator(); 131 132 Collator::EComparisonResult compareResult = col->compare(source, target); 133 134 CollationKey srckey, tgtkey; 135 col->getCollationKey(source, srckey, status); 136 col->getCollationKey(target, tgtkey, status); 137 if (U_FAILURE(status)){ 138 errln("Creation of collation keys failed\n"); 139 } 140 Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); 141 142 reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); 143 144 UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); 145 146 int32_t sLen = source.length(), tLen = target.length(); 147 const UChar* src = source.getBuffer(); 148 const UChar* trg = target.getBuffer(); 149 UCollationResult compareResultIter = (UCollationResult)result; 150 151 { 152 UCharIterator sIter, tIter; 153 uiter_setString(&sIter, src, sLen); 154 uiter_setString(&tIter, trg, tLen); 155 compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 156 if(compareResultIter != (UCollationResult)result) { 157 errln("Different result for iterative comparison "+source+" "+target); 158 } 159 } 160 /* convert the strings to UTF-8 and do try comparing with char iterator */ 161 if(!quick) { /*!QUICK*/ 162 char utf8Source[256], utf8Target[256]; 163 int32_t utf8SourceLen = 0, utf8TargetLen = 0; 164 u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); 165 if(U_FAILURE(status)) { /* probably buffer is not big enough */ 166 log("Src UTF-8 buffer too small! Will not compare!\n"); 167 } else { 168 u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); 169 if(U_SUCCESS(status)) { /* probably buffer is not big enough */ 170 UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; 171 UCharIterator sIter, tIter; 172 /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ 173 uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); 174 uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); 175 /*uiter_setString(&sIter, source, sLen); 176 uiter_setString(&tIter, target, tLen);*/ 177 compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 178 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 179 sIter.move(&sIter, 0, UITER_START); 180 tIter.move(&tIter, 0, UITER_START); 181 compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); 182 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 183 if(compareResultUTF8 != compareResultIter) { 184 errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); 185 } 186 if(compareResultUTF8 != compareResultUTF8Norm) { 187 errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); 188 } 189 } else { 190 log("Target UTF-8 buffer too small! Did not compare!\n"); 191 } 192 if(U_FAILURE(status)) { 193 log("UTF-8 strcoll failed! Ignoring result\n"); 194 } 195 } 196 } 197 198 /* testing the partial sortkeys */ 199 { /*!QUICK*/ 200 int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ 201 int32_t partialSizesSize = 1; 202 if(!quick) { 203 partialSizesSize = 7; 204 } 205 int32_t i = 0; 206 log("partial sortkey test piecesize="); 207 for(i = 0; i < partialSizesSize; i++) { 208 UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; 209 log("%i ", partialSizes[i]); 210 211 partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 212 if(partialSKResult != (UCollationResult)result) { 213 errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); 214 } 215 216 if(norm != UCOL_ON && !quick) { 217 log("N "); 218 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); 219 partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); 220 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); 221 if(partialSKResult != partialNormalizedSKResult) { 222 errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); 223 } 224 } 225 } 226 log("\n"); 227 } 228/* 229 if (compareResult != result) { 230 errln("String comparison failed in variant test\n"); 231 } 232 if (keyResult != result) { 233 errln("Collation key comparison failed in variant test\n"); 234 } 235*/ 236} 237 238void 239IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) { 240 doTest(col, UnicodeString(source), UnicodeString(target), result); 241} 242 243void 244IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) 245{ 246 if(col) { 247 doTestVariant(col, source, target, result); 248 if(result == Collator::LESS) { 249 doTestVariant(col, target, source, Collator::GREATER); 250 } else if (result == Collator::GREATER) { 251 doTestVariant(col, target, source, Collator::LESS); 252 } 253 254 UErrorCode status = U_ZERO_ERROR; 255 LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source)); 256 logln("Testing iterating source: "+source); 257 backAndForth(*c); 258 c->setText(target, status); 259 logln("Testing iterating target: "+target); 260 backAndForth(*c); 261 } 262} 263 264 265// used for collation result reporting, defined here for convenience 266// (maybe moved later) 267void 268IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target, 269 CollationKey &sourceKey, CollationKey &targetKey, 270 Collator::EComparisonResult compareResult, 271 Collator::EComparisonResult keyResult, 272 Collator::EComparisonResult incResult, 273 Collator::EComparisonResult expectedResult ) 274{ 275 if (expectedResult < -1 || expectedResult > 1) 276 { 277 errln("***** invalid call to reportCResult ****"); 278 return; 279 } 280 281 UBool ok1 = (compareResult == expectedResult); 282 UBool ok2 = (keyResult == expectedResult); 283 UBool ok3 = (incResult == expectedResult); 284 285 286 if (ok1 && ok2 && ok3 && !verbose) { 287 // Keep non-verbose, passing tests fast 288 return; 289 } else { 290 UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare("); 291 UnicodeString msg2(", "), msg3(") returned "), msg4("; expected "); 292 UnicodeString prettySource, prettyTarget, sExpect, sResult; 293 294 IntlTest::prettify(source, prettySource); 295 IntlTest::prettify(target, prettyTarget); 296 appendCompareResult(compareResult, sResult); 297 appendCompareResult(expectedResult, sExpect); 298 299 if (ok1) { 300 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 301 } else { 302 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 303 } 304 305 msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key("); 306 msg2 = ").compareTo(key("; 307 msg3 = ")) returned "; 308 309 appendCompareResult(keyResult, sResult); 310 311 if (ok2) { 312 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 313 } else { 314 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 315 316 msg1 = " "; 317 msg2 = " vs. "; 318 319 prettify(sourceKey, prettySource); 320 prettify(targetKey, prettyTarget); 321 322 errln(msg1 + prettySource + msg2 + prettyTarget); 323 } 324 msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare("); 325 msg2 = ", "; 326 msg3 = ") returned "; 327 328 appendCompareResult(incResult, sResult); 329 330 if (ok3) { 331 logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult); 332 } else { 333 errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect); 334 } 335 } 336} 337 338UnicodeString& 339IntlTestCollator::appendCompareResult(Collator::EComparisonResult result, 340 UnicodeString& target) 341{ 342 if (result == Collator::LESS) 343 { 344 target += "LESS"; 345 } 346 else if (result == Collator::EQUAL) 347 { 348 target += "EQUAL"; 349 } 350 else if (result == Collator::GREATER) 351 { 352 target += "GREATER"; 353 } 354 else 355 { 356 UnicodeString huh = "?"; 357 358 target += (huh + (int32_t)result); 359 } 360 361 return target; 362} 363 364// Produce a printable representation of a CollationKey 365UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target) 366{ 367 int32_t i, byteCount; 368 const uint8_t *bytes = source.getByteArray(byteCount); 369 370 target.remove(); 371 target += "["; 372 373 for (i = 0; i < byteCount; i += 1) 374 { 375 if (i != 0) { 376 target += " "; 377 } 378 appendHex(bytes[i], 2, target); 379 } 380 381 target += "]"; 382 383 return target; 384} 385 386void IntlTestCollator::backAndForth(CollationElementIterator &iter) 387{ 388 // Run through the iterator forwards and stick it into an array 389 int32_t orderLength = 0; 390 LocalArray<Order> orders(getOrders(iter, orderLength)); 391 UErrorCode status = U_ZERO_ERROR; 392 393 // Now go through it backwards and make sure we get the same values 394 int32_t index = orderLength; 395 int32_t o; 396 397 // reset the iterator 398 iter.reset(); 399 400 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 401 { 402 /*int32_t offset = */iter.getOffset(); 403 404 if (index == 0) { 405 if(o == 0) { 406 continue; 407 } else { // this is an error, orders exhausted but there are non-ignorable CEs from 408 // going backwards 409 errln("Backward iteration returned a non ignorable after orders are exhausted"); 410 break; 411 } 412 } 413 414 index -= 1; 415 if (o != orders[index].order) { 416 if (o == 0) 417 index += 1; 418 else { 419 while (index > 0 && orders[--index].order == 0) { 420 // nothing... 421 } 422 423 if (o != orders[index].order) { 424 errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index, 425 orders[index].order, o); 426 //break; 427 return; 428 } 429 } 430 } 431 432#if TEST_OFFSETS 433 if (offset != orders[index].offset) { 434 errln("Mismatched offset at index %d: %d vs. %d", index, 435 orders[index].offset, offset); 436 //break; 437 return; 438 } 439#endif 440 441 } 442 443 while (index != 0 && orders[index - 1].order == 0) 444 { 445 index --; 446 } 447 448 if (index != 0) 449 { 450 UnicodeString msg("Didn't get back to beginning - index is "); 451 errln(msg + index); 452 453 iter.reset(); 454 err("next: "); 455 while ((o = iter.next(status)) != CollationElementIterator::NULLORDER) 456 { 457 UnicodeString hexString("0x"); 458 459 appendHex(o, 8, hexString); 460 hexString += " "; 461 err(hexString); 462 } 463 errln(""); 464 465 err("prev: "); 466 while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER) 467 { 468 UnicodeString hexString("0x"); 469 470 appendHex(o, 8, hexString); 471 hexString += " "; 472 err(hexString); 473 } 474 errln(""); 475 } 476} 477 478 479/** 480 * Return an integer array containing all of the collation orders 481 * returned by calls to next on the specified iterator 482 */ 483IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength) 484{ 485 int32_t maxSize = 100; 486 int32_t size = 0; 487 LocalArray<Order> orders(new Order[maxSize]); 488 UErrorCode status = U_ZERO_ERROR; 489 int32_t offset = iter.getOffset(); 490 491 int32_t order; 492 while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) 493 { 494 if (size == maxSize) 495 { 496 maxSize *= 2; 497 Order *temp = new Order[maxSize]; 498 499 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 500 orders.adoptInstead(temp); 501 } 502 503 orders[size].order = order; 504 orders[size].offset = offset; 505 506 offset = iter.getOffset(); 507 size += 1; 508 } 509 if (U_FAILURE(status)) { 510 errln("CollationElementIterator.next() failed - %s", 511 u_errorName(status)); 512 } 513 514 if (maxSize > size) 515 { 516 Order *temp = new Order[size]; 517 518 uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order)); 519 orders.adoptInstead(temp); 520 } 521 522 orderLength = size; 523 return orders.orphan(); 524} 525 526#endif /* #if !UCONFIG_NO_COLLATION */ 527