1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* Copyright (C) 2010-2014, International Business Machines 6* Corporation and others. All Rights Reserved. 7******************************************************************************* 8* file name: bytetrietest.cpp 9* encoding: UTF-8 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2010nov16 14* created by: Markus W. Scherer 15*/ 16 17#include <string.h> 18 19#include "unicode/utypes.h" 20#include "unicode/bytestrie.h" 21#include "unicode/bytestriebuilder.h" 22#include "unicode/localpointer.h" 23#include "unicode/stringpiece.h" 24#include "intltest.h" 25#include "cmemory.h" 26 27struct StringAndValue { 28 const char *s; 29 int32_t value; 30}; 31 32class BytesTrieTest : public IntlTest { 33public: 34 BytesTrieTest(); 35 virtual ~BytesTrieTest(); 36 37 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 38 void TestBuilder(); 39 void TestEmpty(); 40 void Test_a(); 41 void Test_a_ab(); 42 void TestShortestBranch(); 43 void TestBranches(); 44 void TestLongSequence(); 45 void TestLongBranch(); 46 void TestValuesForState(); 47 void TestCompact(); 48 49 BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption); 50 void TestHasUniqueValue(); 51 void TestGetNextBytes(); 52 void TestIteratorFromBranch(); 53 void TestIteratorFromLinearMatch(); 54 void TestTruncatingIteratorFromRoot(); 55 void TestTruncatingIteratorFromLinearMatchShort(); 56 void TestTruncatingIteratorFromLinearMatchLong(); 57 void TestIteratorFromBytes(); 58 void TestFailedIterator(); 59 60 void checkData(const StringAndValue data[], int32_t dataLength); 61 void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption); 62 BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength, 63 UStringTrieBuildOption buildOption); 64 void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 65 void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 66 void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 67 void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 68 void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength); 69 void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength); 70 71private: 72 BytesTrieBuilder *builder_; 73}; 74 75extern IntlTest *createBytesTrieTest() { 76 return new BytesTrieTest(); 77} 78 79BytesTrieTest::BytesTrieTest() : builder_(NULL) { 80 IcuTestErrorCode errorCode(*this, "BytesTrieTest()"); 81 builder_=new BytesTrieBuilder(errorCode); 82} 83 84BytesTrieTest::~BytesTrieTest() { 85 delete builder_; 86} 87 88void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 89 if(exec) { 90 logln("TestSuite BytesTrieTest: "); 91 } 92 TESTCASE_AUTO_BEGIN; 93 TESTCASE_AUTO(TestBuilder); 94 TESTCASE_AUTO(TestEmpty); 95 TESTCASE_AUTO(Test_a); 96 TESTCASE_AUTO(Test_a_ab); 97 TESTCASE_AUTO(TestShortestBranch); 98 TESTCASE_AUTO(TestBranches); 99 TESTCASE_AUTO(TestLongSequence); 100 TESTCASE_AUTO(TestLongBranch); 101 TESTCASE_AUTO(TestValuesForState); 102 TESTCASE_AUTO(TestCompact); 103 TESTCASE_AUTO(TestHasUniqueValue); 104 TESTCASE_AUTO(TestGetNextBytes); 105 TESTCASE_AUTO(TestIteratorFromBranch); 106 TESTCASE_AUTO(TestIteratorFromLinearMatch); 107 TESTCASE_AUTO(TestTruncatingIteratorFromRoot); 108 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort); 109 TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong); 110 TESTCASE_AUTO(TestIteratorFromBytes); 111 TESTCASE_AUTO(TestFailedIterator); 112 TESTCASE_AUTO_END; 113} 114 115void BytesTrieTest::TestBuilder() { 116 IcuTestErrorCode errorCode(*this, "TestBuilder()"); 117 builder_->clear(); 118 delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode); 119 if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) { 120 errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR"); 121 return; 122 } 123 // TODO: remove .build(...) once add() checks for duplicates. 124 builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode); 125 if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) { 126 errln("BytesTrieBuilder.add() did not detect duplicates"); 127 return; 128 } 129} 130 131void BytesTrieTest::TestEmpty() { 132 static const StringAndValue data[]={ 133 { "", 0 } 134 }; 135 checkData(data, UPRV_LENGTHOF(data)); 136} 137 138void BytesTrieTest::Test_a() { 139 static const StringAndValue data[]={ 140 { "a", 1 } 141 }; 142 checkData(data, UPRV_LENGTHOF(data)); 143} 144 145void BytesTrieTest::Test_a_ab() { 146 static const StringAndValue data[]={ 147 { "a", 1 }, 148 { "ab", 100 } 149 }; 150 checkData(data, UPRV_LENGTHOF(data)); 151} 152 153void BytesTrieTest::TestShortestBranch() { 154 static const StringAndValue data[]={ 155 { "a", 1000 }, 156 { "b", 2000 } 157 }; 158 checkData(data, UPRV_LENGTHOF(data)); 159} 160 161void BytesTrieTest::TestBranches() { 162 static const StringAndValue data[]={ 163 { "a", 0x10 }, 164 { "cc", 0x40 }, 165 { "e", 0x100 }, 166 { "ggg", 0x400 }, 167 { "i", 0x1000 }, 168 { "kkkk", 0x4000 }, 169 { "n", 0x10000 }, 170 { "ppppp", 0x40000 }, 171 { "r", 0x100000 }, 172 { "sss", 0x200000 }, 173 { "t", 0x400000 }, 174 { "uu", 0x800000 }, 175 { "vv", 0x7fffffff }, 176 { "zz", (int32_t)0x80000000 } 177 }; 178 for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) { 179 logln("TestBranches length=%d", (int)length); 180 checkData(data, length); 181 } 182} 183 184void BytesTrieTest::TestLongSequence() { 185 static const StringAndValue data[]={ 186 { "a", -1 }, 187 // sequence of linear-match nodes 188 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 }, 189 // more than 256 bytes 190 { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 191 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 192 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 193 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 194 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 195 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 } 196 }; 197 checkData(data, UPRV_LENGTHOF(data)); 198} 199 200void BytesTrieTest::TestLongBranch() { 201 // Split-branch and interesting compact-integer values. 202 static const StringAndValue data[]={ 203 { "a", -2 }, 204 { "b", -1 }, 205 { "c", 0 }, 206 { "d2", 1 }, 207 { "f", 0x3f }, 208 { "g", 0x40 }, 209 { "h", 0x41 }, 210 { "j23", 0x1900 }, 211 { "j24", 0x19ff }, 212 { "j25", 0x1a00 }, 213 { "k2", 0x1a80 }, 214 { "k3", 0x1aff }, 215 { "l234567890", 0x1b00 }, 216 { "l234567890123", 0x1b01 }, 217 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff }, 218 { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 }, 219 { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 }, 220 { "r", 0x333333 }, 221 { "s2345", 0x4444444 }, 222 { "t234567890", 0x77777777 }, 223 { "z", (int32_t)0x80000001 } 224 }; 225 checkData(data, UPRV_LENGTHOF(data)); 226} 227 228void BytesTrieTest::TestValuesForState() { 229 // Check that saveState() and resetToState() interact properly 230 // with next() and current(). 231 static const StringAndValue data[]={ 232 { "a", -1 }, 233 { "ab", -2 }, 234 { "abc", -3 }, 235 { "abcd", -4 }, 236 { "abcde", -5 }, 237 { "abcdef", -6 } 238 }; 239 checkData(data, UPRV_LENGTHOF(data)); 240} 241 242void BytesTrieTest::TestCompact() { 243 // Duplicate trailing strings and values provide opportunities for compacting. 244 static const StringAndValue data[]={ 245 { "+", 0 }, 246 { "+august", 8 }, 247 { "+december", 12 }, 248 { "+july", 7 }, 249 { "+june", 6 }, 250 { "+november", 11 }, 251 { "+october", 10 }, 252 { "+september", 9 }, 253 { "-", 0 }, 254 { "-august", 8 }, 255 { "-december", 12 }, 256 { "-july", 7 }, 257 { "-june", 6 }, 258 { "-november", 11 }, 259 { "-october", 10 }, 260 { "-september", 9 }, 261 // The l+n branch (with its sub-nodes) is a duplicate but will be written 262 // both times because each time it follows a different linear-match node. 263 { "xjuly", 7 }, 264 { "xjune", 6 } 265 }; 266 checkData(data, UPRV_LENGTHOF(data)); 267} 268 269BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) { 270 // All types of nodes leading to the same value, 271 // for code coverage of recursive functions. 272 // In particular, we need a lot of branches on some single level 273 // to exercise a split-branch node. 274 static const StringAndValue data[]={ 275 { "august", 8 }, 276 { "jan", 1 }, 277 { "jan.", 1 }, 278 { "jana", 1 }, 279 { "janbb", 1 }, 280 { "janc", 1 }, 281 { "janddd", 1 }, 282 { "janee", 1 }, 283 { "janef", 1 }, 284 { "janf", 1 }, 285 { "jangg", 1 }, 286 { "janh", 1 }, 287 { "janiiii", 1 }, 288 { "janj", 1 }, 289 { "jankk", 1 }, 290 { "jankl", 1 }, 291 { "jankmm", 1 }, 292 { "janl", 1 }, 293 { "janm", 1 }, 294 { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 295 { "jano", 1 }, 296 { "janpp", 1 }, 297 { "janqqq", 1 }, 298 { "janr", 1 }, 299 { "januar", 1 }, 300 { "january", 1 }, 301 { "july", 7 }, 302 { "jun", 6 }, 303 { "jun.", 6 }, 304 { "june", 6 } 305 }; 306 return buildTrie(data, UPRV_LENGTHOF(data), buildOption); 307} 308 309void BytesTrieTest::TestHasUniqueValue() { 310 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 311 if(trie.isNull()) { 312 return; // buildTrie() reported an error 313 } 314 int32_t uniqueValue; 315 if(trie->hasUniqueValue(uniqueValue)) { 316 errln("unique value at root"); 317 } 318 trie->next('j'); 319 trie->next('a'); 320 trie->next('n'); 321 // hasUniqueValue() directly after next() 322 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) { 323 errln("not unique value 1 after \"jan\""); 324 } 325 trie->first('j'); 326 trie->next('u'); 327 if(trie->hasUniqueValue(uniqueValue)) { 328 errln("unique value after \"ju\""); 329 } 330 if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) { 331 errln("not normal value 6 after \"jun\""); 332 } 333 // hasUniqueValue() after getValue() 334 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) { 335 errln("not unique value 6 after \"jun\""); 336 } 337 // hasUniqueValue() from within a linear-match node 338 trie->first('a'); 339 trie->next('u'); 340 if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) { 341 errln("not unique value 8 after \"au\""); 342 } 343} 344 345void BytesTrieTest::TestGetNextBytes() { 346 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 347 if(trie.isNull()) { 348 return; // buildTrie() reported an error 349 } 350 char buffer[40]; 351 CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer)); 352 int32_t count=trie->getNextBytes(sink); 353 if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') { 354 errln("months getNextBytes()!=[aj] at root"); 355 } 356 trie->next('j'); 357 trie->next('a'); 358 trie->next('n'); 359 // getNextBytes() directly after next() 360 count=trie->getNextBytes(sink.Reset()); 361 buffer[count]=0; 362 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { 363 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\""); 364 } 365 // getNextBytes() after getValue() 366 trie->getValue(); // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE. 367 memset(buffer, 0, sizeof(buffer)); 368 count=trie->getNextBytes(sink.Reset()); 369 if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) { 370 errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()"); 371 } 372 // getNextBytes() from a linear-match node 373 trie->next('u'); 374 memset(buffer, 0, sizeof(buffer)); 375 count=trie->getNextBytes(sink.Reset()); 376 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') { 377 errln("months getNextBytes()!=[a] after \"janu\""); 378 } 379 trie->next('a'); 380 memset(buffer, 0, sizeof(buffer)); 381 count=trie->getNextBytes(sink.Reset()); 382 if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') { 383 errln("months getNextBytes()!=[r] after \"janua\""); 384 } 385 trie->next('r'); 386 trie->next('y'); 387 // getNextBytes() after a final match 388 count=trie->getNextBytes(sink.Reset()); 389 if(count!=0 || sink.NumberOfBytesAppended()!=0) { 390 errln("months getNextBytes()!=[] after \"january\""); 391 } 392} 393 394void BytesTrieTest::TestIteratorFromBranch() { 395 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 396 if(trie.isNull()) { 397 return; // buildTrie() reported an error 398 } 399 // Go to a branch node. 400 trie->next('j'); 401 trie->next('a'); 402 trie->next('n'); 403 IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()"); 404 BytesTrie::Iterator iter(*trie, 0, errorCode); 405 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 406 return; 407 } 408 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 409 // following "jan". 410 static const StringAndValue data[]={ 411 { "", 1 }, 412 { ".", 1 }, 413 { "a", 1 }, 414 { "bb", 1 }, 415 { "c", 1 }, 416 { "ddd", 1 }, 417 { "ee", 1 }, 418 { "ef", 1 }, 419 { "f", 1 }, 420 { "gg", 1 }, 421 { "h", 1 }, 422 { "iiii", 1 }, 423 { "j", 1 }, 424 { "kk", 1 }, 425 { "kl", 1 }, 426 { "kmm", 1 }, 427 { "l", 1 }, 428 { "m", 1 }, 429 { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 }, 430 { "o", 1 }, 431 { "pp", 1 }, 432 { "qqq", 1 }, 433 { "r", 1 }, 434 { "uar", 1 }, 435 { "uary", 1 } 436 }; 437 checkIterator(iter, data, UPRV_LENGTHOF(data)); 438 // Reset, and we should get the same result. 439 logln("after iter.reset()"); 440 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 441} 442 443void BytesTrieTest::TestIteratorFromLinearMatch() { 444 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL)); 445 if(trie.isNull()) { 446 return; // buildTrie() reported an error 447 } 448 // Go into a linear-match node. 449 trie->next('j'); 450 trie->next('a'); 451 trie->next('n'); 452 trie->next('u'); 453 trie->next('a'); 454 IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()"); 455 BytesTrie::Iterator iter(*trie, 0, errorCode); 456 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 457 return; 458 } 459 // Expected data: Same as in buildMonthsTrie(), except only the suffixes 460 // following "janua". 461 static const StringAndValue data[]={ 462 { "r", 1 }, 463 { "ry", 1 } 464 }; 465 checkIterator(iter, data, UPRV_LENGTHOF(data)); 466 // Reset, and we should get the same result. 467 logln("after iter.reset()"); 468 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 469} 470 471void BytesTrieTest::TestTruncatingIteratorFromRoot() { 472 LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST)); 473 if(trie.isNull()) { 474 return; // buildTrie() reported an error 475 } 476 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()"); 477 BytesTrie::Iterator iter(*trie, 4, errorCode); 478 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 479 return; 480 } 481 // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters 482 // of each string, and no string duplicates from the truncation. 483 static const StringAndValue data[]={ 484 { "augu", -1 }, 485 { "jan", 1 }, 486 { "jan.", 1 }, 487 { "jana", 1 }, 488 { "janb", -1 }, 489 { "janc", 1 }, 490 { "jand", -1 }, 491 { "jane", -1 }, 492 { "janf", 1 }, 493 { "jang", -1 }, 494 { "janh", 1 }, 495 { "jani", -1 }, 496 { "janj", 1 }, 497 { "jank", -1 }, 498 { "janl", 1 }, 499 { "janm", 1 }, 500 { "jann", -1 }, 501 { "jano", 1 }, 502 { "janp", -1 }, 503 { "janq", -1 }, 504 { "janr", 1 }, 505 { "janu", -1 }, 506 { "july", 7 }, 507 { "jun", 6 }, 508 { "jun.", 6 }, 509 { "june", 6 } 510 }; 511 checkIterator(iter, data, UPRV_LENGTHOF(data)); 512 // Reset, and we should get the same result. 513 logln("after iter.reset()"); 514 checkIterator(iter.reset(), data, UPRV_LENGTHOF(data)); 515} 516 517void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() { 518 static const StringAndValue data[]={ 519 { "abcdef", 10 }, 520 { "abcdepq", 200 }, 521 { "abcdeyz", 3000 } 522 }; 523 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 524 if(trie.isNull()) { 525 return; // buildTrie() reported an error 526 } 527 // Go into a linear-match node. 528 trie->next('a'); 529 trie->next('b'); 530 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()"); 531 // Truncate within the linear-match node. 532 BytesTrie::Iterator iter(*trie, 2, errorCode); 533 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 534 return; 535 } 536 static const StringAndValue expected[]={ 537 { "cd", -1 } 538 }; 539 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 540 // Reset, and we should get the same result. 541 logln("after iter.reset()"); 542 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 543} 544 545void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() { 546 static const StringAndValue data[]={ 547 { "abcdef", 10 }, 548 { "abcdepq", 200 }, 549 { "abcdeyz", 3000 } 550 }; 551 LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST)); 552 if(trie.isNull()) { 553 return; // buildTrie() reported an error 554 } 555 // Go into a linear-match node. 556 trie->next('a'); 557 trie->next('b'); 558 trie->next('c'); 559 IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()"); 560 // Truncate after the linear-match node. 561 BytesTrie::Iterator iter(*trie, 3, errorCode); 562 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 563 return; 564 } 565 static const StringAndValue expected[]={ 566 { "def", 10 }, 567 { "dep", -1 }, 568 { "dey", -1 } 569 }; 570 checkIterator(iter, expected, UPRV_LENGTHOF(expected)); 571 // Reset, and we should get the same result. 572 logln("after iter.reset()"); 573 checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected)); 574} 575 576void BytesTrieTest::TestIteratorFromBytes() { 577 static const StringAndValue data[]={ 578 { "mm", 3 }, 579 { "mmm", 33 }, 580 { "mmnop", 333 } 581 }; 582 builder_->clear(); 583 IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()"); 584 for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) { 585 builder_->add(data[i].s, data[i].value, errorCode); 586 } 587 StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode); 588 BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode); 589 checkIterator(iter, data, UPRV_LENGTHOF(data)); 590} 591 592void BytesTrieTest::TestFailedIterator() { 593 UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR; 594 BytesTrie::Iterator iter(NULL, 0, failure); 595 StringPiece sp = iter.getString(); 596 if (!sp.empty()) { 597 errln("failed iterator returned garbage data"); 598 } 599} 600 601void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) { 602 logln("checkData(dataLength=%d, fast)", (int)dataLength); 603 checkData(data, dataLength, USTRINGTRIE_BUILD_FAST); 604 logln("checkData(dataLength=%d, small)", (int)dataLength); 605 checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL); 606} 607 608void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) { 609 LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption)); 610 if(trie.isNull()) { 611 return; // buildTrie() reported an error 612 } 613 checkFirst(*trie, data, dataLength); 614 checkNext(*trie, data, dataLength); 615 checkNextWithState(*trie, data, dataLength); 616 checkNextString(*trie, data, dataLength); 617 checkIterator(*trie, data, dataLength); 618} 619 620BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength, 621 UStringTrieBuildOption buildOption) { 622 IcuTestErrorCode errorCode(*this, "buildTrie()"); 623 // Add the items to the trie builder in an interesting (not trivial, not random) order. 624 int32_t index, step; 625 if(dataLength&1) { 626 // Odd number of items. 627 index=dataLength/2; 628 step=2; 629 } else if((dataLength%3)!=0) { 630 // Not a multiple of 3. 631 index=dataLength/5; 632 step=3; 633 } else { 634 index=dataLength-1; 635 step=-1; 636 } 637 builder_->clear(); 638 for(int32_t i=0; i<dataLength; ++i) { 639 builder_->add(data[index].s, data[index].value, errorCode); 640 index=(index+step)%dataLength; 641 } 642 StringPiece sp=builder_->buildStringPiece(buildOption, errorCode); 643 LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode)); 644 if(!errorCode.logIfFailureAndReset("add()/build()")) { 645 builder_->add("zzz", 999, errorCode); 646 if(errorCode.reset()!=U_NO_WRITE_PERMISSION) { 647 errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION"); 648 } 649 } 650 logln("serialized trie size: %ld bytes\n", (long)sp.length()); 651 StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode); 652 if(sp.data()==sp2.data()) { 653 errln("builder.buildStringPiece() before & after build() returned same array"); 654 } 655 if(errorCode.isFailure()) { 656 return NULL; 657 } 658 // Tries from either build() method should be identical but 659 // BytesTrie does not implement equals(). 660 // We just return either one. 661 if((dataLength&1)!=0) { 662 return trie.orphan(); 663 } else { 664 return new BytesTrie(sp2.data()); 665 } 666} 667 668void BytesTrieTest::checkFirst(BytesTrie &trie, 669 const StringAndValue data[], int32_t dataLength) { 670 for(int32_t i=0; i<dataLength; ++i) { 671 int c=*data[i].s; 672 if(c==0) { 673 continue; // skip empty string 674 } 675 UStringTrieResult firstResult=trie.first(c); 676 int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1; 677 UStringTrieResult nextResult=trie.next(data[i].s[1]); 678 if(firstResult!=trie.reset().next(c) || 679 firstResult!=trie.current() || 680 firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) || 681 nextResult!=trie.next(data[i].s[1]) 682 ) { 683 errln("trie.first(%c)!=trie.reset().next(same) for %s", 684 c, data[i].s); 685 } 686 } 687 trie.reset(); 688} 689 690void BytesTrieTest::checkNext(BytesTrie &trie, 691 const StringAndValue data[], int32_t dataLength) { 692 BytesTrie::State state; 693 for(int32_t i=0; i<dataLength; ++i) { 694 int32_t stringLength= (i&1) ? -1 : strlen(data[i].s); 695 UStringTrieResult result; 696 if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) || 697 result!=trie.current() 698 ) { 699 errln("trie does not seem to contain %s", data[i].s); 700 } else if(trie.getValue()!=data[i].value) { 701 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 702 data[i].s, 703 (long)trie.getValue(), (long)trie.getValue(), 704 (long)data[i].value, (long)data[i].value); 705 } else if(result!=trie.current() || trie.getValue()!=data[i].value) { 706 errln("trie value for %s changes when repeating current()/getValue()", data[i].s); 707 } 708 trie.reset(); 709 stringLength=strlen(data[i].s); 710 result=trie.current(); 711 for(int32_t j=0; j<stringLength; ++j) { 712 if(!USTRINGTRIE_HAS_NEXT(result)) { 713 errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j); 714 break; 715 } 716 if(result==USTRINGTRIE_INTERMEDIATE_VALUE) { 717 trie.getValue(); 718 if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) { 719 errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j); 720 break; 721 } 722 } 723 result=trie.next(data[i].s[j]); 724 if(!USTRINGTRIE_MATCHES(result)) { 725 errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j); 726 break; 727 } 728 if(result!=trie.current()) { 729 errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j); 730 break; 731 } 732 } 733 if(!USTRINGTRIE_HAS_VALUE(result)) { 734 errln("trie.next()!=hasValue at the end of %s", data[i].s); 735 continue; 736 } 737 trie.getValue(); 738 if(result!=trie.current()) { 739 errln("trie.current() != current()+getValue()+current() after end of %s", 740 data[i].s); 741 } 742 // Compare the final current() with whether next() can actually continue. 743 trie.saveState(state); 744 UBool nextContinues=FALSE; 745 // Try all graphic characters; we only use those in test strings in this file. 746#if U_CHARSET_FAMILY==U_ASCII_FAMILY 747 const int32_t minChar=0x20; 748 const int32_t maxChar=0x7e; 749#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY 750 const int32_t minChar=0x40; 751 const int32_t maxChar=0xfe; 752#else 753 const int32_t minChar=0; 754 const int32_t maxChar=0xff; 755#endif 756 for(int32_t c=minChar; c<=maxChar; ++c) { 757 if(trie.resetToState(state).next(c)) { 758 nextContinues=TRUE; 759 break; 760 } 761 } 762 if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) { 763 errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts " 764 "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s); 765 } 766 trie.reset(); 767 } 768} 769 770void BytesTrieTest::checkNextWithState(BytesTrie &trie, 771 const StringAndValue data[], int32_t dataLength) { 772 BytesTrie::State noState, state; 773 for(int32_t i=0; i<dataLength; ++i) { 774 if((i&1)==0) { 775 // This should have no effect. 776 trie.resetToState(noState); 777 } 778 const char *expectedString=data[i].s; 779 int32_t stringLength=strlen(expectedString); 780 int32_t partialLength=stringLength/3; 781 for(int32_t j=0; j<partialLength; ++j) { 782 if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) { 783 errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s); 784 return; 785 } 786 } 787 trie.saveState(state); 788 UStringTrieResult resultAtState=trie.current(); 789 UStringTrieResult result; 790 int32_t valueAtState=-99; 791 if(USTRINGTRIE_HAS_VALUE(resultAtState)) { 792 valueAtState=trie.getValue(); 793 } 794 result=trie.next(0); // mismatch 795 if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) { 796 errln("trie.next(0) matched after part of %s", data[i].s); 797 } 798 if( resultAtState!=trie.resetToState(state).current() || 799 (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue()) 800 ) { 801 errln("trie.next(part of %s) changes current()/getValue() after " 802 "saveState/next(0)/resetToState", 803 data[i].s); 804 } else if(!USTRINGTRIE_HAS_VALUE( 805 result=trie.next(expectedString+partialLength, 806 stringLength-partialLength)) || 807 result!=trie.current()) { 808 errln("trie.next(rest of %s) does not seem to contain %s after " 809 "saveState/next(0)/resetToState", 810 data[i].s, data[i].s); 811 } else if(!USTRINGTRIE_HAS_VALUE( 812 result=trie.resetToState(state). 813 next(expectedString+partialLength, 814 stringLength-partialLength)) || 815 result!=trie.current()) { 816 errln("trie does not seem to contain %s after saveState/next(rest)/resetToState", 817 data[i].s); 818 } else if(trie.getValue()!=data[i].value) { 819 errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx", 820 data[i].s, 821 (long)trie.getValue(), (long)trie.getValue(), 822 (long)data[i].value, (long)data[i].value); 823 } 824 trie.reset(); 825 } 826} 827 828// next(string) is also tested in other functions, 829// but here we try to go partway through the string, and then beyond it. 830void BytesTrieTest::checkNextString(BytesTrie &trie, 831 const StringAndValue data[], int32_t dataLength) { 832 for(int32_t i=0; i<dataLength; ++i) { 833 const char *expectedString=data[i].s; 834 int32_t stringLength=strlen(expectedString); 835 if(!trie.next(expectedString, stringLength/2)) { 836 errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s); 837 continue; 838 } 839 // Test that we stop properly at the end of the string. 840 if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) { 841 errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s); 842 } 843 trie.reset(); 844 } 845} 846 847void BytesTrieTest::checkIterator(const BytesTrie &trie, 848 const StringAndValue data[], int32_t dataLength) { 849 IcuTestErrorCode errorCode(*this, "checkIterator()"); 850 BytesTrie::Iterator iter(trie, 0, errorCode); 851 if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) { 852 return; 853 } 854 checkIterator(iter, data, dataLength); 855} 856 857void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter, 858 const StringAndValue data[], int32_t dataLength) { 859 IcuTestErrorCode errorCode(*this, "checkIterator()"); 860 for(int32_t i=0; i<dataLength; ++i) { 861 if(!iter.hasNext()) { 862 errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s); 863 break; 864 } 865 UBool hasNext=iter.next(errorCode); 866 if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) { 867 break; 868 } 869 if(!hasNext) { 870 errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s); 871 break; 872 } 873 if(iter.getString()!=StringPiece(data[i].s)) { 874 errln("trie iterator next().getString()=%s but expected %s for item %d", 875 iter.getString().data(), data[i].s, (int)i); 876 } 877 if(iter.getValue()!=data[i].value) { 878 errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s", 879 (long)iter.getValue(), (long)iter.getValue(), 880 (long)data[i].value, (long)data[i].value, 881 (int)i, data[i].s); 882 } 883 } 884 if(iter.hasNext()) { 885 errln("trie iterator hasNext()=TRUE after all items"); 886 } 887 UBool hasNext=iter.next(errorCode); 888 errorCode.logIfFailureAndReset("trie iterator next() after all items"); 889 if(hasNext) { 890 errln("trie iterator next()=TRUE after all items"); 891 } 892} 893