1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*   Copyright (C) 2010-2014, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   file name:  bytetrietest.cpp
9*   encoding:   UTF-8
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2010nov16
14*   created by: Markus W. Scherer
15*/
16
17#include <string.h>
18
19#include "unicode/utypes.h"
20#include "unicode/bytestrie.h"
21#include "unicode/bytestriebuilder.h"
22#include "unicode/localpointer.h"
23#include "unicode/stringpiece.h"
24#include "intltest.h"
25#include "cmemory.h"
26
27struct StringAndValue {
28    const char *s;
29    int32_t value;
30};
31
32class BytesTrieTest : public IntlTest {
33public:
34    BytesTrieTest();
35    virtual ~BytesTrieTest();
36
37    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38    void TestBuilder();
39    void TestEmpty();
40    void Test_a();
41    void Test_a_ab();
42    void TestShortestBranch();
43    void TestBranches();
44    void TestLongSequence();
45    void TestLongBranch();
46    void TestValuesForState();
47    void TestCompact();
48
49    BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
50    void TestHasUniqueValue();
51    void TestGetNextBytes();
52    void TestIteratorFromBranch();
53    void TestIteratorFromLinearMatch();
54    void TestTruncatingIteratorFromRoot();
55    void TestTruncatingIteratorFromLinearMatchShort();
56    void TestTruncatingIteratorFromLinearMatchLong();
57    void TestIteratorFromBytes();
58    void TestFailedIterator();
59
60    void checkData(const StringAndValue data[], int32_t dataLength);
61    void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
62    BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
63                         UStringTrieBuildOption buildOption);
64    void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
65    void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
66    void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
67    void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
68    void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
69    void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
70
71private:
72    BytesTrieBuilder *builder_;
73};
74
75extern IntlTest *createBytesTrieTest() {
76    return new BytesTrieTest();
77}
78
79BytesTrieTest::BytesTrieTest() : builder_(NULL) {
80    IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
81    builder_=new BytesTrieBuilder(errorCode);
82}
83
84BytesTrieTest::~BytesTrieTest() {
85    delete builder_;
86}
87
88void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
89    if(exec) {
90        logln("TestSuite BytesTrieTest: ");
91    }
92    TESTCASE_AUTO_BEGIN;
93    TESTCASE_AUTO(TestBuilder);
94    TESTCASE_AUTO(TestEmpty);
95    TESTCASE_AUTO(Test_a);
96    TESTCASE_AUTO(Test_a_ab);
97    TESTCASE_AUTO(TestShortestBranch);
98    TESTCASE_AUTO(TestBranches);
99    TESTCASE_AUTO(TestLongSequence);
100    TESTCASE_AUTO(TestLongBranch);
101    TESTCASE_AUTO(TestValuesForState);
102    TESTCASE_AUTO(TestCompact);
103    TESTCASE_AUTO(TestHasUniqueValue);
104    TESTCASE_AUTO(TestGetNextBytes);
105    TESTCASE_AUTO(TestIteratorFromBranch);
106    TESTCASE_AUTO(TestIteratorFromLinearMatch);
107    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
108    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
109    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
110    TESTCASE_AUTO(TestIteratorFromBytes);
111    TESTCASE_AUTO(TestFailedIterator);
112    TESTCASE_AUTO_END;
113}
114
115void BytesTrieTest::TestBuilder() {
116    IcuTestErrorCode errorCode(*this, "TestBuilder()");
117    builder_->clear();
118    delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
119    if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
120        errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
121        return;
122    }
123    // TODO: remove .build(...) once add() checks for duplicates.
124    builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
125    if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
126        errln("BytesTrieBuilder.add() did not detect duplicates");
127        return;
128    }
129}
130
131void BytesTrieTest::TestEmpty() {
132    static const StringAndValue data[]={
133        { "", 0 }
134    };
135    checkData(data, UPRV_LENGTHOF(data));
136}
137
138void BytesTrieTest::Test_a() {
139    static const StringAndValue data[]={
140        { "a", 1 }
141    };
142    checkData(data, UPRV_LENGTHOF(data));
143}
144
145void BytesTrieTest::Test_a_ab() {
146    static const StringAndValue data[]={
147        { "a", 1 },
148        { "ab", 100 }
149    };
150    checkData(data, UPRV_LENGTHOF(data));
151}
152
153void BytesTrieTest::TestShortestBranch() {
154    static const StringAndValue data[]={
155        { "a", 1000 },
156        { "b", 2000 }
157    };
158    checkData(data, UPRV_LENGTHOF(data));
159}
160
161void BytesTrieTest::TestBranches() {
162    static const StringAndValue data[]={
163        { "a", 0x10 },
164        { "cc", 0x40 },
165        { "e", 0x100 },
166        { "ggg", 0x400 },
167        { "i", 0x1000 },
168        { "kkkk", 0x4000 },
169        { "n", 0x10000 },
170        { "ppppp", 0x40000 },
171        { "r", 0x100000 },
172        { "sss", 0x200000 },
173        { "t", 0x400000 },
174        { "uu", 0x800000 },
175        { "vv", 0x7fffffff },
176        { "zz", (int32_t)0x80000000 }
177    };
178    for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
179        logln("TestBranches length=%d", (int)length);
180        checkData(data, length);
181    }
182}
183
184void BytesTrieTest::TestLongSequence() {
185    static const StringAndValue data[]={
186        { "a", -1 },
187        // sequence of linear-match nodes
188        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
189        // more than 256 bytes
190        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
193          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
194          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
195          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
196    };
197    checkData(data, UPRV_LENGTHOF(data));
198}
199
200void BytesTrieTest::TestLongBranch() {
201    // Split-branch and interesting compact-integer values.
202    static const StringAndValue data[]={
203        { "a", -2 },
204        { "b", -1 },
205        { "c", 0 },
206        { "d2", 1 },
207        { "f", 0x3f },
208        { "g", 0x40 },
209        { "h", 0x41 },
210        { "j23", 0x1900 },
211        { "j24", 0x19ff },
212        { "j25", 0x1a00 },
213        { "k2", 0x1a80 },
214        { "k3", 0x1aff },
215        { "l234567890", 0x1b00 },
216        { "l234567890123", 0x1b01 },
217        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
218        { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
219        { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
220        { "r", 0x333333 },
221        { "s2345", 0x4444444 },
222        { "t234567890", 0x77777777 },
223        { "z", (int32_t)0x80000001 }
224    };
225    checkData(data, UPRV_LENGTHOF(data));
226}
227
228void BytesTrieTest::TestValuesForState() {
229    // Check that saveState() and resetToState() interact properly
230    // with next() and current().
231    static const StringAndValue data[]={
232        { "a", -1 },
233        { "ab", -2 },
234        { "abc", -3 },
235        { "abcd", -4 },
236        { "abcde", -5 },
237        { "abcdef", -6 }
238    };
239    checkData(data, UPRV_LENGTHOF(data));
240}
241
242void BytesTrieTest::TestCompact() {
243    // Duplicate trailing strings and values provide opportunities for compacting.
244    static const StringAndValue data[]={
245        { "+", 0 },
246        { "+august", 8 },
247        { "+december", 12 },
248        { "+july", 7 },
249        { "+june", 6 },
250        { "+november", 11 },
251        { "+october", 10 },
252        { "+september", 9 },
253        { "-", 0 },
254        { "-august", 8 },
255        { "-december", 12 },
256        { "-july", 7 },
257        { "-june", 6 },
258        { "-november", 11 },
259        { "-october", 10 },
260        { "-september", 9 },
261        // The l+n branch (with its sub-nodes) is a duplicate but will be written
262        // both times because each time it follows a different linear-match node.
263        { "xjuly", 7 },
264        { "xjune", 6 }
265    };
266    checkData(data, UPRV_LENGTHOF(data));
267}
268
269BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
270    // All types of nodes leading to the same value,
271    // for code coverage of recursive functions.
272    // In particular, we need a lot of branches on some single level
273    // to exercise a split-branch node.
274    static const StringAndValue data[]={
275        { "august", 8 },
276        { "jan", 1 },
277        { "jan.", 1 },
278        { "jana", 1 },
279        { "janbb", 1 },
280        { "janc", 1 },
281        { "janddd", 1 },
282        { "janee", 1 },
283        { "janef", 1 },
284        { "janf", 1 },
285        { "jangg", 1 },
286        { "janh", 1 },
287        { "janiiii", 1 },
288        { "janj", 1 },
289        { "jankk", 1 },
290        { "jankl", 1 },
291        { "jankmm", 1 },
292        { "janl", 1 },
293        { "janm", 1 },
294        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
295        { "jano", 1 },
296        { "janpp", 1 },
297        { "janqqq", 1 },
298        { "janr", 1 },
299        { "januar", 1 },
300        { "january", 1 },
301        { "july", 7 },
302        { "jun", 6 },
303        { "jun.", 6 },
304        { "june", 6 }
305    };
306    return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
307}
308
309void BytesTrieTest::TestHasUniqueValue() {
310    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
311    if(trie.isNull()) {
312        return;  // buildTrie() reported an error
313    }
314    int32_t uniqueValue;
315    if(trie->hasUniqueValue(uniqueValue)) {
316        errln("unique value at root");
317    }
318    trie->next('j');
319    trie->next('a');
320    trie->next('n');
321    // hasUniqueValue() directly after next()
322    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
323        errln("not unique value 1 after \"jan\"");
324    }
325    trie->first('j');
326    trie->next('u');
327    if(trie->hasUniqueValue(uniqueValue)) {
328        errln("unique value after \"ju\"");
329    }
330    if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
331        errln("not normal value 6 after \"jun\"");
332    }
333    // hasUniqueValue() after getValue()
334    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
335        errln("not unique value 6 after \"jun\"");
336    }
337    // hasUniqueValue() from within a linear-match node
338    trie->first('a');
339    trie->next('u');
340    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
341        errln("not unique value 8 after \"au\"");
342    }
343}
344
345void BytesTrieTest::TestGetNextBytes() {
346    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
347    if(trie.isNull()) {
348        return;  // buildTrie() reported an error
349    }
350    char buffer[40];
351    CheckedArrayByteSink sink(buffer, UPRV_LENGTHOF(buffer));
352    int32_t count=trie->getNextBytes(sink);
353    if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
354        errln("months getNextBytes()!=[aj] at root");
355    }
356    trie->next('j');
357    trie->next('a');
358    trie->next('n');
359    // getNextBytes() directly after next()
360    count=trie->getNextBytes(sink.Reset());
361    buffer[count]=0;
362    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
363        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
364    }
365    // getNextBytes() after getValue()
366    trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
367    memset(buffer, 0, sizeof(buffer));
368    count=trie->getNextBytes(sink.Reset());
369    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
370        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
371    }
372    // getNextBytes() from a linear-match node
373    trie->next('u');
374    memset(buffer, 0, sizeof(buffer));
375    count=trie->getNextBytes(sink.Reset());
376    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
377        errln("months getNextBytes()!=[a] after \"janu\"");
378    }
379    trie->next('a');
380    memset(buffer, 0, sizeof(buffer));
381    count=trie->getNextBytes(sink.Reset());
382    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
383        errln("months getNextBytes()!=[r] after \"janua\"");
384    }
385    trie->next('r');
386    trie->next('y');
387    // getNextBytes() after a final match
388    count=trie->getNextBytes(sink.Reset());
389    if(count!=0 || sink.NumberOfBytesAppended()!=0) {
390        errln("months getNextBytes()!=[] after \"january\"");
391    }
392}
393
394void BytesTrieTest::TestIteratorFromBranch() {
395    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
396    if(trie.isNull()) {
397        return;  // buildTrie() reported an error
398    }
399    // Go to a branch node.
400    trie->next('j');
401    trie->next('a');
402    trie->next('n');
403    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
404    BytesTrie::Iterator iter(*trie, 0, errorCode);
405    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
406        return;
407    }
408    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
409    // following "jan".
410    static const StringAndValue data[]={
411        { "", 1 },
412        { ".", 1 },
413        { "a", 1 },
414        { "bb", 1 },
415        { "c", 1 },
416        { "ddd", 1 },
417        { "ee", 1 },
418        { "ef", 1 },
419        { "f", 1 },
420        { "gg", 1 },
421        { "h", 1 },
422        { "iiii", 1 },
423        { "j", 1 },
424        { "kk", 1 },
425        { "kl", 1 },
426        { "kmm", 1 },
427        { "l", 1 },
428        { "m", 1 },
429        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
430        { "o", 1 },
431        { "pp", 1 },
432        { "qqq", 1 },
433        { "r", 1 },
434        { "uar", 1 },
435        { "uary", 1 }
436    };
437    checkIterator(iter, data, UPRV_LENGTHOF(data));
438    // Reset, and we should get the same result.
439    logln("after iter.reset()");
440    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
441}
442
443void BytesTrieTest::TestIteratorFromLinearMatch() {
444    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
445    if(trie.isNull()) {
446        return;  // buildTrie() reported an error
447    }
448    // Go into a linear-match node.
449    trie->next('j');
450    trie->next('a');
451    trie->next('n');
452    trie->next('u');
453    trie->next('a');
454    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
455    BytesTrie::Iterator iter(*trie, 0, errorCode);
456    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
457        return;
458    }
459    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
460    // following "janua".
461    static const StringAndValue data[]={
462        { "r", 1 },
463        { "ry", 1 }
464    };
465    checkIterator(iter, data, UPRV_LENGTHOF(data));
466    // Reset, and we should get the same result.
467    logln("after iter.reset()");
468    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
469}
470
471void BytesTrieTest::TestTruncatingIteratorFromRoot() {
472    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
473    if(trie.isNull()) {
474        return;  // buildTrie() reported an error
475    }
476    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
477    BytesTrie::Iterator iter(*trie, 4, errorCode);
478    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
479        return;
480    }
481    // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
482    // of each string, and no string duplicates from the truncation.
483    static const StringAndValue data[]={
484        { "augu", -1 },
485        { "jan", 1 },
486        { "jan.", 1 },
487        { "jana", 1 },
488        { "janb", -1 },
489        { "janc", 1 },
490        { "jand", -1 },
491        { "jane", -1 },
492        { "janf", 1 },
493        { "jang", -1 },
494        { "janh", 1 },
495        { "jani", -1 },
496        { "janj", 1 },
497        { "jank", -1 },
498        { "janl", 1 },
499        { "janm", 1 },
500        { "jann", -1 },
501        { "jano", 1 },
502        { "janp", -1 },
503        { "janq", -1 },
504        { "janr", 1 },
505        { "janu", -1 },
506        { "july", 7 },
507        { "jun", 6 },
508        { "jun.", 6 },
509        { "june", 6 }
510    };
511    checkIterator(iter, data, UPRV_LENGTHOF(data));
512    // Reset, and we should get the same result.
513    logln("after iter.reset()");
514    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
515}
516
517void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
518    static const StringAndValue data[]={
519        { "abcdef", 10 },
520        { "abcdepq", 200 },
521        { "abcdeyz", 3000 }
522    };
523    LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
524    if(trie.isNull()) {
525        return;  // buildTrie() reported an error
526    }
527    // Go into a linear-match node.
528    trie->next('a');
529    trie->next('b');
530    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
531    // Truncate within the linear-match node.
532    BytesTrie::Iterator iter(*trie, 2, errorCode);
533    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
534        return;
535    }
536    static const StringAndValue expected[]={
537        { "cd", -1 }
538    };
539    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
540    // Reset, and we should get the same result.
541    logln("after iter.reset()");
542    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
543}
544
545void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
546    static const StringAndValue data[]={
547        { "abcdef", 10 },
548        { "abcdepq", 200 },
549        { "abcdeyz", 3000 }
550    };
551    LocalPointer<BytesTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
552    if(trie.isNull()) {
553        return;  // buildTrie() reported an error
554    }
555    // Go into a linear-match node.
556    trie->next('a');
557    trie->next('b');
558    trie->next('c');
559    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
560    // Truncate after the linear-match node.
561    BytesTrie::Iterator iter(*trie, 3, errorCode);
562    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
563        return;
564    }
565    static const StringAndValue expected[]={
566        { "def", 10 },
567        { "dep", -1 },
568        { "dey", -1 }
569    };
570    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
571    // Reset, and we should get the same result.
572    logln("after iter.reset()");
573    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
574}
575
576void BytesTrieTest::TestIteratorFromBytes() {
577    static const StringAndValue data[]={
578        { "mm", 3 },
579        { "mmm", 33 },
580        { "mmnop", 333 }
581    };
582    builder_->clear();
583    IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
584    for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
585        builder_->add(data[i].s, data[i].value, errorCode);
586    }
587    StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
588    BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
589    checkIterator(iter, data, UPRV_LENGTHOF(data));
590}
591
592void BytesTrieTest::TestFailedIterator() {
593    UErrorCode failure = U_ILLEGAL_ARGUMENT_ERROR;
594    BytesTrie::Iterator iter(NULL, 0, failure);
595    StringPiece sp = iter.getString();
596    if (!sp.empty()) {
597        errln("failed iterator returned garbage data");
598    }
599}
600
601void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
602    logln("checkData(dataLength=%d, fast)", (int)dataLength);
603    checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
604    logln("checkData(dataLength=%d, small)", (int)dataLength);
605    checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
606}
607
608void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
609    LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
610    if(trie.isNull()) {
611        return;  // buildTrie() reported an error
612    }
613    checkFirst(*trie, data, dataLength);
614    checkNext(*trie, data, dataLength);
615    checkNextWithState(*trie, data, dataLength);
616    checkNextString(*trie, data, dataLength);
617    checkIterator(*trie, data, dataLength);
618}
619
620BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
621                                    UStringTrieBuildOption buildOption) {
622    IcuTestErrorCode errorCode(*this, "buildTrie()");
623    // Add the items to the trie builder in an interesting (not trivial, not random) order.
624    int32_t index, step;
625    if(dataLength&1) {
626        // Odd number of items.
627        index=dataLength/2;
628        step=2;
629    } else if((dataLength%3)!=0) {
630        // Not a multiple of 3.
631        index=dataLength/5;
632        step=3;
633    } else {
634        index=dataLength-1;
635        step=-1;
636    }
637    builder_->clear();
638    for(int32_t i=0; i<dataLength; ++i) {
639        builder_->add(data[index].s, data[index].value, errorCode);
640        index=(index+step)%dataLength;
641    }
642    StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
643    LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
644    if(!errorCode.logIfFailureAndReset("add()/build()")) {
645        builder_->add("zzz", 999, errorCode);
646        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
647            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
648        }
649    }
650    logln("serialized trie size: %ld bytes\n", (long)sp.length());
651    StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
652    if(sp.data()==sp2.data()) {
653        errln("builder.buildStringPiece() before & after build() returned same array");
654    }
655    if(errorCode.isFailure()) {
656        return NULL;
657    }
658    // Tries from either build() method should be identical but
659    // BytesTrie does not implement equals().
660    // We just return either one.
661    if((dataLength&1)!=0) {
662        return trie.orphan();
663    } else {
664        return new BytesTrie(sp2.data());
665    }
666}
667
668void BytesTrieTest::checkFirst(BytesTrie &trie,
669                               const StringAndValue data[], int32_t dataLength) {
670    for(int32_t i=0; i<dataLength; ++i) {
671        int c=*data[i].s;
672        if(c==0) {
673            continue;  // skip empty string
674        }
675        UStringTrieResult firstResult=trie.first(c);
676        int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
677        UStringTrieResult nextResult=trie.next(data[i].s[1]);
678        if(firstResult!=trie.reset().next(c) ||
679           firstResult!=trie.current() ||
680           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
681           nextResult!=trie.next(data[i].s[1])
682        ) {
683            errln("trie.first(%c)!=trie.reset().next(same) for %s",
684                  c, data[i].s);
685        }
686    }
687    trie.reset();
688}
689
690void BytesTrieTest::checkNext(BytesTrie &trie,
691                              const StringAndValue data[], int32_t dataLength) {
692    BytesTrie::State state;
693    for(int32_t i=0; i<dataLength; ++i) {
694        int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
695        UStringTrieResult result;
696        if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
697            result!=trie.current()
698        ) {
699            errln("trie does not seem to contain %s", data[i].s);
700        } else if(trie.getValue()!=data[i].value) {
701            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
702                  data[i].s,
703                  (long)trie.getValue(), (long)trie.getValue(),
704                  (long)data[i].value, (long)data[i].value);
705        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
706            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
707        }
708        trie.reset();
709        stringLength=strlen(data[i].s);
710        result=trie.current();
711        for(int32_t j=0; j<stringLength; ++j) {
712            if(!USTRINGTRIE_HAS_NEXT(result)) {
713                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
714                break;
715            }
716            if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
717                trie.getValue();
718                if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
719                    errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
720                    break;
721                }
722            }
723            result=trie.next(data[i].s[j]);
724            if(!USTRINGTRIE_MATCHES(result)) {
725                errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
726                break;
727            }
728            if(result!=trie.current()) {
729                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
730                break;
731            }
732        }
733        if(!USTRINGTRIE_HAS_VALUE(result)) {
734            errln("trie.next()!=hasValue at the end of %s", data[i].s);
735            continue;
736        }
737        trie.getValue();
738        if(result!=trie.current()) {
739            errln("trie.current() != current()+getValue()+current() after end of %s",
740                  data[i].s);
741        }
742        // Compare the final current() with whether next() can actually continue.
743        trie.saveState(state);
744        UBool nextContinues=FALSE;
745        // Try all graphic characters; we only use those in test strings in this file.
746#if U_CHARSET_FAMILY==U_ASCII_FAMILY
747        const int32_t minChar=0x20;
748        const int32_t maxChar=0x7e;
749#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
750        const int32_t minChar=0x40;
751        const int32_t maxChar=0xfe;
752#else
753        const int32_t minChar=0;
754        const int32_t maxChar=0xff;
755#endif
756        for(int32_t c=minChar; c<=maxChar; ++c) {
757            if(trie.resetToState(state).next(c)) {
758                nextContinues=TRUE;
759                break;
760            }
761        }
762        if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
763            errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
764                  "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
765        }
766        trie.reset();
767    }
768}
769
770void BytesTrieTest::checkNextWithState(BytesTrie &trie,
771                                       const StringAndValue data[], int32_t dataLength) {
772    BytesTrie::State noState, state;
773    for(int32_t i=0; i<dataLength; ++i) {
774        if((i&1)==0) {
775            // This should have no effect.
776            trie.resetToState(noState);
777        }
778        const char *expectedString=data[i].s;
779        int32_t stringLength=strlen(expectedString);
780        int32_t partialLength=stringLength/3;
781        for(int32_t j=0; j<partialLength; ++j) {
782            if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
783                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
784                return;
785            }
786        }
787        trie.saveState(state);
788        UStringTrieResult resultAtState=trie.current();
789        UStringTrieResult result;
790        int32_t valueAtState=-99;
791        if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
792            valueAtState=trie.getValue();
793        }
794        result=trie.next(0);  // mismatch
795        if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
796            errln("trie.next(0) matched after part of %s", data[i].s);
797        }
798        if( resultAtState!=trie.resetToState(state).current() ||
799            (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
800        ) {
801            errln("trie.next(part of %s) changes current()/getValue() after "
802                  "saveState/next(0)/resetToState",
803                  data[i].s);
804        } else if(!USTRINGTRIE_HAS_VALUE(
805                      result=trie.next(expectedString+partialLength,
806                                       stringLength-partialLength)) ||
807                  result!=trie.current()) {
808            errln("trie.next(rest of %s) does not seem to contain %s after "
809                  "saveState/next(0)/resetToState",
810                  data[i].s, data[i].s);
811        } else if(!USTRINGTRIE_HAS_VALUE(
812                      result=trie.resetToState(state).
813                                  next(expectedString+partialLength,
814                                       stringLength-partialLength)) ||
815                  result!=trie.current()) {
816            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
817                  data[i].s);
818        } else if(trie.getValue()!=data[i].value) {
819            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
820                  data[i].s,
821                  (long)trie.getValue(), (long)trie.getValue(),
822                  (long)data[i].value, (long)data[i].value);
823        }
824        trie.reset();
825    }
826}
827
828// next(string) is also tested in other functions,
829// but here we try to go partway through the string, and then beyond it.
830void BytesTrieTest::checkNextString(BytesTrie &trie,
831                                    const StringAndValue data[], int32_t dataLength) {
832    for(int32_t i=0; i<dataLength; ++i) {
833        const char *expectedString=data[i].s;
834        int32_t stringLength=strlen(expectedString);
835        if(!trie.next(expectedString, stringLength/2)) {
836            errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
837            continue;
838        }
839        // Test that we stop properly at the end of the string.
840        if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
841            errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
842        }
843        trie.reset();
844    }
845}
846
847void BytesTrieTest::checkIterator(const BytesTrie &trie,
848                                  const StringAndValue data[], int32_t dataLength) {
849    IcuTestErrorCode errorCode(*this, "checkIterator()");
850    BytesTrie::Iterator iter(trie, 0, errorCode);
851    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
852        return;
853    }
854    checkIterator(iter, data, dataLength);
855}
856
857void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
858                                  const StringAndValue data[], int32_t dataLength) {
859    IcuTestErrorCode errorCode(*this, "checkIterator()");
860    for(int32_t i=0; i<dataLength; ++i) {
861        if(!iter.hasNext()) {
862            errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
863            break;
864        }
865        UBool hasNext=iter.next(errorCode);
866        if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
867            break;
868        }
869        if(!hasNext) {
870            errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
871            break;
872        }
873        if(iter.getString()!=StringPiece(data[i].s)) {
874            errln("trie iterator next().getString()=%s but expected %s for item %d",
875                  iter.getString().data(), data[i].s, (int)i);
876        }
877        if(iter.getValue()!=data[i].value) {
878            errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
879                  (long)iter.getValue(), (long)iter.getValue(),
880                  (long)data[i].value, (long)data[i].value,
881                  (int)i, data[i].s);
882        }
883    }
884    if(iter.hasNext()) {
885        errln("trie iterator hasNext()=TRUE after all items");
886    }
887    UBool hasNext=iter.next(errorCode);
888    errorCode.logIfFailureAndReset("trie iterator next() after all items");
889    if(hasNext) {
890        errln("trie iterator next()=TRUE after all items");
891    }
892}
893