1/*
2*******************************************************************************
3*   Copyright (C) 2010-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  bytetrietest.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010nov16
12*   created by: Markus W. Scherer
13*/
14
15#include <string.h>
16
17#include "unicode/utypes.h"
18#include "unicode/bytestrie.h"
19#include "unicode/bytestriebuilder.h"
20#include "unicode/localpointer.h"
21#include "unicode/stringpiece.h"
22#include "intltest.h"
23
// Number of elements in a statically sized array, cast to int32_t.
// Only meaningful for real arrays: with a pointer argument, sizeof(array)
// is the pointer size and the result is not an element count.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
25
// One test item: a byte string and the value the tests expect for it.
struct StringAndValue {
    const char *s;   // NUL-terminated key (the tests use strlen() on it)
    int32_t value;   // value compared against getValue() results
};
30
// Test suite for BytesTrie, BytesTrie::Iterator and BytesTrieBuilder.
// All tests share one builder instance (builder_), which each test clears
// before adding its own data.
class BytesTrieTest : public IntlTest {
public:
    BytesTrieTest();
    virtual ~BytesTrieTest();

    // Test dispatcher; see the TESTCASE_AUTO list in the implementation.
    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
    // Builder error handling and data-driven tests (each calls checkData()).
    void TestBuilder();
    void TestEmpty();
    void Test_a();
    void Test_a_ab();
    void TestShortestBranch();
    void TestBranches();
    void TestLongSequence();
    void TestLongBranch();
    void TestValuesForState();
    void TestCompact();

    // Tests that operate on the "months" trie or on small ad-hoc tries.
    // The caller owns the trie returned by buildMonthsTrie().
    BytesTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
    void TestHasUniqueValue();
    void TestGetNextBytes();
    void TestIteratorFromBranch();
    void TestIteratorFromLinearMatch();
    void TestTruncatingIteratorFromRoot();
    void TestTruncatingIteratorFromLinearMatchShort();
    void TestTruncatingIteratorFromLinearMatchLong();
    void TestIteratorFromBytes();

    // Helpers: build a trie from data[] and verify lookups and iteration.
    // The two-argument checkData() runs the checks for both build options.
    void checkData(const StringAndValue data[], int32_t dataLength);
    void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
    // Returns a new trie owned by the caller, or NULL after reporting an error.
    BytesTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
                         UStringTrieBuildOption buildOption);
    void checkFirst(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
    void checkNext(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
    void checkNextWithState(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
    void checkNextString(BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
    void checkIterator(const BytesTrie &trie, const StringAndValue data[], int32_t dataLength);
    void checkIterator(BytesTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);

private:
    // Allocated in the constructor, deleted in the destructor.
    BytesTrieBuilder *builder_;
};
72
73extern IntlTest *createBytesTrieTest() {
74    return new BytesTrieTest();
75}
76
77BytesTrieTest::BytesTrieTest() : builder_(NULL) {
78    IcuTestErrorCode errorCode(*this, "BytesTrieTest()");
79    builder_=new BytesTrieBuilder(errorCode);
80}
81
// Releases the shared builder allocated in the constructor.
BytesTrieTest::~BytesTrieTest() {
    delete builder_;
}
85
// Test dispatcher for this suite. Logs the suite name when exec is set and
// then hands off to the TESTCASE_AUTO macros, which list the test methods.
// NOTE(review): the macros come from intltest.h; presumably they map index to
// a test name and run it when exec is true - confirm against that header.
void BytesTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
    if(exec) {
        logln("TestSuite BytesTrieTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestBuilder);
    TESTCASE_AUTO(TestEmpty);
    TESTCASE_AUTO(Test_a);
    TESTCASE_AUTO(Test_a_ab);
    TESTCASE_AUTO(TestShortestBranch);
    TESTCASE_AUTO(TestBranches);
    TESTCASE_AUTO(TestLongSequence);
    TESTCASE_AUTO(TestLongBranch);
    TESTCASE_AUTO(TestValuesForState);
    TESTCASE_AUTO(TestCompact);
    TESTCASE_AUTO(TestHasUniqueValue);
    TESTCASE_AUTO(TestGetNextBytes);
    TESTCASE_AUTO(TestIteratorFromBranch);
    TESTCASE_AUTO(TestIteratorFromLinearMatch);
    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
    TESTCASE_AUTO(TestIteratorFromBytes);
    TESTCASE_AUTO_END;
}
111
112void BytesTrieTest::TestBuilder() {
113    IcuTestErrorCode errorCode(*this, "TestBuilder()");
114    builder_->clear();
115    delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
116    if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
117        errln("BytesTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
118        return;
119    }
120    // TODO: remove .build(...) once add() checks for duplicates.
121    builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
122    if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
123        errln("BytesTrieBuilder.add() did not detect duplicates");
124        return;
125    }
126}
127
128void BytesTrieTest::TestEmpty() {
129    static const StringAndValue data[]={
130        { "", 0 }
131    };
132    checkData(data, LENGTHOF(data));
133}
134
135void BytesTrieTest::Test_a() {
136    static const StringAndValue data[]={
137        { "a", 1 }
138    };
139    checkData(data, LENGTHOF(data));
140}
141
142void BytesTrieTest::Test_a_ab() {
143    static const StringAndValue data[]={
144        { "a", 1 },
145        { "ab", 100 }
146    };
147    checkData(data, LENGTHOF(data));
148}
149
150void BytesTrieTest::TestShortestBranch() {
151    static const StringAndValue data[]={
152        { "a", 1000 },
153        { "b", 2000 }
154    };
155    checkData(data, LENGTHOF(data));
156}
157
158void BytesTrieTest::TestBranches() {
159    static const StringAndValue data[]={
160        { "a", 0x10 },
161        { "cc", 0x40 },
162        { "e", 0x100 },
163        { "ggg", 0x400 },
164        { "i", 0x1000 },
165        { "kkkk", 0x4000 },
166        { "n", 0x10000 },
167        { "ppppp", 0x40000 },
168        { "r", 0x100000 },
169        { "sss", 0x200000 },
170        { "t", 0x400000 },
171        { "uu", 0x800000 },
172        { "vv", 0x7fffffff },
173        { "zz", (int32_t)0x80000000 }
174    };
175    for(int32_t length=2; length<=LENGTHOF(data); ++length) {
176        logln("TestBranches length=%d", (int)length);
177        checkData(data, length);
178    }
179}
180
181void BytesTrieTest::TestLongSequence() {
182    static const StringAndValue data[]={
183        { "a", -1 },
184        // sequence of linear-match nodes
185        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
186        // more than 256 bytes
187        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
188          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
189          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
190          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
191          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
192          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
193    };
194    checkData(data, LENGTHOF(data));
195}
196
197void BytesTrieTest::TestLongBranch() {
198    // Split-branch and interesting compact-integer values.
199    static const StringAndValue data[]={
200        { "a", -2 },
201        { "b", -1 },
202        { "c", 0 },
203        { "d2", 1 },
204        { "f", 0x3f },
205        { "g", 0x40 },
206        { "h", 0x41 },
207        { "j23", 0x1900 },
208        { "j24", 0x19ff },
209        { "j25", 0x1a00 },
210        { "k2", 0x1a80 },
211        { "k3", 0x1aff },
212        { "l234567890", 0x1b00 },
213        { "l234567890123", 0x1b01 },
214        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
215        { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
216        { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
217        { "r", 0x333333 },
218        { "s2345", 0x4444444 },
219        { "t234567890", 0x77777777 },
220        { "z", (int32_t)0x80000001 }
221    };
222    checkData(data, LENGTHOF(data));
223}
224
225void BytesTrieTest::TestValuesForState() {
226    // Check that saveState() and resetToState() interact properly
227    // with next() and current().
228    static const StringAndValue data[]={
229        { "a", -1 },
230        { "ab", -2 },
231        { "abc", -3 },
232        { "abcd", -4 },
233        { "abcde", -5 },
234        { "abcdef", -6 }
235    };
236    checkData(data, LENGTHOF(data));
237}
238
239void BytesTrieTest::TestCompact() {
240    // Duplicate trailing strings and values provide opportunities for compacting.
241    static const StringAndValue data[]={
242        { "+", 0 },
243        { "+august", 8 },
244        { "+december", 12 },
245        { "+july", 7 },
246        { "+june", 6 },
247        { "+november", 11 },
248        { "+october", 10 },
249        { "+september", 9 },
250        { "-", 0 },
251        { "-august", 8 },
252        { "-december", 12 },
253        { "-july", 7 },
254        { "-june", 6 },
255        { "-november", 11 },
256        { "-october", 10 },
257        { "-september", 9 },
258        // The l+n branch (with its sub-nodes) is a duplicate but will be written
259        // both times because each time it follows a different linear-match node.
260        { "xjuly", 7 },
261        { "xjune", 6 }
262    };
263    checkData(data, LENGTHOF(data));
264}
265
266BytesTrie *BytesTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
267    // All types of nodes leading to the same value,
268    // for code coverage of recursive functions.
269    // In particular, we need a lot of branches on some single level
270    // to exercise a split-branch node.
271    static const StringAndValue data[]={
272        { "august", 8 },
273        { "jan", 1 },
274        { "jan.", 1 },
275        { "jana", 1 },
276        { "janbb", 1 },
277        { "janc", 1 },
278        { "janddd", 1 },
279        { "janee", 1 },
280        { "janef", 1 },
281        { "janf", 1 },
282        { "jangg", 1 },
283        { "janh", 1 },
284        { "janiiii", 1 },
285        { "janj", 1 },
286        { "jankk", 1 },
287        { "jankl", 1 },
288        { "jankmm", 1 },
289        { "janl", 1 },
290        { "janm", 1 },
291        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
292        { "jano", 1 },
293        { "janpp", 1 },
294        { "janqqq", 1 },
295        { "janr", 1 },
296        { "januar", 1 },
297        { "january", 1 },
298        { "july", 7 },
299        { "jun", 6 },
300        { "jun.", 6 },
301        { "june", 6 }
302    };
303    return buildTrie(data, LENGTHOF(data), buildOption);
304}
305
306void BytesTrieTest::TestHasUniqueValue() {
307    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
308    if(trie.isNull()) {
309        return;  // buildTrie() reported an error
310    }
311    int32_t uniqueValue;
312    if(trie->hasUniqueValue(uniqueValue)) {
313        errln("unique value at root");
314    }
315    trie->next('j');
316    trie->next('a');
317    trie->next('n');
318    // hasUniqueValue() directly after next()
319    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
320        errln("not unique value 1 after \"jan\"");
321    }
322    trie->first('j');
323    trie->next('u');
324    if(trie->hasUniqueValue(uniqueValue)) {
325        errln("unique value after \"ju\"");
326    }
327    if(trie->next('n')!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
328        errln("not normal value 6 after \"jun\"");
329    }
330    // hasUniqueValue() after getValue()
331    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
332        errln("not unique value 6 after \"jun\"");
333    }
334    // hasUniqueValue() from within a linear-match node
335    trie->first('a');
336    trie->next('u');
337    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
338        errln("not unique value 8 after \"au\"");
339    }
340}
341
342void BytesTrieTest::TestGetNextBytes() {
343    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
344    if(trie.isNull()) {
345        return;  // buildTrie() reported an error
346    }
347    char buffer[40];
348    CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
349    int32_t count=trie->getNextBytes(sink);
350    if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
351        errln("months getNextBytes()!=[aj] at root");
352    }
353    trie->next('j');
354    trie->next('a');
355    trie->next('n');
356    // getNextBytes() directly after next()
357    count=trie->getNextBytes(sink.Reset());
358    buffer[count]=0;
359    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
360        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
361    }
362    // getNextBytes() after getValue()
363    trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
364    memset(buffer, 0, sizeof(buffer));
365    count=trie->getNextBytes(sink.Reset());
366    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
367        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
368    }
369    // getNextBytes() from a linear-match node
370    trie->next('u');
371    memset(buffer, 0, sizeof(buffer));
372    count=trie->getNextBytes(sink.Reset());
373    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
374        errln("months getNextBytes()!=[a] after \"janu\"");
375    }
376    trie->next('a');
377    memset(buffer, 0, sizeof(buffer));
378    count=trie->getNextBytes(sink.Reset());
379    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
380        errln("months getNextBytes()!=[r] after \"janua\"");
381    }
382    trie->next('r');
383    trie->next('y');
384    // getNextBytes() after a final match
385    count=trie->getNextBytes(sink.Reset());
386    if(count!=0 || sink.NumberOfBytesAppended()!=0) {
387        errln("months getNextBytes()!=[] after \"january\"");
388    }
389}
390
391void BytesTrieTest::TestIteratorFromBranch() {
392    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
393    if(trie.isNull()) {
394        return;  // buildTrie() reported an error
395    }
396    // Go to a branch node.
397    trie->next('j');
398    trie->next('a');
399    trie->next('n');
400    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
401    BytesTrie::Iterator iter(*trie, 0, errorCode);
402    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
403        return;
404    }
405    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
406    // following "jan".
407    static const StringAndValue data[]={
408        { "", 1 },
409        { ".", 1 },
410        { "a", 1 },
411        { "bb", 1 },
412        { "c", 1 },
413        { "ddd", 1 },
414        { "ee", 1 },
415        { "ef", 1 },
416        { "f", 1 },
417        { "gg", 1 },
418        { "h", 1 },
419        { "iiii", 1 },
420        { "j", 1 },
421        { "kk", 1 },
422        { "kl", 1 },
423        { "kmm", 1 },
424        { "l", 1 },
425        { "m", 1 },
426        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
427        { "o", 1 },
428        { "pp", 1 },
429        { "qqq", 1 },
430        { "r", 1 },
431        { "uar", 1 },
432        { "uary", 1 }
433    };
434    checkIterator(iter, data, LENGTHOF(data));
435    // Reset, and we should get the same result.
436    logln("after iter.reset()");
437    checkIterator(iter.reset(), data, LENGTHOF(data));
438}
439
440void BytesTrieTest::TestIteratorFromLinearMatch() {
441    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
442    if(trie.isNull()) {
443        return;  // buildTrie() reported an error
444    }
445    // Go into a linear-match node.
446    trie->next('j');
447    trie->next('a');
448    trie->next('n');
449    trie->next('u');
450    trie->next('a');
451    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
452    BytesTrie::Iterator iter(*trie, 0, errorCode);
453    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
454        return;
455    }
456    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
457    // following "janua".
458    static const StringAndValue data[]={
459        { "r", 1 },
460        { "ry", 1 }
461    };
462    checkIterator(iter, data, LENGTHOF(data));
463    // Reset, and we should get the same result.
464    logln("after iter.reset()");
465    checkIterator(iter.reset(), data, LENGTHOF(data));
466}
467
468void BytesTrieTest::TestTruncatingIteratorFromRoot() {
469    LocalPointer<BytesTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
470    if(trie.isNull()) {
471        return;  // buildTrie() reported an error
472    }
473    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
474    BytesTrie::Iterator iter(*trie, 4, errorCode);
475    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
476        return;
477    }
478    // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
479    // of each string, and no string duplicates from the truncation.
480    static const StringAndValue data[]={
481        { "augu", -1 },
482        { "jan", 1 },
483        { "jan.", 1 },
484        { "jana", 1 },
485        { "janb", -1 },
486        { "janc", 1 },
487        { "jand", -1 },
488        { "jane", -1 },
489        { "janf", 1 },
490        { "jang", -1 },
491        { "janh", 1 },
492        { "jani", -1 },
493        { "janj", 1 },
494        { "jank", -1 },
495        { "janl", 1 },
496        { "janm", 1 },
497        { "jann", -1 },
498        { "jano", 1 },
499        { "janp", -1 },
500        { "janq", -1 },
501        { "janr", 1 },
502        { "janu", -1 },
503        { "july", 7 },
504        { "jun", 6 },
505        { "jun.", 6 },
506        { "june", 6 }
507    };
508    checkIterator(iter, data, LENGTHOF(data));
509    // Reset, and we should get the same result.
510    logln("after iter.reset()");
511    checkIterator(iter.reset(), data, LENGTHOF(data));
512}
513
514void BytesTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
515    static const StringAndValue data[]={
516        { "abcdef", 10 },
517        { "abcdepq", 200 },
518        { "abcdeyz", 3000 }
519    };
520    LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
521    if(trie.isNull()) {
522        return;  // buildTrie() reported an error
523    }
524    // Go into a linear-match node.
525    trie->next('a');
526    trie->next('b');
527    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
528    // Truncate within the linear-match node.
529    BytesTrie::Iterator iter(*trie, 2, errorCode);
530    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
531        return;
532    }
533    static const StringAndValue expected[]={
534        { "cd", -1 }
535    };
536    checkIterator(iter, expected, LENGTHOF(expected));
537    // Reset, and we should get the same result.
538    logln("after iter.reset()");
539    checkIterator(iter.reset(), expected, LENGTHOF(expected));
540}
541
542void BytesTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
543    static const StringAndValue data[]={
544        { "abcdef", 10 },
545        { "abcdepq", 200 },
546        { "abcdeyz", 3000 }
547    };
548    LocalPointer<BytesTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
549    if(trie.isNull()) {
550        return;  // buildTrie() reported an error
551    }
552    // Go into a linear-match node.
553    trie->next('a');
554    trie->next('b');
555    trie->next('c');
556    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
557    // Truncate after the linear-match node.
558    BytesTrie::Iterator iter(*trie, 3, errorCode);
559    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
560        return;
561    }
562    static const StringAndValue expected[]={
563        { "def", 10 },
564        { "dep", -1 },
565        { "dey", -1 }
566    };
567    checkIterator(iter, expected, LENGTHOF(expected));
568    // Reset, and we should get the same result.
569    logln("after iter.reset()");
570    checkIterator(iter.reset(), expected, LENGTHOF(expected));
571}
572
573void BytesTrieTest::TestIteratorFromBytes() {
574    static const StringAndValue data[]={
575        { "mm", 3 },
576        { "mmm", 33 },
577        { "mmnop", 333 }
578    };
579    builder_->clear();
580    IcuTestErrorCode errorCode(*this, "TestIteratorFromBytes()");
581    for(int32_t i=0; i<LENGTHOF(data); ++i) {
582        builder_->add(data[i].s, data[i].value, errorCode);
583    }
584    StringPiece trieBytes=builder_->buildStringPiece(USTRINGTRIE_BUILD_FAST, errorCode);
585    BytesTrie::Iterator iter(trieBytes.data(), 0, errorCode);
586    checkIterator(iter, data, LENGTHOF(data));
587}
588
589void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
590    logln("checkData(dataLength=%d, fast)", (int)dataLength);
591    checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
592    logln("checkData(dataLength=%d, small)", (int)dataLength);
593    checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
594}
595
596void BytesTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
597    LocalPointer<BytesTrie> trie(buildTrie(data, dataLength, buildOption));
598    if(trie.isNull()) {
599        return;  // buildTrie() reported an error
600    }
601    checkFirst(*trie, data, dataLength);
602    checkNext(*trie, data, dataLength);
603    checkNextWithState(*trie, data, dataLength);
604    checkNextString(*trie, data, dataLength);
605    checkIterator(*trie, data, dataLength);
606}
607
// Builds a trie from data[] using the shared builder_.
// Along the way it also checks builder behavior: add() after build() must set
// U_NO_WRITE_PERMISSION, and buildStringPiece() called before and after
// build() must not return the same array.
// Returns a new trie that the caller must delete, or NULL if an error is
// still pending at the end.
BytesTrie *BytesTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
                                    UStringTrieBuildOption buildOption) {
    IcuTestErrorCode errorCode(*this, "buildTrie()");
    // Add the items to the trie builder in an interesting (not trivial, not random) order.
    int32_t index, step;
    if(dataLength&1) {
        // Odd number of items.
        index=dataLength/2;
        step=2;
    } else if((dataLength%3)!=0) {
        // Not a multiple of 3.
        index=dataLength/5;
        step=3;
    } else {
        // Even multiple of 3 (including 0): walk backwards from the last item.
        index=dataLength-1;
        step=-1;
    }
    builder_->clear();
    for(int32_t i=0; i<dataLength; ++i) {
        builder_->add(data[index].s, data[index].value, errorCode);
        // With step=-1 the value computed after the last item is negative,
        // but the loop ends before it is used as an index.
        index=(index+step)%dataLength;
    }
    // Build twice: once as serialized bytes, once as a BytesTrie object.
    StringPiece sp=builder_->buildStringPiece(buildOption, errorCode);
    LocalPointer<BytesTrie> trie(builder_->build(buildOption, errorCode));
    if(!errorCode.logIfFailureAndReset("add()/build()")) {
        // Adding to the builder after build() is expected to be rejected.
        builder_->add("zzz", 999, errorCode);
        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
        }
    }
    logln("serialized trie size: %ld bytes\n", (long)sp.length());
    StringPiece sp2=builder_->buildStringPiece(buildOption, errorCode);
    if(sp.data()==sp2.data()) {
        errln("builder.buildStringPiece() before & after build() returned same array");
    }
    if(errorCode.isFailure()) {
        return NULL;
    }
    // Tries from either build() method should be identical but
    // BytesTrie does not implement equals().
    // We just return either one.
    if((dataLength&1)!=0) {
        return trie.orphan();
    } else {
        return new BytesTrie(sp2.data());
    }
}
655
656void BytesTrieTest::checkFirst(BytesTrie &trie,
657                               const StringAndValue data[], int32_t dataLength) {
658    for(int32_t i=0; i<dataLength; ++i) {
659        int c=*data[i].s;
660        if(c==0) {
661            continue;  // skip empty string
662        }
663        UStringTrieResult firstResult=trie.first(c);
664        int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
665        UStringTrieResult nextResult=trie.next(data[i].s[1]);
666        if(firstResult!=trie.reset().next(c) ||
667           firstResult!=trie.current() ||
668           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
669           nextResult!=trie.next(data[i].s[1])
670        ) {
671            errln("trie.first(%c)!=trie.reset().next(same) for %s",
672                  c, data[i].s);
673        }
674    }
675    trie.reset();
676}
677
678void BytesTrieTest::checkNext(BytesTrie &trie,
679                              const StringAndValue data[], int32_t dataLength) {
680    BytesTrie::State state;
681    for(int32_t i=0; i<dataLength; ++i) {
682        int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
683        UStringTrieResult result;
684        if( !USTRINGTRIE_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
685            result!=trie.current()
686        ) {
687            errln("trie does not seem to contain %s", data[i].s);
688        } else if(trie.getValue()!=data[i].value) {
689            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
690                  data[i].s,
691                  (long)trie.getValue(), (long)trie.getValue(),
692                  (long)data[i].value, (long)data[i].value);
693        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
694            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
695        }
696        trie.reset();
697        stringLength=strlen(data[i].s);
698        result=trie.current();
699        for(int32_t j=0; j<stringLength; ++j) {
700            if(!USTRINGTRIE_HAS_NEXT(result)) {
701                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
702                break;
703            }
704            if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
705                trie.getValue();
706                if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
707                    errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
708                    break;
709                }
710            }
711            result=trie.next(data[i].s[j]);
712            if(!USTRINGTRIE_MATCHES(result)) {
713                errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
714                break;
715            }
716            if(result!=trie.current()) {
717                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
718                break;
719            }
720        }
721        if(!USTRINGTRIE_HAS_VALUE(result)) {
722            errln("trie.next()!=hasValue at the end of %s", data[i].s);
723            continue;
724        }
725        trie.getValue();
726        if(result!=trie.current()) {
727            errln("trie.current() != current()+getValue()+current() after end of %s",
728                  data[i].s);
729        }
730        // Compare the final current() with whether next() can actually continue.
731        trie.saveState(state);
732        UBool nextContinues=FALSE;
733        // Try all graphic characters; we only use those in test strings in this file.
734#if U_CHARSET_FAMILY==U_ASCII_FAMILY
735        const int32_t minChar=0x20;
736        const int32_t maxChar=0x7e;
737#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
738        const int32_t minChar=0x40;
739        const int32_t maxChar=0xfe;
740#else
741        const int32_t minChar=0;
742        const int32_t maxChar=0xff;
743#endif
744        for(int32_t c=minChar; c<=maxChar; ++c) {
745            if(trie.resetToState(state).next(c)) {
746                nextContinues=TRUE;
747                break;
748            }
749        }
750        if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
751            errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
752                  "(trie.next(some byte)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
753        }
754        trie.reset();
755    }
756}
757
758void BytesTrieTest::checkNextWithState(BytesTrie &trie,
759                                       const StringAndValue data[], int32_t dataLength) {
760    BytesTrie::State noState, state;
761    for(int32_t i=0; i<dataLength; ++i) {
762        if((i&1)==0) {
763            // This should have no effect.
764            trie.resetToState(noState);
765        }
766        const char *expectedString=data[i].s;
767        int32_t stringLength=strlen(expectedString);
768        int32_t partialLength=stringLength/3;
769        for(int32_t j=0; j<partialLength; ++j) {
770            if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
771                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
772                return;
773            }
774        }
775        trie.saveState(state);
776        UStringTrieResult resultAtState=trie.current();
777        UStringTrieResult result;
778        int32_t valueAtState=-99;
779        if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
780            valueAtState=trie.getValue();
781        }
782        result=trie.next(0);  // mismatch
783        if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
784            errln("trie.next(0) matched after part of %s", data[i].s);
785        }
786        if( resultAtState!=trie.resetToState(state).current() ||
787            (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
788        ) {
789            errln("trie.next(part of %s) changes current()/getValue() after "
790                  "saveState/next(0)/resetToState",
791                  data[i].s);
792        } else if(!USTRINGTRIE_HAS_VALUE(
793                      result=trie.next(expectedString+partialLength,
794                                       stringLength-partialLength)) ||
795                  result!=trie.current()) {
796            errln("trie.next(rest of %s) does not seem to contain %s after "
797                  "saveState/next(0)/resetToState",
798                  data[i].s, data[i].s);
799        } else if(!USTRINGTRIE_HAS_VALUE(
800                      result=trie.resetToState(state).
801                                  next(expectedString+partialLength,
802                                       stringLength-partialLength)) ||
803                  result!=trie.current()) {
804            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
805                  data[i].s);
806        } else if(trie.getValue()!=data[i].value) {
807            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
808                  data[i].s,
809                  (long)trie.getValue(), (long)trie.getValue(),
810                  (long)data[i].value, (long)data[i].value);
811        }
812        trie.reset();
813    }
814}
815
816// next(string) is also tested in other functions,
817// but here we try to go partway through the string, and then beyond it.
818void BytesTrieTest::checkNextString(BytesTrie &trie,
819                                    const StringAndValue data[], int32_t dataLength) {
820    for(int32_t i=0; i<dataLength; ++i) {
821        const char *expectedString=data[i].s;
822        int32_t stringLength=strlen(expectedString);
823        if(!trie.next(expectedString, stringLength/2)) {
824            errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
825            continue;
826        }
827        // Test that we stop properly at the end of the string.
828        if(trie.next(expectedString+stringLength/2, stringLength+1-stringLength/2)) {
829            errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
830        }
831        trie.reset();
832    }
833}
834
835void BytesTrieTest::checkIterator(const BytesTrie &trie,
836                                  const StringAndValue data[], int32_t dataLength) {
837    IcuTestErrorCode errorCode(*this, "checkIterator()");
838    BytesTrie::Iterator iter(trie, 0, errorCode);
839    if(errorCode.logIfFailureAndReset("BytesTrie::Iterator(trie) constructor")) {
840        return;
841    }
842    checkIterator(iter, data, dataLength);
843}
844
845void BytesTrieTest::checkIterator(BytesTrie::Iterator &iter,
846                                  const StringAndValue data[], int32_t dataLength) {
847    IcuTestErrorCode errorCode(*this, "checkIterator()");
848    for(int32_t i=0; i<dataLength; ++i) {
849        if(!iter.hasNext()) {
850            errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
851            break;
852        }
853        UBool hasNext=iter.next(errorCode);
854        if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
855            break;
856        }
857        if(!hasNext) {
858            errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
859            break;
860        }
861        if(iter.getString()!=StringPiece(data[i].s)) {
862            errln("trie iterator next().getString()=%s but expected %s for item %d",
863                  iter.getString().data(), data[i].s, (int)i);
864        }
865        if(iter.getValue()!=data[i].value) {
866            errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
867                  (long)iter.getValue(), (long)iter.getValue(),
868                  (long)data[i].value, (long)data[i].value,
869                  (int)i, data[i].s);
870        }
871    }
872    if(iter.hasNext()) {
873        errln("trie iterator hasNext()=TRUE after all items");
874    }
875    UBool hasNext=iter.next(errorCode);
876    errorCode.logIfFailureAndReset("trie iterator next() after all items");
877    if(hasNext) {
878        errln("trie iterator next()=TRUE after all items");
879    }
880}
881