1/*
2*******************************************************************************
3*   Copyright (C) 2010-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  ucharstrietest.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010nov16
12*   created by: Markus W. Scherer
13*/
14
15#include <string.h>
16
17#include "unicode/utypes.h"
18#include "unicode/appendable.h"
19#include "unicode/localpointer.h"
20#include "unicode/ucharstrie.h"
21#include "unicode/ucharstriebuilder.h"
22#include "unicode/uniset.h"
23#include "unicode/unistr.h"
24#include "intltest.h"
25#include "cmemory.h"
26
27struct StringAndValue {
28    const char *s;
29    int32_t value;
30};
31
32class UCharsTrieTest : public IntlTest {
33public:
34    UCharsTrieTest();
35    virtual ~UCharsTrieTest();
36
37    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
38    void TestBuilder();
39    void TestEmpty();
40    void Test_a();
41    void Test_a_ab();
42    void TestShortestBranch();
43    void TestBranches();
44    void TestLongSequence();
45    void TestLongBranch();
46    void TestValuesForState();
47    void TestCompact();
48    void TestFirstForCodePoint();
49    void TestNextForCodePoint();
50
51    UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
52    void TestLargeTrie();
53
54    UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
55    void TestHasUniqueValue();
56    void TestGetNextUChars();
57    void TestIteratorFromBranch();
58    void TestIteratorFromLinearMatch();
59    void TestTruncatingIteratorFromRoot();
60    void TestTruncatingIteratorFromLinearMatchShort();
61    void TestTruncatingIteratorFromLinearMatchLong();
62    void TestIteratorFromUChars();
63
64    void checkData(const StringAndValue data[], int32_t dataLength);
65    void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
66    UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
67                          UStringTrieBuildOption buildOption);
68    void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
69    void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
70    void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71    void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72    void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73    void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
74
75private:
76    UCharsTrieBuilder *builder_;
77};
78
79extern IntlTest *createUCharsTrieTest() {
80    return new UCharsTrieTest();
81}
82
83UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
84    IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
85    builder_=new UCharsTrieBuilder(errorCode);
86}
87
88UCharsTrieTest::~UCharsTrieTest() {
89    delete builder_;
90}
91
92void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
93    if(exec) {
94        logln("TestSuite UCharsTrieTest: ");
95    }
96    TESTCASE_AUTO_BEGIN;
97    TESTCASE_AUTO(TestBuilder);
98    TESTCASE_AUTO(TestEmpty);
99    TESTCASE_AUTO(Test_a);
100    TESTCASE_AUTO(Test_a_ab);
101    TESTCASE_AUTO(TestShortestBranch);
102    TESTCASE_AUTO(TestBranches);
103    TESTCASE_AUTO(TestLongSequence);
104    TESTCASE_AUTO(TestLongBranch);
105    TESTCASE_AUTO(TestValuesForState);
106    TESTCASE_AUTO(TestCompact);
107    TESTCASE_AUTO(TestFirstForCodePoint);
108    TESTCASE_AUTO(TestNextForCodePoint);
109    TESTCASE_AUTO(TestLargeTrie);
110    TESTCASE_AUTO(TestHasUniqueValue);
111    TESTCASE_AUTO(TestGetNextUChars);
112    TESTCASE_AUTO(TestIteratorFromBranch);
113    TESTCASE_AUTO(TestIteratorFromLinearMatch);
114    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
115    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
116    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
117    TESTCASE_AUTO(TestIteratorFromUChars);
118    TESTCASE_AUTO_END;
119}
120
121void UCharsTrieTest::TestBuilder() {
122    IcuTestErrorCode errorCode(*this, "TestBuilder()");
123    delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
124    if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
125        errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
126        return;
127    }
128    // TODO: remove .build(...) once add() checks for duplicates.
129    builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
130    if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
131        errln("UCharsTrieBuilder.add() did not detect duplicates");
132        return;
133    }
134}
135
136void UCharsTrieTest::TestEmpty() {
137    static const StringAndValue data[]={
138        { "", 0 }
139    };
140    checkData(data, UPRV_LENGTHOF(data));
141}
142
143void UCharsTrieTest::Test_a() {
144    static const StringAndValue data[]={
145        { "a", 1 }
146    };
147    checkData(data, UPRV_LENGTHOF(data));
148}
149
150void UCharsTrieTest::Test_a_ab() {
151    static const StringAndValue data[]={
152        { "a", 1 },
153        { "ab", 100 }
154    };
155    checkData(data, UPRV_LENGTHOF(data));
156}
157
158void UCharsTrieTest::TestShortestBranch() {
159    static const StringAndValue data[]={
160        { "a", 1000 },
161        { "b", 2000 }
162    };
163    checkData(data, UPRV_LENGTHOF(data));
164}
165
166void UCharsTrieTest::TestBranches() {
167    static const StringAndValue data[]={
168        { "a", 0x10 },
169        { "cc", 0x40 },
170        { "e", 0x100 },
171        { "ggg", 0x400 },
172        { "i", 0x1000 },
173        { "kkkk", 0x4000 },
174        { "n", 0x10000 },
175        { "ppppp", 0x40000 },
176        { "r", 0x100000 },
177        { "sss", 0x200000 },
178        { "t", 0x400000 },
179        { "uu", 0x800000 },
180        { "vv", 0x7fffffff },
181        { "zz", (int32_t)0x80000000 }
182    };
183    for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
184        logln("TestBranches length=%d", (int)length);
185        checkData(data, length);
186    }
187}
188
189void UCharsTrieTest::TestLongSequence() {
190    static const StringAndValue data[]={
191        { "a", -1 },
192        // sequence of linear-match nodes
193        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
194        // more than 256 units
195        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
196          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
201    };
202    checkData(data, UPRV_LENGTHOF(data));
203}
204
205void UCharsTrieTest::TestLongBranch() {
206    // Split-branch and interesting compact-integer values.
207    static const StringAndValue data[]={
208        { "a", -2 },
209        { "b", -1 },
210        { "c", 0 },
211        { "d2", 1 },
212        { "f", 0x3f },
213        { "g", 0x40 },
214        { "h", 0x41 },
215        { "j23", 0x1900 },
216        { "j24", 0x19ff },
217        { "j25", 0x1a00 },
218        { "k2", 0x1a80 },
219        { "k3", 0x1aff },
220        { "l234567890", 0x1b00 },
221        { "l234567890123", 0x1b01 },
222        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
223        { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
224        { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
225        { "r", 0x333333 },
226        { "s2345", 0x4444444 },
227        { "t234567890", 0x77777777 },
228        { "z", (int32_t)0x80000001 }
229    };
230    checkData(data, UPRV_LENGTHOF(data));
231}
232
233void UCharsTrieTest::TestValuesForState() {
234    // Check that saveState() and resetToState() interact properly
235    // with next() and current().
236    static const StringAndValue data[]={
237        { "a", -1 },
238        { "ab", -2 },
239        { "abc", -3 },
240        { "abcd", -4 },
241        { "abcde", -5 },
242        { "abcdef", -6 }
243    };
244    checkData(data, UPRV_LENGTHOF(data));
245}
246
247void UCharsTrieTest::TestCompact() {
248    // Duplicate trailing strings and values provide opportunities for compacting.
249    static const StringAndValue data[]={
250        { "+", 0 },
251        { "+august", 8 },
252        { "+december", 12 },
253        { "+july", 7 },
254        { "+june", 6 },
255        { "+november", 11 },
256        { "+october", 10 },
257        { "+september", 9 },
258        { "-", 0 },
259        { "-august", 8 },
260        { "-december", 12 },
261        { "-july", 7 },
262        { "-june", 6 },
263        { "-november", 11 },
264        { "-october", 10 },
265        { "-september", 9 },
266        // The l+n branch (with its sub-nodes) is a duplicate but will be written
267        // both times because each time it follows a different linear-match node.
268        { "xjuly", 7 },
269        { "xjune", 6 }
270    };
271    checkData(data, UPRV_LENGTHOF(data));
272}
273
274void UCharsTrieTest::TestFirstForCodePoint() {
275    static const StringAndValue data[]={
276        { "a", 1 },
277        { "a\\ud800", 2 },
278        { "a\\U00010000", 3 },
279        { "\\ud840", 4 },
280        { "\\U00020000\\udbff", 5 },
281        { "\\U00020000\\U0010ffff", 6 },
282        { "\\U00020000\\U0010ffffz", 7 },
283        { "\\U00050000xy", 8 },
284        { "\\U00050000xyz", 9 }
285    };
286    checkData(data, UPRV_LENGTHOF(data));
287}
288
289void UCharsTrieTest::TestNextForCodePoint() {
290    static const StringAndValue data[]={
291        { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
292        { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
293        { "\\u4dff\\U000103ff", 99999 }
294    };
295    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
296    if(trie.isNull()) {
297        return;  // buildTrie() reported an error
298    }
299    UStringTrieResult result;
300    if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
301        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
302        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303        (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304        (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305        (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
306        trie->getValue()!=2000000000
307    ) {
308        errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
309    }
310    if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
311        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
312        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313        (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
314        trie->getValue()!=44444
315    ) {
316        errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
317    }
318    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
319        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
320        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321        (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
322    ) {
323        errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
324    }
325    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
326        (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
327        trie->getValue()!=99999
328    ) {
329        errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
330    }
331}
332
333// Definitions in the anonymous namespace are invisible outside this file.
334namespace {
335
336// Generate (string, value) pairs.
337// The first string (before next()) will be empty.
338class Generator {
339public:
340    Generator() : value(4711), num(0) {}
341    void next() {
342        UChar c;
343        s.truncate(0);
344        s.append(c=(UChar)(value>>16));
345        s.append((UChar)(value>>4));
346        if(value&1) {
347            s.append((UChar)value);
348        }
349        set.add(c);
350        value+=((value>>5)&0x7ff)*3+1;
351        ++num;
352    }
353    const UnicodeString &getString() const { return s; }
354    int32_t getValue() const { return value; }
355    int32_t countUniqueFirstChars() const { return set.size(); }
356    int32_t getIndex() const { return num; }
357
358private:
359    UnicodeString s;
360    UnicodeSet set;
361    int32_t value;
362    int32_t num;
363};
364
365}  // end namespace
366
367UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
368    IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
369    Generator gen;
370    builder_->clear();
371    while(gen.countUniqueFirstChars()<numUniqueFirst) {
372        builder_->add(gen.getString(), gen.getValue(), errorCode);
373        gen.next();
374    }
375    logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
376    UnicodeString trieUChars;
377    builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
378    logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
379    return new UCharsTrie(trieUChars.getBuffer());
380}
381
382// Exercise a large branch node.
383void UCharsTrieTest::TestLargeTrie() {
384    LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
385    if(trie.isNull()) {
386        return;  // buildTrie() reported an error
387    }
388    Generator gen;
389    while(gen.countUniqueFirstChars()<1111) {
390        UnicodeString x(gen.getString());
391        int32_t value=gen.getValue();
392        if(!x.isEmpty()) {
393            if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
394                errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
395                      x[0], (long)gen.getIndex());
396                break;
397            }
398            x.remove(0, 1);
399        }
400        UStringTrieResult result=trie->next(x.getBuffer(), x.length());
401        if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
402            errln("next(%d chars U+%04X U+%04X)!=hasValue or "
403                  "next()!=current() or getValue() wrong "
404                  "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
405            break;
406        }
407        gen.next();
408    }
409}
410
411enum {
412    u_a=0x61,
413    u_b=0x62,
414    u_c=0x63,
415    u_j=0x6a,
416    u_n=0x6e,
417    u_r=0x72,
418    u_u=0x75,
419    u_y=0x79
420};
421
422UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
423    // All types of nodes leading to the same value,
424    // for code coverage of recursive functions.
425    // In particular, we need a lot of branches on some single level
426    // to exercise a split-branch node.
427    static const StringAndValue data[]={
428        { "august", 8 },
429        { "jan", 1 },
430        { "jan.", 1 },
431        { "jana", 1 },
432        { "janbb", 1 },
433        { "janc", 1 },
434        { "janddd", 1 },
435        { "janee", 1 },
436        { "janef", 1 },
437        { "janf", 1 },
438        { "jangg", 1 },
439        { "janh", 1 },
440        { "janiiii", 1 },
441        { "janj", 1 },
442        { "jankk", 1 },
443        { "jankl", 1 },
444        { "jankmm", 1 },
445        { "janl", 1 },
446        { "janm", 1 },
447        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
448        { "jano", 1 },
449        { "janpp", 1 },
450        { "janqqq", 1 },
451        { "janr", 1 },
452        { "januar", 1 },
453        { "january", 1 },
454        { "july", 7 },
455        { "jun", 6 },
456        { "jun.", 6 },
457        { "june", 6 }
458    };
459    return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
460}
461
462void UCharsTrieTest::TestHasUniqueValue() {
463    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
464    if(trie.isNull()) {
465        return;  // buildTrie() reported an error
466    }
467    int32_t uniqueValue;
468    if(trie->hasUniqueValue(uniqueValue)) {
469        errln("unique value at root");
470    }
471    trie->next(u_j);
472    trie->next(u_a);
473    trie->next(u_n);
474    // hasUniqueValue() directly after next()
475    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
476        errln("not unique value 1 after \"jan\"");
477    }
478    trie->first(u_j);
479    trie->next(u_u);
480    if(trie->hasUniqueValue(uniqueValue)) {
481        errln("unique value after \"ju\"");
482    }
483    if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
484        errln("not normal value 6 after \"jun\"");
485    }
486    // hasUniqueValue() after getValue()
487    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
488        errln("not unique value 6 after \"jun\"");
489    }
490    // hasUniqueValue() from within a linear-match node
491    trie->first(u_a);
492    trie->next(u_u);
493    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
494        errln("not unique value 8 after \"au\"");
495    }
496}
497
498void UCharsTrieTest::TestGetNextUChars() {
499    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
500    if(trie.isNull()) {
501        return;  // buildTrie() reported an error
502    }
503    UnicodeString buffer;
504    UnicodeStringAppendable app(buffer);
505    int32_t count=trie->getNextUChars(app);
506    if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
507        errln("months getNextUChars()!=[aj] at root");
508    }
509    trie->next(u_j);
510    trie->next(u_a);
511    trie->next(u_n);
512    // getNextUChars() directly after next()
513    buffer.remove();
514    count=trie->getNextUChars(app);
515    if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
516        errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
517    }
518    // getNextUChars() after getValue()
519    trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
520    buffer.remove();
521    count=trie->getNextUChars(app);
522    if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
523        errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
524    }
525    // getNextUChars() from a linear-match node
526    trie->next(u_u);
527    buffer.remove();
528    count=trie->getNextUChars(app);
529    if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
530        errln("months getNextUChars()!=[a] after \"janu\"");
531    }
532    trie->next(u_a);
533    buffer.remove();
534    count=trie->getNextUChars(app);
535    if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
536        errln("months getNextUChars()!=[r] after \"janua\"");
537    }
538    trie->next(u_r);
539    trie->next(u_y);
540    // getNextUChars() after a final match
541    buffer.remove();
542    count=trie->getNextUChars(app);
543    if(count!=0 || buffer.length()!=0) {
544        errln("months getNextUChars()!=[] after \"january\"");
545    }
546}
547
548void UCharsTrieTest::TestIteratorFromBranch() {
549    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
550    if(trie.isNull()) {
551        return;  // buildTrie() reported an error
552    }
553    // Go to a branch node.
554    trie->next(u_j);
555    trie->next(u_a);
556    trie->next(u_n);
557    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
558    UCharsTrie::Iterator iter(*trie, 0, errorCode);
559    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
560        return;
561    }
562    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
563    // following "jan".
564    static const StringAndValue data[]={
565        { "", 1 },
566        { ".", 1 },
567        { "a", 1 },
568        { "bb", 1 },
569        { "c", 1 },
570        { "ddd", 1 },
571        { "ee", 1 },
572        { "ef", 1 },
573        { "f", 1 },
574        { "gg", 1 },
575        { "h", 1 },
576        { "iiii", 1 },
577        { "j", 1 },
578        { "kk", 1 },
579        { "kl", 1 },
580        { "kmm", 1 },
581        { "l", 1 },
582        { "m", 1 },
583        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
584        { "o", 1 },
585        { "pp", 1 },
586        { "qqq", 1 },
587        { "r", 1 },
588        { "uar", 1 },
589        { "uary", 1 }
590    };
591    checkIterator(iter, data, UPRV_LENGTHOF(data));
592    // Reset, and we should get the same result.
593    logln("after iter.reset()");
594    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
595}
596
597void UCharsTrieTest::TestIteratorFromLinearMatch() {
598    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
599    if(trie.isNull()) {
600        return;  // buildTrie() reported an error
601    }
602    // Go into a linear-match node.
603    trie->next(u_j);
604    trie->next(u_a);
605    trie->next(u_n);
606    trie->next(u_u);
607    trie->next(u_a);
608    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
609    UCharsTrie::Iterator iter(*trie, 0, errorCode);
610    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
611        return;
612    }
613    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
614    // following "janua".
615    static const StringAndValue data[]={
616        { "r", 1 },
617        { "ry", 1 }
618    };
619    checkIterator(iter, data, UPRV_LENGTHOF(data));
620    // Reset, and we should get the same result.
621    logln("after iter.reset()");
622    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
623}
624
625void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
626    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
627    if(trie.isNull()) {
628        return;  // buildTrie() reported an error
629    }
630    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
631    UCharsTrie::Iterator iter(*trie, 4, errorCode);
632    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
633        return;
634    }
635    // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
636    // of each string, and no string duplicates from the truncation.
637    static const StringAndValue data[]={
638        { "augu", -1 },
639        { "jan", 1 },
640        { "jan.", 1 },
641        { "jana", 1 },
642        { "janb", -1 },
643        { "janc", 1 },
644        { "jand", -1 },
645        { "jane", -1 },
646        { "janf", 1 },
647        { "jang", -1 },
648        { "janh", 1 },
649        { "jani", -1 },
650        { "janj", 1 },
651        { "jank", -1 },
652        { "janl", 1 },
653        { "janm", 1 },
654        { "jann", -1 },
655        { "jano", 1 },
656        { "janp", -1 },
657        { "janq", -1 },
658        { "janr", 1 },
659        { "janu", -1 },
660        { "july", 7 },
661        { "jun", 6 },
662        { "jun.", 6 },
663        { "june", 6 }
664    };
665    checkIterator(iter, data, UPRV_LENGTHOF(data));
666    // Reset, and we should get the same result.
667    logln("after iter.reset()");
668    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
669}
670
671void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
672    static const StringAndValue data[]={
673        { "abcdef", 10 },
674        { "abcdepq", 200 },
675        { "abcdeyz", 3000 }
676    };
677    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
678    if(trie.isNull()) {
679        return;  // buildTrie() reported an error
680    }
681    // Go into a linear-match node.
682    trie->next(u_a);
683    trie->next(u_b);
684    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
685    // Truncate within the linear-match node.
686    UCharsTrie::Iterator iter(*trie, 2, errorCode);
687    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
688        return;
689    }
690    static const StringAndValue expected[]={
691        { "cd", -1 }
692    };
693    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
694    // Reset, and we should get the same result.
695    logln("after iter.reset()");
696    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
697}
698
699void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
700    static const StringAndValue data[]={
701        { "abcdef", 10 },
702        { "abcdepq", 200 },
703        { "abcdeyz", 3000 }
704    };
705    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
706    if(trie.isNull()) {
707        return;  // buildTrie() reported an error
708    }
709    // Go into a linear-match node.
710    trie->next(u_a);
711    trie->next(u_b);
712    trie->next(u_c);
713    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
714    // Truncate after the linear-match node.
715    UCharsTrie::Iterator iter(*trie, 3, errorCode);
716    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
717        return;
718    }
719    static const StringAndValue expected[]={
720        { "def", 10 },
721        { "dep", -1 },
722        { "dey", -1 }
723    };
724    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
725    // Reset, and we should get the same result.
726    logln("after iter.reset()");
727    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
728}
729
730void UCharsTrieTest::TestIteratorFromUChars() {
731    static const StringAndValue data[]={
732        { "mm", 3 },
733        { "mmm", 33 },
734        { "mmnop", 333 }
735    };
736    builder_->clear();
737    IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
738    for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
739        builder_->add(data[i].s, data[i].value, errorCode);
740    }
741    UnicodeString trieUChars;
742    builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
743    UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
744    checkIterator(iter, data, UPRV_LENGTHOF(data));
745}
746
747void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
748    logln("checkData(dataLength=%d, fast)", (int)dataLength);
749    checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
750    logln("checkData(dataLength=%d, small)", (int)dataLength);
751    checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
752}
753
754void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
755    LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
756    if(trie.isNull()) {
757        return;  // buildTrie() reported an error
758    }
759    checkFirst(*trie, data, dataLength);
760    checkNext(*trie, data, dataLength);
761    checkNextWithState(*trie, data, dataLength);
762    checkNextString(*trie, data, dataLength);
763    checkIterator(*trie, data, dataLength);
764}
765
766UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
767                                      UStringTrieBuildOption buildOption) {
768    IcuTestErrorCode errorCode(*this, "buildTrie()");
769    // Add the items to the trie builder in an interesting (not trivial, not random) order.
770    int32_t index, step;
771    if(dataLength&1) {
772        // Odd number of items.
773        index=dataLength/2;
774        step=2;
775    } else if((dataLength%3)!=0) {
776        // Not a multiple of 3.
777        index=dataLength/5;
778        step=3;
779    } else {
780        index=dataLength-1;
781        step=-1;
782    }
783    builder_->clear();
784    for(int32_t i=0; i<dataLength; ++i) {
785        builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
786                      data[index].value, errorCode);
787        index=(index+step)%dataLength;
788    }
789    UnicodeString trieUChars;
790    builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
791    LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
792    if(!errorCode.logIfFailureAndReset("add()/build()")) {
793        builder_->add("zzz", 999, errorCode);
794        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
795            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
796        }
797    }
798    logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
799    UnicodeString trieUChars2;
800    builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
801    if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
802        errln("builder.buildUnicodeString() before & after build() returned same array");
803    }
804    if(errorCode.isFailure()) {
805        return NULL;
806    }
807    // Tries from either build() method should be identical but
808    // UCharsTrie does not implement equals().
809    // We just return either one.
810    if((dataLength&1)!=0) {
811        return trie.orphan();
812    } else {
813        return new UCharsTrie(trieUChars2.getBuffer());
814    }
815}
816
817void UCharsTrieTest::checkFirst(UCharsTrie &trie,
818                                const StringAndValue data[], int32_t dataLength) {
819    for(int32_t i=0; i<dataLength; ++i) {
820        if(*data[i].s==0) {
821            continue;  // skip empty string
822        }
823        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
824        UChar32 c=expectedString[0];
825        UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
826        UStringTrieResult firstResult=trie.first(c);
827        int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
828        UStringTrieResult nextResult=trie.next(nextCp);
829        if(firstResult!=trie.reset().next(c) ||
830           firstResult!=trie.current() ||
831           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
832           nextResult!=trie.next(nextCp)
833        ) {
834            errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
835                  c, data[i].s);
836        }
837        c=expectedString.char32At(0);
838        int32_t cLength=U16_LENGTH(c);
839        nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
840        firstResult=trie.firstForCodePoint(c);
841        firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
842        nextResult=trie.nextForCodePoint(nextCp);
843        if(firstResult!=trie.reset().nextForCodePoint(c) ||
844           firstResult!=trie.current() ||
845           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
846           nextResult!=trie.nextForCodePoint(nextCp)
847        ) {
848            errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
849                  c, data[i].s);
850        }
851    }
852    trie.reset();
853}
854
855void UCharsTrieTest::checkNext(UCharsTrie &trie,
856                               const StringAndValue data[], int32_t dataLength) {
857    UCharsTrie::State state;
858    for(int32_t i=0; i<dataLength; ++i) {
859        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
860        int32_t stringLength= (i&1) ? -1 : expectedString.length();
861        UStringTrieResult result;
862        if( !USTRINGTRIE_HAS_VALUE(
863                result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
864            result!=trie.current()
865        ) {
866            errln("trie does not seem to contain %s", data[i].s);
867        } else if(trie.getValue()!=data[i].value) {
868            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
869                  data[i].s,
870                  (long)trie.getValue(), (long)trie.getValue(),
871                  (long)data[i].value, (long)data[i].value);
872        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
873            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
874        }
875        trie.reset();
876        stringLength=expectedString.length();
877        result=trie.current();
878        for(int32_t j=0; j<stringLength; ++j) {
879            if(!USTRINGTRIE_HAS_NEXT(result)) {
880                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
881                break;
882            }
883            if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
884                trie.getValue();
885                if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
886                    errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
887                    break;
888                }
889            }
890            result=trie.next(expectedString[j]);
891            if(!USTRINGTRIE_MATCHES(result)) {
892                errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
893                break;
894            }
895            if(result!=trie.current()) {
896                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
897                break;
898            }
899        }
900        if(!USTRINGTRIE_HAS_VALUE(result)) {
901            errln("trie.next()!=hasValue at the end of %s", data[i].s);
902            continue;
903        }
904        trie.getValue();
905        if(result!=trie.current()) {
906            errln("trie.current() != current()+getValue()+current() after end of %s",
907                  data[i].s);
908        }
909        // Compare the final current() with whether next() can actually continue.
910        trie.saveState(state);
911        UBool nextContinues=FALSE;
912        for(int32_t c=0x20; c<0xe000; ++c) {
913            if(c==0x80) {
914                c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
915            }
916            if(trie.resetToState(state).next(c)) {
917                nextContinues=TRUE;
918                break;
919            }
920        }
921        if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
922            errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
923                  "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
924        }
925        trie.reset();
926    }
927}
928
929void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
930                                        const StringAndValue data[], int32_t dataLength) {
931    UCharsTrie::State noState, state;
932    for(int32_t i=0; i<dataLength; ++i) {
933        if((i&1)==0) {
934            // This should have no effect.
935            trie.resetToState(noState);
936        }
937        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
938        int32_t stringLength=expectedString.length();
939        int32_t partialLength=stringLength/3;
940        for(int32_t j=0; j<partialLength; ++j) {
941            if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
942                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
943                return;
944            }
945        }
946        trie.saveState(state);
947        UStringTrieResult resultAtState=trie.current();
948        UStringTrieResult result;
949        int32_t valueAtState=-99;
950        if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
951            valueAtState=trie.getValue();
952        }
953        result=trie.next(0);  // mismatch
954        if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
955            errln("trie.next(0) matched after part of %s", data[i].s);
956        }
957        if( resultAtState!=trie.resetToState(state).current() ||
958            (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
959        ) {
960            errln("trie.next(part of %s) changes current()/getValue() after "
961                  "saveState/next(0)/resetToState",
962                  data[i].s);
963        } else if(!USTRINGTRIE_HAS_VALUE(
964                      result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
965                                       stringLength-partialLength)) ||
966                  result!=trie.current()) {
967            errln("trie.next(rest of %s) does not seem to contain %s after "
968                  "saveState/next(0)/resetToState",
969                  data[i].s, data[i].s);
970        } else if(!USTRINGTRIE_HAS_VALUE(
971                      result=trie.resetToState(state).
972                                  next(expectedString.getTerminatedBuffer()+partialLength,
973                                       stringLength-partialLength)) ||
974                  result!=trie.current()) {
975            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
976                  data[i].s);
977        } else if(trie.getValue()!=data[i].value) {
978            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
979                  data[i].s,
980                  (long)trie.getValue(), (long)trie.getValue(),
981                  (long)data[i].value, (long)data[i].value);
982        }
983        trie.reset();
984    }
985}
986
987// next(string) is also tested in other functions,
988// but here we try to go partway through the string, and then beyond it.
989void UCharsTrieTest::checkNextString(UCharsTrie &trie,
990                                     const StringAndValue data[], int32_t dataLength) {
991    for(int32_t i=0; i<dataLength; ++i) {
992        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
993        int32_t stringLength=expectedString.length();
994        if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
995            errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
996            continue;
997        }
998        // Test that we stop properly at the end of the string.
999        if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1000                     stringLength+1-stringLength/2)) {
1001            errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1002        }
1003        trie.reset();
1004    }
1005}
1006
1007void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1008                                   const StringAndValue data[], int32_t dataLength) {
1009    IcuTestErrorCode errorCode(*this, "checkIterator()");
1010    UCharsTrie::Iterator iter(trie, 0, errorCode);
1011    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1012        return;
1013    }
1014    checkIterator(iter, data, dataLength);
1015}
1016
1017void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1018                                   const StringAndValue data[], int32_t dataLength) {
1019    IcuTestErrorCode errorCode(*this, "checkIterator()");
1020    for(int32_t i=0; i<dataLength; ++i) {
1021        if(!iter.hasNext()) {
1022            errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1023            break;
1024        }
1025        UBool hasNext=iter.next(errorCode);
1026        if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1027            break;
1028        }
1029        if(!hasNext) {
1030            errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1031            break;
1032        }
1033        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1034        if(iter.getString()!=expectedString) {
1035            char buffer[1000];
1036            UnicodeString invString(prettify(iter.getString()));
1037            invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
1038            errln("trie iterator next().getString()=%s but expected %s for item %d",
1039                  buffer, data[i].s, (int)i);
1040        }
1041        if(iter.getValue()!=data[i].value) {
1042            errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1043                  (long)iter.getValue(), (long)iter.getValue(),
1044                  (long)data[i].value, (long)data[i].value,
1045                  (int)i, data[i].s);
1046        }
1047    }
1048    if(iter.hasNext()) {
1049        errln("trie iterator hasNext()=TRUE after all items");
1050    }
1051    UBool hasNext=iter.next(errorCode);
1052    errorCode.logIfFailureAndReset("trie iterator next() after all items");
1053    if(hasNext) {
1054        errln("trie iterator next()=TRUE after all items");
1055    }
1056}
1057