1/*
2*******************************************************************************
3*   Copyright (C) 2010-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  ucharstrietest.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010nov16
12*   created by: Markus W. Scherer
13*/
14
15#include <string.h>
16
17#include "unicode/utypes.h"
18#include "unicode/appendable.h"
19#include "unicode/localpointer.h"
20#include "unicode/ucharstrie.h"
21#include "unicode/ucharstriebuilder.h"
22#include "unicode/uniset.h"
23#include "unicode/unistr.h"
24#include "intltest.h"
25
26#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
27
28struct StringAndValue {
29    const char *s;
30    int32_t value;
31};
32
33class UCharsTrieTest : public IntlTest {
34public:
35    UCharsTrieTest();
36    virtual ~UCharsTrieTest();
37
38    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
39    void TestBuilder();
40    void TestEmpty();
41    void Test_a();
42    void Test_a_ab();
43    void TestShortestBranch();
44    void TestBranches();
45    void TestLongSequence();
46    void TestLongBranch();
47    void TestValuesForState();
48    void TestCompact();
49    void TestFirstForCodePoint();
50    void TestNextForCodePoint();
51
52    UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
53    void TestLargeTrie();
54
55    UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
56    void TestHasUniqueValue();
57    void TestGetNextUChars();
58    void TestIteratorFromBranch();
59    void TestIteratorFromLinearMatch();
60    void TestTruncatingIteratorFromRoot();
61    void TestTruncatingIteratorFromLinearMatchShort();
62    void TestTruncatingIteratorFromLinearMatchLong();
63    void TestIteratorFromUChars();
64
65    void checkData(const StringAndValue data[], int32_t dataLength);
66    void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
67    UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
68                          UStringTrieBuildOption buildOption);
69    void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
70    void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
71    void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72    void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73    void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74    void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
75
76private:
77    UCharsTrieBuilder *builder_;
78};
79
80extern IntlTest *createUCharsTrieTest() {
81    return new UCharsTrieTest();
82}
83
84UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
85    IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
86    builder_=new UCharsTrieBuilder(errorCode);
87}
88
89UCharsTrieTest::~UCharsTrieTest() {
90    delete builder_;
91}
92
93void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
94    if(exec) {
95        logln("TestSuite UCharsTrieTest: ");
96    }
97    TESTCASE_AUTO_BEGIN;
98    TESTCASE_AUTO(TestBuilder);
99    TESTCASE_AUTO(TestEmpty);
100    TESTCASE_AUTO(Test_a);
101    TESTCASE_AUTO(Test_a_ab);
102    TESTCASE_AUTO(TestShortestBranch);
103    TESTCASE_AUTO(TestBranches);
104    TESTCASE_AUTO(TestLongSequence);
105    TESTCASE_AUTO(TestLongBranch);
106    TESTCASE_AUTO(TestValuesForState);
107    TESTCASE_AUTO(TestCompact);
108    TESTCASE_AUTO(TestFirstForCodePoint);
109    TESTCASE_AUTO(TestNextForCodePoint);
110    TESTCASE_AUTO(TestLargeTrie);
111    TESTCASE_AUTO(TestHasUniqueValue);
112    TESTCASE_AUTO(TestGetNextUChars);
113    TESTCASE_AUTO(TestIteratorFromBranch);
114    TESTCASE_AUTO(TestIteratorFromLinearMatch);
115    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
116    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
117    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
118    TESTCASE_AUTO(TestIteratorFromUChars);
119    TESTCASE_AUTO_END;
120}
121
122void UCharsTrieTest::TestBuilder() {
123    IcuTestErrorCode errorCode(*this, "TestBuilder()");
124    delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
125    if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
126        errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
127        return;
128    }
129    // TODO: remove .build(...) once add() checks for duplicates.
130    builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
131    if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
132        errln("UCharsTrieBuilder.add() did not detect duplicates");
133        return;
134    }
135}
136
137void UCharsTrieTest::TestEmpty() {
138    static const StringAndValue data[]={
139        { "", 0 }
140    };
141    checkData(data, LENGTHOF(data));
142}
143
144void UCharsTrieTest::Test_a() {
145    static const StringAndValue data[]={
146        { "a", 1 }
147    };
148    checkData(data, LENGTHOF(data));
149}
150
151void UCharsTrieTest::Test_a_ab() {
152    static const StringAndValue data[]={
153        { "a", 1 },
154        { "ab", 100 }
155    };
156    checkData(data, LENGTHOF(data));
157}
158
159void UCharsTrieTest::TestShortestBranch() {
160    static const StringAndValue data[]={
161        { "a", 1000 },
162        { "b", 2000 }
163    };
164    checkData(data, LENGTHOF(data));
165}
166
167void UCharsTrieTest::TestBranches() {
168    static const StringAndValue data[]={
169        { "a", 0x10 },
170        { "cc", 0x40 },
171        { "e", 0x100 },
172        { "ggg", 0x400 },
173        { "i", 0x1000 },
174        { "kkkk", 0x4000 },
175        { "n", 0x10000 },
176        { "ppppp", 0x40000 },
177        { "r", 0x100000 },
178        { "sss", 0x200000 },
179        { "t", 0x400000 },
180        { "uu", 0x800000 },
181        { "vv", 0x7fffffff },
182        { "zz", (int32_t)0x80000000 }
183    };
184    for(int32_t length=2; length<=LENGTHOF(data); ++length) {
185        logln("TestBranches length=%d", (int)length);
186        checkData(data, length);
187    }
188}
189
190void UCharsTrieTest::TestLongSequence() {
191    static const StringAndValue data[]={
192        { "a", -1 },
193        // sequence of linear-match nodes
194        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
195        // more than 256 units
196        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
197          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
198          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
202    };
203    checkData(data, LENGTHOF(data));
204}
205
206void UCharsTrieTest::TestLongBranch() {
207    // Split-branch and interesting compact-integer values.
208    static const StringAndValue data[]={
209        { "a", -2 },
210        { "b", -1 },
211        { "c", 0 },
212        { "d2", 1 },
213        { "f", 0x3f },
214        { "g", 0x40 },
215        { "h", 0x41 },
216        { "j23", 0x1900 },
217        { "j24", 0x19ff },
218        { "j25", 0x1a00 },
219        { "k2", 0x1a80 },
220        { "k3", 0x1aff },
221        { "l234567890", 0x1b00 },
222        { "l234567890123", 0x1b01 },
223        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
224        { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
225        { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
226        { "r", 0x333333 },
227        { "s2345", 0x4444444 },
228        { "t234567890", 0x77777777 },
229        { "z", (int32_t)0x80000001 }
230    };
231    checkData(data, LENGTHOF(data));
232}
233
234void UCharsTrieTest::TestValuesForState() {
235    // Check that saveState() and resetToState() interact properly
236    // with next() and current().
237    static const StringAndValue data[]={
238        { "a", -1 },
239        { "ab", -2 },
240        { "abc", -3 },
241        { "abcd", -4 },
242        { "abcde", -5 },
243        { "abcdef", -6 }
244    };
245    checkData(data, LENGTHOF(data));
246}
247
248void UCharsTrieTest::TestCompact() {
249    // Duplicate trailing strings and values provide opportunities for compacting.
250    static const StringAndValue data[]={
251        { "+", 0 },
252        { "+august", 8 },
253        { "+december", 12 },
254        { "+july", 7 },
255        { "+june", 6 },
256        { "+november", 11 },
257        { "+october", 10 },
258        { "+september", 9 },
259        { "-", 0 },
260        { "-august", 8 },
261        { "-december", 12 },
262        { "-july", 7 },
263        { "-june", 6 },
264        { "-november", 11 },
265        { "-october", 10 },
266        { "-september", 9 },
267        // The l+n branch (with its sub-nodes) is a duplicate but will be written
268        // both times because each time it follows a different linear-match node.
269        { "xjuly", 7 },
270        { "xjune", 6 }
271    };
272    checkData(data, LENGTHOF(data));
273}
274
275void UCharsTrieTest::TestFirstForCodePoint() {
276    static const StringAndValue data[]={
277        { "a", 1 },
278        { "a\\ud800", 2 },
279        { "a\\U00010000", 3 },
280        { "\\ud840", 4 },
281        { "\\U00020000\\udbff", 5 },
282        { "\\U00020000\\U0010ffff", 6 },
283        { "\\U00020000\\U0010ffffz", 7 },
284        { "\\U00050000xy", 8 },
285        { "\\U00050000xyz", 9 }
286    };
287    checkData(data, LENGTHOF(data));
288}
289
290void UCharsTrieTest::TestNextForCodePoint() {
291    static const StringAndValue data[]={
292        { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
293        { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
294        { "\\u4dff\\U000103ff", 99999 }
295    };
296    LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
297    if(trie.isNull()) {
298        return;  // buildTrie() reported an error
299    }
300    UStringTrieResult result;
301    if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
302        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
303        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304        (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305        (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306        (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
307        trie->getValue()!=2000000000
308    ) {
309        errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
310    }
311    if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
312        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
313        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314        (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
315        trie->getValue()!=44444
316    ) {
317        errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
318    }
319    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
320        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
321        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322        (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
323    ) {
324        errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
325    }
326    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
327        (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
328        trie->getValue()!=99999
329    ) {
330        errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
331    }
332}
333
334// Definitions in the anonymous namespace are invisible outside this file.
335namespace {
336
337// Generate (string, value) pairs.
338// The first string (before next()) will be empty.
339class Generator {
340public:
341    Generator() : value(4711), num(0) {}
342    void next() {
343        UChar c;
344        s.truncate(0);
345        s.append(c=(UChar)(value>>16));
346        s.append((UChar)(value>>4));
347        if(value&1) {
348            s.append((UChar)value);
349        }
350        set.add(c);
351        value+=((value>>5)&0x7ff)*3+1;
352        ++num;
353    }
354    const UnicodeString &getString() const { return s; }
355    int32_t getValue() const { return value; }
356    int32_t countUniqueFirstChars() const { return set.size(); }
357    int32_t getIndex() const { return num; }
358
359private:
360    UnicodeString s;
361    UnicodeSet set;
362    int32_t value;
363    int32_t num;
364};
365
366}  // end namespace
367
368UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
369    IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
370    Generator gen;
371    builder_->clear();
372    while(gen.countUniqueFirstChars()<numUniqueFirst) {
373        builder_->add(gen.getString(), gen.getValue(), errorCode);
374        gen.next();
375    }
376    logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
377    UnicodeString trieUChars;
378    builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
379    logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
380    return new UCharsTrie(trieUChars.getBuffer());
381}
382
383// Exercise a large branch node.
384void UCharsTrieTest::TestLargeTrie() {
385    LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
386    if(trie.isNull()) {
387        return;  // buildTrie() reported an error
388    }
389    Generator gen;
390    while(gen.countUniqueFirstChars()<1111) {
391        UnicodeString x(gen.getString());
392        int32_t value=gen.getValue();
393        if(!x.isEmpty()) {
394            if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
395                errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
396                      x[0], (long)gen.getIndex());
397                break;
398            }
399            x.remove(0, 1);
400        }
401        UStringTrieResult result=trie->next(x.getBuffer(), x.length());
402        if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
403            errln("next(%d chars U+%04X U+%04X)!=hasValue or "
404                  "next()!=current() or getValue() wrong "
405                  "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
406            break;
407        }
408        gen.next();
409    }
410}
411
412enum {
413    u_a=0x61,
414    u_b=0x62,
415    u_c=0x63,
416    u_j=0x6a,
417    u_n=0x6e,
418    u_r=0x72,
419    u_u=0x75,
420    u_y=0x79
421};
422
423UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
424    // All types of nodes leading to the same value,
425    // for code coverage of recursive functions.
426    // In particular, we need a lot of branches on some single level
427    // to exercise a split-branch node.
428    static const StringAndValue data[]={
429        { "august", 8 },
430        { "jan", 1 },
431        { "jan.", 1 },
432        { "jana", 1 },
433        { "janbb", 1 },
434        { "janc", 1 },
435        { "janddd", 1 },
436        { "janee", 1 },
437        { "janef", 1 },
438        { "janf", 1 },
439        { "jangg", 1 },
440        { "janh", 1 },
441        { "janiiii", 1 },
442        { "janj", 1 },
443        { "jankk", 1 },
444        { "jankl", 1 },
445        { "jankmm", 1 },
446        { "janl", 1 },
447        { "janm", 1 },
448        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
449        { "jano", 1 },
450        { "janpp", 1 },
451        { "janqqq", 1 },
452        { "janr", 1 },
453        { "januar", 1 },
454        { "january", 1 },
455        { "july", 7 },
456        { "jun", 6 },
457        { "jun.", 6 },
458        { "june", 6 }
459    };
460    return buildTrie(data, LENGTHOF(data), buildOption);
461}
462
463void UCharsTrieTest::TestHasUniqueValue() {
464    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
465    if(trie.isNull()) {
466        return;  // buildTrie() reported an error
467    }
468    int32_t uniqueValue;
469    if(trie->hasUniqueValue(uniqueValue)) {
470        errln("unique value at root");
471    }
472    trie->next(u_j);
473    trie->next(u_a);
474    trie->next(u_n);
475    // hasUniqueValue() directly after next()
476    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
477        errln("not unique value 1 after \"jan\"");
478    }
479    trie->first(u_j);
480    trie->next(u_u);
481    if(trie->hasUniqueValue(uniqueValue)) {
482        errln("unique value after \"ju\"");
483    }
484    if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
485        errln("not normal value 6 after \"jun\"");
486    }
487    // hasUniqueValue() after getValue()
488    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
489        errln("not unique value 6 after \"jun\"");
490    }
491    // hasUniqueValue() from within a linear-match node
492    trie->first(u_a);
493    trie->next(u_u);
494    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
495        errln("not unique value 8 after \"au\"");
496    }
497}
498
499void UCharsTrieTest::TestGetNextUChars() {
500    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
501    if(trie.isNull()) {
502        return;  // buildTrie() reported an error
503    }
504    UnicodeString buffer;
505    UnicodeStringAppendable app(buffer);
506    int32_t count=trie->getNextUChars(app);
507    if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
508        errln("months getNextUChars()!=[aj] at root");
509    }
510    trie->next(u_j);
511    trie->next(u_a);
512    trie->next(u_n);
513    // getNextUChars() directly after next()
514    buffer.remove();
515    count=trie->getNextUChars(app);
516    if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
517        errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
518    }
519    // getNextUChars() after getValue()
520    trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
521    buffer.remove();
522    count=trie->getNextUChars(app);
523    if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
524        errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
525    }
526    // getNextUChars() from a linear-match node
527    trie->next(u_u);
528    buffer.remove();
529    count=trie->getNextUChars(app);
530    if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
531        errln("months getNextUChars()!=[a] after \"janu\"");
532    }
533    trie->next(u_a);
534    buffer.remove();
535    count=trie->getNextUChars(app);
536    if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
537        errln("months getNextUChars()!=[r] after \"janua\"");
538    }
539    trie->next(u_r);
540    trie->next(u_y);
541    // getNextUChars() after a final match
542    buffer.remove();
543    count=trie->getNextUChars(app);
544    if(count!=0 || buffer.length()!=0) {
545        errln("months getNextUChars()!=[] after \"january\"");
546    }
547}
548
549void UCharsTrieTest::TestIteratorFromBranch() {
550    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
551    if(trie.isNull()) {
552        return;  // buildTrie() reported an error
553    }
554    // Go to a branch node.
555    trie->next(u_j);
556    trie->next(u_a);
557    trie->next(u_n);
558    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
559    UCharsTrie::Iterator iter(*trie, 0, errorCode);
560    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
561        return;
562    }
563    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
564    // following "jan".
565    static const StringAndValue data[]={
566        { "", 1 },
567        { ".", 1 },
568        { "a", 1 },
569        { "bb", 1 },
570        { "c", 1 },
571        { "ddd", 1 },
572        { "ee", 1 },
573        { "ef", 1 },
574        { "f", 1 },
575        { "gg", 1 },
576        { "h", 1 },
577        { "iiii", 1 },
578        { "j", 1 },
579        { "kk", 1 },
580        { "kl", 1 },
581        { "kmm", 1 },
582        { "l", 1 },
583        { "m", 1 },
584        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
585        { "o", 1 },
586        { "pp", 1 },
587        { "qqq", 1 },
588        { "r", 1 },
589        { "uar", 1 },
590        { "uary", 1 }
591    };
592    checkIterator(iter, data, LENGTHOF(data));
593    // Reset, and we should get the same result.
594    logln("after iter.reset()");
595    checkIterator(iter.reset(), data, LENGTHOF(data));
596}
597
598void UCharsTrieTest::TestIteratorFromLinearMatch() {
599    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
600    if(trie.isNull()) {
601        return;  // buildTrie() reported an error
602    }
603    // Go into a linear-match node.
604    trie->next(u_j);
605    trie->next(u_a);
606    trie->next(u_n);
607    trie->next(u_u);
608    trie->next(u_a);
609    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
610    UCharsTrie::Iterator iter(*trie, 0, errorCode);
611    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
612        return;
613    }
614    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
615    // following "janua".
616    static const StringAndValue data[]={
617        { "r", 1 },
618        { "ry", 1 }
619    };
620    checkIterator(iter, data, LENGTHOF(data));
621    // Reset, and we should get the same result.
622    logln("after iter.reset()");
623    checkIterator(iter.reset(), data, LENGTHOF(data));
624}
625
626void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
627    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
628    if(trie.isNull()) {
629        return;  // buildTrie() reported an error
630    }
631    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
632    UCharsTrie::Iterator iter(*trie, 4, errorCode);
633    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
634        return;
635    }
636    // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
637    // of each string, and no string duplicates from the truncation.
638    static const StringAndValue data[]={
639        { "augu", -1 },
640        { "jan", 1 },
641        { "jan.", 1 },
642        { "jana", 1 },
643        { "janb", -1 },
644        { "janc", 1 },
645        { "jand", -1 },
646        { "jane", -1 },
647        { "janf", 1 },
648        { "jang", -1 },
649        { "janh", 1 },
650        { "jani", -1 },
651        { "janj", 1 },
652        { "jank", -1 },
653        { "janl", 1 },
654        { "janm", 1 },
655        { "jann", -1 },
656        { "jano", 1 },
657        { "janp", -1 },
658        { "janq", -1 },
659        { "janr", 1 },
660        { "janu", -1 },
661        { "july", 7 },
662        { "jun", 6 },
663        { "jun.", 6 },
664        { "june", 6 }
665    };
666    checkIterator(iter, data, LENGTHOF(data));
667    // Reset, and we should get the same result.
668    logln("after iter.reset()");
669    checkIterator(iter.reset(), data, LENGTHOF(data));
670}
671
672void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
673    static const StringAndValue data[]={
674        { "abcdef", 10 },
675        { "abcdepq", 200 },
676        { "abcdeyz", 3000 }
677    };
678    LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
679    if(trie.isNull()) {
680        return;  // buildTrie() reported an error
681    }
682    // Go into a linear-match node.
683    trie->next(u_a);
684    trie->next(u_b);
685    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
686    // Truncate within the linear-match node.
687    UCharsTrie::Iterator iter(*trie, 2, errorCode);
688    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
689        return;
690    }
691    static const StringAndValue expected[]={
692        { "cd", -1 }
693    };
694    checkIterator(iter, expected, LENGTHOF(expected));
695    // Reset, and we should get the same result.
696    logln("after iter.reset()");
697    checkIterator(iter.reset(), expected, LENGTHOF(expected));
698}
699
700void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
701    static const StringAndValue data[]={
702        { "abcdef", 10 },
703        { "abcdepq", 200 },
704        { "abcdeyz", 3000 }
705    };
706    LocalPointer<UCharsTrie> trie(buildTrie(data, LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
707    if(trie.isNull()) {
708        return;  // buildTrie() reported an error
709    }
710    // Go into a linear-match node.
711    trie->next(u_a);
712    trie->next(u_b);
713    trie->next(u_c);
714    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
715    // Truncate after the linear-match node.
716    UCharsTrie::Iterator iter(*trie, 3, errorCode);
717    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
718        return;
719    }
720    static const StringAndValue expected[]={
721        { "def", 10 },
722        { "dep", -1 },
723        { "dey", -1 }
724    };
725    checkIterator(iter, expected, LENGTHOF(expected));
726    // Reset, and we should get the same result.
727    logln("after iter.reset()");
728    checkIterator(iter.reset(), expected, LENGTHOF(expected));
729}
730
731void UCharsTrieTest::TestIteratorFromUChars() {
732    static const StringAndValue data[]={
733        { "mm", 3 },
734        { "mmm", 33 },
735        { "mmnop", 333 }
736    };
737    builder_->clear();
738    IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
739    for(int32_t i=0; i<LENGTHOF(data); ++i) {
740        builder_->add(data[i].s, data[i].value, errorCode);
741    }
742    UnicodeString trieUChars;
743    builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
744    UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
745    checkIterator(iter, data, LENGTHOF(data));
746}
747
748void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
749    logln("checkData(dataLength=%d, fast)", (int)dataLength);
750    checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
751    logln("checkData(dataLength=%d, small)", (int)dataLength);
752    checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
753}
754
755void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
756    LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
757    if(trie.isNull()) {
758        return;  // buildTrie() reported an error
759    }
760    checkFirst(*trie, data, dataLength);
761    checkNext(*trie, data, dataLength);
762    checkNextWithState(*trie, data, dataLength);
763    checkNextString(*trie, data, dataLength);
764    checkIterator(*trie, data, dataLength);
765}
766
767UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
768                                      UStringTrieBuildOption buildOption) {
769    IcuTestErrorCode errorCode(*this, "buildTrie()");
770    // Add the items to the trie builder in an interesting (not trivial, not random) order.
771    int32_t index, step;
772    if(dataLength&1) {
773        // Odd number of items.
774        index=dataLength/2;
775        step=2;
776    } else if((dataLength%3)!=0) {
777        // Not a multiple of 3.
778        index=dataLength/5;
779        step=3;
780    } else {
781        index=dataLength-1;
782        step=-1;
783    }
784    builder_->clear();
785    for(int32_t i=0; i<dataLength; ++i) {
786        builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
787                      data[index].value, errorCode);
788        index=(index+step)%dataLength;
789    }
790    UnicodeString trieUChars;
791    builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
792    LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
793    if(!errorCode.logIfFailureAndReset("add()/build()")) {
794        builder_->add("zzz", 999, errorCode);
795        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
796            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
797        }
798    }
799    logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
800    UnicodeString trieUChars2;
801    builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
802    if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
803        errln("builder.buildUnicodeString() before & after build() returned same array");
804    }
805    if(errorCode.isFailure()) {
806        return NULL;
807    }
808    // Tries from either build() method should be identical but
809    // UCharsTrie does not implement equals().
810    // We just return either one.
811    if((dataLength&1)!=0) {
812        return trie.orphan();
813    } else {
814        return new UCharsTrie(trieUChars2.getBuffer());
815    }
816}
817
818void UCharsTrieTest::checkFirst(UCharsTrie &trie,
819                                const StringAndValue data[], int32_t dataLength) {
820    for(int32_t i=0; i<dataLength; ++i) {
821        if(*data[i].s==0) {
822            continue;  // skip empty string
823        }
824        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
825        UChar32 c=expectedString[0];
826        UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
827        UStringTrieResult firstResult=trie.first(c);
828        int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
829        UStringTrieResult nextResult=trie.next(nextCp);
830        if(firstResult!=trie.reset().next(c) ||
831           firstResult!=trie.current() ||
832           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
833           nextResult!=trie.next(nextCp)
834        ) {
835            errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
836                  c, data[i].s);
837        }
838        c=expectedString.char32At(0);
839        int32_t cLength=U16_LENGTH(c);
840        nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
841        firstResult=trie.firstForCodePoint(c);
842        firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
843        nextResult=trie.nextForCodePoint(nextCp);
844        if(firstResult!=trie.reset().nextForCodePoint(c) ||
845           firstResult!=trie.current() ||
846           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
847           nextResult!=trie.nextForCodePoint(nextCp)
848        ) {
849            errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
850                  c, data[i].s);
851        }
852    }
853    trie.reset();
854}
855
856void UCharsTrieTest::checkNext(UCharsTrie &trie,
857                               const StringAndValue data[], int32_t dataLength) {
858    UCharsTrie::State state;
859    for(int32_t i=0; i<dataLength; ++i) {
860        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
861        int32_t stringLength= (i&1) ? -1 : expectedString.length();
862        UStringTrieResult result;
863        if( !USTRINGTRIE_HAS_VALUE(
864                result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
865            result!=trie.current()
866        ) {
867            errln("trie does not seem to contain %s", data[i].s);
868        } else if(trie.getValue()!=data[i].value) {
869            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
870                  data[i].s,
871                  (long)trie.getValue(), (long)trie.getValue(),
872                  (long)data[i].value, (long)data[i].value);
873        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
874            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
875        }
876        trie.reset();
877        stringLength=expectedString.length();
878        result=trie.current();
879        for(int32_t j=0; j<stringLength; ++j) {
880            if(!USTRINGTRIE_HAS_NEXT(result)) {
881                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
882                break;
883            }
884            if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
885                trie.getValue();
886                if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
887                    errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
888                    break;
889                }
890            }
891            result=trie.next(expectedString[j]);
892            if(!USTRINGTRIE_MATCHES(result)) {
893                errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
894                break;
895            }
896            if(result!=trie.current()) {
897                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
898                break;
899            }
900        }
901        if(!USTRINGTRIE_HAS_VALUE(result)) {
902            errln("trie.next()!=hasValue at the end of %s", data[i].s);
903            continue;
904        }
905        trie.getValue();
906        if(result!=trie.current()) {
907            errln("trie.current() != current()+getValue()+current() after end of %s",
908                  data[i].s);
909        }
910        // Compare the final current() with whether next() can actually continue.
911        trie.saveState(state);
912        UBool nextContinues=FALSE;
913        for(int32_t c=0x20; c<0xe000; ++c) {
914            if(c==0x80) {
915                c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
916            }
917            if(trie.resetToState(state).next(c)) {
918                nextContinues=TRUE;
919                break;
920            }
921        }
922        if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
923            errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
924                  "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
925        }
926        trie.reset();
927    }
928}
929
930void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
931                                        const StringAndValue data[], int32_t dataLength) {
932    UCharsTrie::State noState, state;
933    for(int32_t i=0; i<dataLength; ++i) {
934        if((i&1)==0) {
935            // This should have no effect.
936            trie.resetToState(noState);
937        }
938        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
939        int32_t stringLength=expectedString.length();
940        int32_t partialLength=stringLength/3;
941        for(int32_t j=0; j<partialLength; ++j) {
942            if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
943                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
944                return;
945            }
946        }
947        trie.saveState(state);
948        UStringTrieResult resultAtState=trie.current();
949        UStringTrieResult result;
950        int32_t valueAtState=-99;
951        if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
952            valueAtState=trie.getValue();
953        }
954        result=trie.next(0);  // mismatch
955        if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
956            errln("trie.next(0) matched after part of %s", data[i].s);
957        }
958        if( resultAtState!=trie.resetToState(state).current() ||
959            (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
960        ) {
961            errln("trie.next(part of %s) changes current()/getValue() after "
962                  "saveState/next(0)/resetToState",
963                  data[i].s);
964        } else if(!USTRINGTRIE_HAS_VALUE(
965                      result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
966                                       stringLength-partialLength)) ||
967                  result!=trie.current()) {
968            errln("trie.next(rest of %s) does not seem to contain %s after "
969                  "saveState/next(0)/resetToState",
970                  data[i].s, data[i].s);
971        } else if(!USTRINGTRIE_HAS_VALUE(
972                      result=trie.resetToState(state).
973                                  next(expectedString.getTerminatedBuffer()+partialLength,
974                                       stringLength-partialLength)) ||
975                  result!=trie.current()) {
976            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
977                  data[i].s);
978        } else if(trie.getValue()!=data[i].value) {
979            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
980                  data[i].s,
981                  (long)trie.getValue(), (long)trie.getValue(),
982                  (long)data[i].value, (long)data[i].value);
983        }
984        trie.reset();
985    }
986}
987
988// next(string) is also tested in other functions,
989// but here we try to go partway through the string, and then beyond it.
990void UCharsTrieTest::checkNextString(UCharsTrie &trie,
991                                     const StringAndValue data[], int32_t dataLength) {
992    for(int32_t i=0; i<dataLength; ++i) {
993        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
994        int32_t stringLength=expectedString.length();
995        if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
996            errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
997            continue;
998        }
999        // Test that we stop properly at the end of the string.
1000        if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1001                     stringLength+1-stringLength/2)) {
1002            errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1003        }
1004        trie.reset();
1005    }
1006}
1007
1008void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1009                                   const StringAndValue data[], int32_t dataLength) {
1010    IcuTestErrorCode errorCode(*this, "checkIterator()");
1011    UCharsTrie::Iterator iter(trie, 0, errorCode);
1012    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1013        return;
1014    }
1015    checkIterator(iter, data, dataLength);
1016}
1017
1018void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1019                                   const StringAndValue data[], int32_t dataLength) {
1020    IcuTestErrorCode errorCode(*this, "checkIterator()");
1021    for(int32_t i=0; i<dataLength; ++i) {
1022        if(!iter.hasNext()) {
1023            errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1024            break;
1025        }
1026        UBool hasNext=iter.next(errorCode);
1027        if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1028            break;
1029        }
1030        if(!hasNext) {
1031            errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1032            break;
1033        }
1034        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1035        if(iter.getString()!=expectedString) {
1036            char buffer[1000];
1037            UnicodeString invString(prettify(iter.getString()));
1038            invString.extract(0, invString.length(), buffer, LENGTHOF(buffer), US_INV);
1039            errln("trie iterator next().getString()=%s but expected %s for item %d",
1040                  buffer, data[i].s, (int)i);
1041        }
1042        if(iter.getValue()!=data[i].value) {
1043            errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1044                  (long)iter.getValue(), (long)iter.getValue(),
1045                  (long)data[i].value, (long)data[i].value,
1046                  (int)i, data[i].s);
1047        }
1048    }
1049    if(iter.hasNext()) {
1050        errln("trie iterator hasNext()=TRUE after all items");
1051    }
1052    UBool hasNext=iter.next(errorCode);
1053    errorCode.logIfFailureAndReset("trie iterator next() after all items");
1054    if(hasNext) {
1055        errln("trie iterator next()=TRUE after all items");
1056    }
1057}
1058