1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*   Copyright (C) 2010-2014, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   file name:  ucharstrietest.cpp
9*   encoding:   UTF-8
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2010nov16
14*   created by: Markus W. Scherer
15*/
16
17#include <string.h>
18
19#include "unicode/utypes.h"
20#include "unicode/appendable.h"
21#include "unicode/localpointer.h"
22#include "unicode/ucharstrie.h"
23#include "unicode/ucharstriebuilder.h"
24#include "unicode/uniset.h"
25#include "unicode/unistr.h"
26#include "unicode/utf16.h"
27#include "intltest.h"
28#include "cmemory.h"
29
30struct StringAndValue {
31    const char *s;
32    int32_t value;
33};
34
35class UCharsTrieTest : public IntlTest {
36public:
37    UCharsTrieTest();
38    virtual ~UCharsTrieTest();
39
40    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
41    void TestBuilder();
42    void TestEmpty();
43    void Test_a();
44    void Test_a_ab();
45    void TestShortestBranch();
46    void TestBranches();
47    void TestLongSequence();
48    void TestLongBranch();
49    void TestValuesForState();
50    void TestCompact();
51    void TestFirstForCodePoint();
52    void TestNextForCodePoint();
53
54    UCharsTrie *buildLargeTrie(int32_t numUniqueFirst);
55    void TestLargeTrie();
56
57    UCharsTrie *buildMonthsTrie(UStringTrieBuildOption buildOption);
58    void TestHasUniqueValue();
59    void TestGetNextUChars();
60    void TestIteratorFromBranch();
61    void TestIteratorFromLinearMatch();
62    void TestTruncatingIteratorFromRoot();
63    void TestTruncatingIteratorFromLinearMatchShort();
64    void TestTruncatingIteratorFromLinearMatchLong();
65    void TestIteratorFromUChars();
66
67    void checkData(const StringAndValue data[], int32_t dataLength);
68    void checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption);
69    UCharsTrie *buildTrie(const StringAndValue data[], int32_t dataLength,
70                          UStringTrieBuildOption buildOption);
71    void checkFirst(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
72    void checkNext(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
73    void checkNextWithState(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
74    void checkNextString(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
75    void checkIterator(UCharsTrie &trie, const StringAndValue data[], int32_t dataLength);
76    void checkIterator(UCharsTrie::Iterator &iter, const StringAndValue data[], int32_t dataLength);
77
78private:
79    UCharsTrieBuilder *builder_;
80};
81
82extern IntlTest *createUCharsTrieTest() {
83    return new UCharsTrieTest();
84}
85
86UCharsTrieTest::UCharsTrieTest() : builder_(NULL) {
87    IcuTestErrorCode errorCode(*this, "UCharsTrieTest()");
88    builder_=new UCharsTrieBuilder(errorCode);
89}
90
91UCharsTrieTest::~UCharsTrieTest() {
92    delete builder_;
93}
94
95void UCharsTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
96    if(exec) {
97        logln("TestSuite UCharsTrieTest: ");
98    }
99    TESTCASE_AUTO_BEGIN;
100    TESTCASE_AUTO(TestBuilder);
101    TESTCASE_AUTO(TestEmpty);
102    TESTCASE_AUTO(Test_a);
103    TESTCASE_AUTO(Test_a_ab);
104    TESTCASE_AUTO(TestShortestBranch);
105    TESTCASE_AUTO(TestBranches);
106    TESTCASE_AUTO(TestLongSequence);
107    TESTCASE_AUTO(TestLongBranch);
108    TESTCASE_AUTO(TestValuesForState);
109    TESTCASE_AUTO(TestCompact);
110    TESTCASE_AUTO(TestFirstForCodePoint);
111    TESTCASE_AUTO(TestNextForCodePoint);
112    TESTCASE_AUTO(TestLargeTrie);
113    TESTCASE_AUTO(TestHasUniqueValue);
114    TESTCASE_AUTO(TestGetNextUChars);
115    TESTCASE_AUTO(TestIteratorFromBranch);
116    TESTCASE_AUTO(TestIteratorFromLinearMatch);
117    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
118    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
119    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
120    TESTCASE_AUTO(TestIteratorFromUChars);
121    TESTCASE_AUTO_END;
122}
123
124void UCharsTrieTest::TestBuilder() {
125    IcuTestErrorCode errorCode(*this, "TestBuilder()");
126    delete builder_->build(USTRINGTRIE_BUILD_FAST, errorCode);
127    if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
128        errln("UCharsTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
129        return;
130    }
131    // TODO: remove .build(...) once add() checks for duplicates.
132    builder_->add("=", 0, errorCode).add("=", 1, errorCode).build(USTRINGTRIE_BUILD_FAST, errorCode);
133    if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
134        errln("UCharsTrieBuilder.add() did not detect duplicates");
135        return;
136    }
137}
138
139void UCharsTrieTest::TestEmpty() {
140    static const StringAndValue data[]={
141        { "", 0 }
142    };
143    checkData(data, UPRV_LENGTHOF(data));
144}
145
146void UCharsTrieTest::Test_a() {
147    static const StringAndValue data[]={
148        { "a", 1 }
149    };
150    checkData(data, UPRV_LENGTHOF(data));
151}
152
153void UCharsTrieTest::Test_a_ab() {
154    static const StringAndValue data[]={
155        { "a", 1 },
156        { "ab", 100 }
157    };
158    checkData(data, UPRV_LENGTHOF(data));
159}
160
161void UCharsTrieTest::TestShortestBranch() {
162    static const StringAndValue data[]={
163        { "a", 1000 },
164        { "b", 2000 }
165    };
166    checkData(data, UPRV_LENGTHOF(data));
167}
168
169void UCharsTrieTest::TestBranches() {
170    static const StringAndValue data[]={
171        { "a", 0x10 },
172        { "cc", 0x40 },
173        { "e", 0x100 },
174        { "ggg", 0x400 },
175        { "i", 0x1000 },
176        { "kkkk", 0x4000 },
177        { "n", 0x10000 },
178        { "ppppp", 0x40000 },
179        { "r", 0x100000 },
180        { "sss", 0x200000 },
181        { "t", 0x400000 },
182        { "uu", 0x800000 },
183        { "vv", 0x7fffffff },
184        { "zz", (int32_t)0x80000000 }
185    };
186    for(int32_t length=2; length<=UPRV_LENGTHOF(data); ++length) {
187        logln("TestBranches length=%d", (int)length);
188        checkData(data, length);
189    }
190}
191
192void UCharsTrieTest::TestLongSequence() {
193    static const StringAndValue data[]={
194        { "a", -1 },
195        // sequence of linear-match nodes
196        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
197        // more than 256 units
198        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
199          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
200          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
201          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
202          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
203          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
204    };
205    checkData(data, UPRV_LENGTHOF(data));
206}
207
208void UCharsTrieTest::TestLongBranch() {
209    // Split-branch and interesting compact-integer values.
210    static const StringAndValue data[]={
211        { "a", -2 },
212        { "b", -1 },
213        { "c", 0 },
214        { "d2", 1 },
215        { "f", 0x3f },
216        { "g", 0x40 },
217        { "h", 0x41 },
218        { "j23", 0x1900 },
219        { "j24", 0x19ff },
220        { "j25", 0x1a00 },
221        { "k2", 0x1a80 },
222        { "k3", 0x1aff },
223        { "l234567890", 0x1b00 },
224        { "l234567890123", 0x1b01 },
225        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
226        { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
227        { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
228        { "r", 0x333333 },
229        { "s2345", 0x4444444 },
230        { "t234567890", 0x77777777 },
231        { "z", (int32_t)0x80000001 }
232    };
233    checkData(data, UPRV_LENGTHOF(data));
234}
235
236void UCharsTrieTest::TestValuesForState() {
237    // Check that saveState() and resetToState() interact properly
238    // with next() and current().
239    static const StringAndValue data[]={
240        { "a", -1 },
241        { "ab", -2 },
242        { "abc", -3 },
243        { "abcd", -4 },
244        { "abcde", -5 },
245        { "abcdef", -6 }
246    };
247    checkData(data, UPRV_LENGTHOF(data));
248}
249
250void UCharsTrieTest::TestCompact() {
251    // Duplicate trailing strings and values provide opportunities for compacting.
252    static const StringAndValue data[]={
253        { "+", 0 },
254        { "+august", 8 },
255        { "+december", 12 },
256        { "+july", 7 },
257        { "+june", 6 },
258        { "+november", 11 },
259        { "+october", 10 },
260        { "+september", 9 },
261        { "-", 0 },
262        { "-august", 8 },
263        { "-december", 12 },
264        { "-july", 7 },
265        { "-june", 6 },
266        { "-november", 11 },
267        { "-october", 10 },
268        { "-september", 9 },
269        // The l+n branch (with its sub-nodes) is a duplicate but will be written
270        // both times because each time it follows a different linear-match node.
271        { "xjuly", 7 },
272        { "xjune", 6 }
273    };
274    checkData(data, UPRV_LENGTHOF(data));
275}
276
277void UCharsTrieTest::TestFirstForCodePoint() {
278    static const StringAndValue data[]={
279        { "a", 1 },
280        { "a\\ud800", 2 },
281        { "a\\U00010000", 3 },
282        { "\\ud840", 4 },
283        { "\\U00020000\\udbff", 5 },
284        { "\\U00020000\\U0010ffff", 6 },
285        { "\\U00020000\\U0010ffffz", 7 },
286        { "\\U00050000xy", 8 },
287        { "\\U00050000xyz", 9 }
288    };
289    checkData(data, UPRV_LENGTHOF(data));
290}
291
292void UCharsTrieTest::TestNextForCodePoint() {
293    static const StringAndValue data[]={
294        { "\\u4dff\\U00010000\\u9999\\U00020000\\udfff\\U0010ffff", 2000000000 },
295        { "\\u4dff\\U00010000\\u9999\\U00020002", 44444 },
296        { "\\u4dff\\U000103ff", 99999 }
297    };
298    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
299    if(trie.isNull()) {
300        return;  // buildTrie() reported an error
301    }
302    UStringTrieResult result;
303    if( (result=trie->nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
304        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
305        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
306        (result=trie->nextForCodePoint(0x20000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
307        (result=trie->nextForCodePoint(0xdfff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
308        (result=trie->nextForCodePoint(0x10ffff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
309        trie->getValue()!=2000000000
310    ) {
311        errln("UCharsTrie.nextForCodePoint() fails for %s", data[0].s);
312    }
313    if( (result=trie->firstForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
314        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
315        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
316        (result=trie->nextForCodePoint(0x20002))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
317        trie->getValue()!=44444
318    ) {
319        errln("UCharsTrie.nextForCodePoint() fails for %s", data[1].s);
320    }
321    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
322        (result=trie->nextForCodePoint(0x10000))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
323        (result=trie->nextForCodePoint(0x9999))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
324        (result=trie->nextForCodePoint(0x20222))!=USTRINGTRIE_NO_MATCH || result!=trie->current()  // no match for trail surrogate
325    ) {
326        errln("UCharsTrie.nextForCodePoint() fails for \\u4dff\\U00010000\\u9999\\U00020222");
327    }
328    if( (result=trie->reset().nextForCodePoint(0x4dff))!=USTRINGTRIE_NO_VALUE || result!=trie->current() ||
329        (result=trie->nextForCodePoint(0x103ff))!=USTRINGTRIE_FINAL_VALUE || result!=trie->current() ||
330        trie->getValue()!=99999
331    ) {
332        errln("UCharsTrie.nextForCodePoint() fails for %s", data[2].s);
333    }
334}
335
336// Definitions in the anonymous namespace are invisible outside this file.
337namespace {
338
339// Generate (string, value) pairs.
340// The first string (before next()) will be empty.
341class Generator {
342public:
343    Generator() : value(4711), num(0) {}
344    void next() {
345        UChar c;
346        s.truncate(0);
347        s.append(c=(UChar)(value>>16));
348        s.append((UChar)(value>>4));
349        if(value&1) {
350            s.append((UChar)value);
351        }
352        set.add(c);
353        value+=((value>>5)&0x7ff)*3+1;
354        ++num;
355    }
356    const UnicodeString &getString() const { return s; }
357    int32_t getValue() const { return value; }
358    int32_t countUniqueFirstChars() const { return set.size(); }
359    int32_t getIndex() const { return num; }
360
361private:
362    UnicodeString s;
363    UnicodeSet set;
364    int32_t value;
365    int32_t num;
366};
367
368}  // end namespace
369
370UCharsTrie *UCharsTrieTest::buildLargeTrie(int32_t numUniqueFirst) {
371    IcuTestErrorCode errorCode(*this, "buildLargeTrie()");
372    Generator gen;
373    builder_->clear();
374    while(gen.countUniqueFirstChars()<numUniqueFirst) {
375        builder_->add(gen.getString(), gen.getValue(), errorCode);
376        gen.next();
377    }
378    logln("buildLargeTrie(%ld) added %ld strings", (long)numUniqueFirst, (long)gen.getIndex());
379    UnicodeString trieUChars;
380    builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
381    logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
382    return new UCharsTrie(trieUChars.getBuffer());
383}
384
385// Exercise a large branch node.
386void UCharsTrieTest::TestLargeTrie() {
387    LocalPointer<UCharsTrie> trie(buildLargeTrie(1111));
388    if(trie.isNull()) {
389        return;  // buildTrie() reported an error
390    }
391    Generator gen;
392    while(gen.countUniqueFirstChars()<1111) {
393        UnicodeString x(gen.getString());
394        int32_t value=gen.getValue();
395        if(!x.isEmpty()) {
396            if(trie->first(x[0])==USTRINGTRIE_NO_MATCH) {
397                errln("first(first char U+%04X)=USTRINGTRIE_NO_MATCH for string %ld\n",
398                      x[0], (long)gen.getIndex());
399                break;
400            }
401            x.remove(0, 1);
402        }
403        UStringTrieResult result=trie->next(x.getBuffer(), x.length());
404        if(!USTRINGTRIE_HAS_VALUE(result) || result!=trie->current() || value!=trie->getValue()) {
405            errln("next(%d chars U+%04X U+%04X)!=hasValue or "
406                  "next()!=current() or getValue() wrong "
407                  "for string %ld\n", (int)x.length(), x[0], x[1], (long)gen.getIndex());
408            break;
409        }
410        gen.next();
411    }
412}
413
414enum {
415    u_a=0x61,
416    u_b=0x62,
417    u_c=0x63,
418    u_j=0x6a,
419    u_n=0x6e,
420    u_r=0x72,
421    u_u=0x75,
422    u_y=0x79
423};
424
425UCharsTrie *UCharsTrieTest::buildMonthsTrie(UStringTrieBuildOption buildOption) {
426    // All types of nodes leading to the same value,
427    // for code coverage of recursive functions.
428    // In particular, we need a lot of branches on some single level
429    // to exercise a split-branch node.
430    static const StringAndValue data[]={
431        { "august", 8 },
432        { "jan", 1 },
433        { "jan.", 1 },
434        { "jana", 1 },
435        { "janbb", 1 },
436        { "janc", 1 },
437        { "janddd", 1 },
438        { "janee", 1 },
439        { "janef", 1 },
440        { "janf", 1 },
441        { "jangg", 1 },
442        { "janh", 1 },
443        { "janiiii", 1 },
444        { "janj", 1 },
445        { "jankk", 1 },
446        { "jankl", 1 },
447        { "jankmm", 1 },
448        { "janl", 1 },
449        { "janm", 1 },
450        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
451        { "jano", 1 },
452        { "janpp", 1 },
453        { "janqqq", 1 },
454        { "janr", 1 },
455        { "januar", 1 },
456        { "january", 1 },
457        { "july", 7 },
458        { "jun", 6 },
459        { "jun.", 6 },
460        { "june", 6 }
461    };
462    return buildTrie(data, UPRV_LENGTHOF(data), buildOption);
463}
464
465void UCharsTrieTest::TestHasUniqueValue() {
466    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
467    if(trie.isNull()) {
468        return;  // buildTrie() reported an error
469    }
470    int32_t uniqueValue;
471    if(trie->hasUniqueValue(uniqueValue)) {
472        errln("unique value at root");
473    }
474    trie->next(u_j);
475    trie->next(u_a);
476    trie->next(u_n);
477    // hasUniqueValue() directly after next()
478    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=1) {
479        errln("not unique value 1 after \"jan\"");
480    }
481    trie->first(u_j);
482    trie->next(u_u);
483    if(trie->hasUniqueValue(uniqueValue)) {
484        errln("unique value after \"ju\"");
485    }
486    if(trie->next(u_n)!=USTRINGTRIE_INTERMEDIATE_VALUE || 6!=trie->getValue()) {
487        errln("not normal value 6 after \"jun\"");
488    }
489    // hasUniqueValue() after getValue()
490    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=6) {
491        errln("not unique value 6 after \"jun\"");
492    }
493    // hasUniqueValue() from within a linear-match node
494    trie->first(u_a);
495    trie->next(u_u);
496    if(!trie->hasUniqueValue(uniqueValue) || uniqueValue!=8) {
497        errln("not unique value 8 after \"au\"");
498    }
499}
500
501void UCharsTrieTest::TestGetNextUChars() {
502    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
503    if(trie.isNull()) {
504        return;  // buildTrie() reported an error
505    }
506    UnicodeString buffer;
507    UnicodeStringAppendable app(buffer);
508    int32_t count=trie->getNextUChars(app);
509    if(count!=2 || buffer.length()!=2 || buffer[0]!=u_a || buffer[1]!=u_j) {
510        errln("months getNextUChars()!=[aj] at root");
511    }
512    trie->next(u_j);
513    trie->next(u_a);
514    trie->next(u_n);
515    // getNextUChars() directly after next()
516    buffer.remove();
517    count=trie->getNextUChars(app);
518    if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
519        errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"");
520    }
521    // getNextUChars() after getValue()
522    trie->getValue();  // next() had returned USTRINGTRIE_INTERMEDIATE_VALUE.
523    buffer.remove();
524    count=trie->getNextUChars(app);
525    if(count!=20 || buffer!=UNICODE_STRING_SIMPLE(".abcdefghijklmnopqru")) {
526        errln("months getNextUChars()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
527    }
528    // getNextUChars() from a linear-match node
529    trie->next(u_u);
530    buffer.remove();
531    count=trie->getNextUChars(app);
532    if(count!=1 || buffer.length()!=1 || buffer[0]!=u_a) {
533        errln("months getNextUChars()!=[a] after \"janu\"");
534    }
535    trie->next(u_a);
536    buffer.remove();
537    count=trie->getNextUChars(app);
538    if(count!=1 || buffer.length()!=1 || buffer[0]!=u_r) {
539        errln("months getNextUChars()!=[r] after \"janua\"");
540    }
541    trie->next(u_r);
542    trie->next(u_y);
543    // getNextUChars() after a final match
544    buffer.remove();
545    count=trie->getNextUChars(app);
546    if(count!=0 || buffer.length()!=0) {
547        errln("months getNextUChars()!=[] after \"january\"");
548    }
549}
550
551void UCharsTrieTest::TestIteratorFromBranch() {
552    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
553    if(trie.isNull()) {
554        return;  // buildTrie() reported an error
555    }
556    // Go to a branch node.
557    trie->next(u_j);
558    trie->next(u_a);
559    trie->next(u_n);
560    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
561    UCharsTrie::Iterator iter(*trie, 0, errorCode);
562    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
563        return;
564    }
565    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
566    // following "jan".
567    static const StringAndValue data[]={
568        { "", 1 },
569        { ".", 1 },
570        { "a", 1 },
571        { "bb", 1 },
572        { "c", 1 },
573        { "ddd", 1 },
574        { "ee", 1 },
575        { "ef", 1 },
576        { "f", 1 },
577        { "gg", 1 },
578        { "h", 1 },
579        { "iiii", 1 },
580        { "j", 1 },
581        { "kk", 1 },
582        { "kl", 1 },
583        { "kmm", 1 },
584        { "l", 1 },
585        { "m", 1 },
586        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
587        { "o", 1 },
588        { "pp", 1 },
589        { "qqq", 1 },
590        { "r", 1 },
591        { "uar", 1 },
592        { "uary", 1 }
593    };
594    checkIterator(iter, data, UPRV_LENGTHOF(data));
595    // Reset, and we should get the same result.
596    logln("after iter.reset()");
597    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
598}
599
600void UCharsTrieTest::TestIteratorFromLinearMatch() {
601    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_SMALL));
602    if(trie.isNull()) {
603        return;  // buildTrie() reported an error
604    }
605    // Go into a linear-match node.
606    trie->next(u_j);
607    trie->next(u_a);
608    trie->next(u_n);
609    trie->next(u_u);
610    trie->next(u_a);
611    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
612    UCharsTrie::Iterator iter(*trie, 0, errorCode);
613    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
614        return;
615    }
616    // Expected data: Same as in buildMonthsTrie(), except only the suffixes
617    // following "janua".
618    static const StringAndValue data[]={
619        { "r", 1 },
620        { "ry", 1 }
621    };
622    checkIterator(iter, data, UPRV_LENGTHOF(data));
623    // Reset, and we should get the same result.
624    logln("after iter.reset()");
625    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
626}
627
628void UCharsTrieTest::TestTruncatingIteratorFromRoot() {
629    LocalPointer<UCharsTrie> trie(buildMonthsTrie(USTRINGTRIE_BUILD_FAST));
630    if(trie.isNull()) {
631        return;  // buildTrie() reported an error
632    }
633    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
634    UCharsTrie::Iterator iter(*trie, 4, errorCode);
635    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
636        return;
637    }
638    // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
639    // of each string, and no string duplicates from the truncation.
640    static const StringAndValue data[]={
641        { "augu", -1 },
642        { "jan", 1 },
643        { "jan.", 1 },
644        { "jana", 1 },
645        { "janb", -1 },
646        { "janc", 1 },
647        { "jand", -1 },
648        { "jane", -1 },
649        { "janf", 1 },
650        { "jang", -1 },
651        { "janh", 1 },
652        { "jani", -1 },
653        { "janj", 1 },
654        { "jank", -1 },
655        { "janl", 1 },
656        { "janm", 1 },
657        { "jann", -1 },
658        { "jano", 1 },
659        { "janp", -1 },
660        { "janq", -1 },
661        { "janr", 1 },
662        { "janu", -1 },
663        { "july", 7 },
664        { "jun", 6 },
665        { "jun.", 6 },
666        { "june", 6 }
667    };
668    checkIterator(iter, data, UPRV_LENGTHOF(data));
669    // Reset, and we should get the same result.
670    logln("after iter.reset()");
671    checkIterator(iter.reset(), data, UPRV_LENGTHOF(data));
672}
673
674void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
675    static const StringAndValue data[]={
676        { "abcdef", 10 },
677        { "abcdepq", 200 },
678        { "abcdeyz", 3000 }
679    };
680    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
681    if(trie.isNull()) {
682        return;  // buildTrie() reported an error
683    }
684    // Go into a linear-match node.
685    trie->next(u_a);
686    trie->next(u_b);
687    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
688    // Truncate within the linear-match node.
689    UCharsTrie::Iterator iter(*trie, 2, errorCode);
690    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
691        return;
692    }
693    static const StringAndValue expected[]={
694        { "cd", -1 }
695    };
696    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
697    // Reset, and we should get the same result.
698    logln("after iter.reset()");
699    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
700}
701
702void UCharsTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
703    static const StringAndValue data[]={
704        { "abcdef", 10 },
705        { "abcdepq", 200 },
706        { "abcdeyz", 3000 }
707    };
708    LocalPointer<UCharsTrie> trie(buildTrie(data, UPRV_LENGTHOF(data), USTRINGTRIE_BUILD_FAST));
709    if(trie.isNull()) {
710        return;  // buildTrie() reported an error
711    }
712    // Go into a linear-match node.
713    trie->next(u_a);
714    trie->next(u_b);
715    trie->next(u_c);
716    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
717    // Truncate after the linear-match node.
718    UCharsTrie::Iterator iter(*trie, 3, errorCode);
719    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trie) constructor")) {
720        return;
721    }
722    static const StringAndValue expected[]={
723        { "def", 10 },
724        { "dep", -1 },
725        { "dey", -1 }
726    };
727    checkIterator(iter, expected, UPRV_LENGTHOF(expected));
728    // Reset, and we should get the same result.
729    logln("after iter.reset()");
730    checkIterator(iter.reset(), expected, UPRV_LENGTHOF(expected));
731}
732
733void UCharsTrieTest::TestIteratorFromUChars() {
734    static const StringAndValue data[]={
735        { "mm", 3 },
736        { "mmm", 33 },
737        { "mmnop", 333 }
738    };
739    builder_->clear();
740    IcuTestErrorCode errorCode(*this, "TestIteratorFromUChars()");
741    for(int32_t i=0; i<UPRV_LENGTHOF(data); ++i) {
742        builder_->add(data[i].s, data[i].value, errorCode);
743    }
744    UnicodeString trieUChars;
745    builder_->buildUnicodeString(USTRINGTRIE_BUILD_FAST, trieUChars, errorCode);
746    UCharsTrie::Iterator iter(trieUChars.getBuffer(), 0, errorCode);
747    checkIterator(iter, data, UPRV_LENGTHOF(data));
748}
749
750void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
751    logln("checkData(dataLength=%d, fast)", (int)dataLength);
752    checkData(data, dataLength, USTRINGTRIE_BUILD_FAST);
753    logln("checkData(dataLength=%d, small)", (int)dataLength);
754    checkData(data, dataLength, USTRINGTRIE_BUILD_SMALL);
755}
756
757void UCharsTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UStringTrieBuildOption buildOption) {
758    LocalPointer<UCharsTrie> trie(buildTrie(data, dataLength, buildOption));
759    if(trie.isNull()) {
760        return;  // buildTrie() reported an error
761    }
762    checkFirst(*trie, data, dataLength);
763    checkNext(*trie, data, dataLength);
764    checkNextWithState(*trie, data, dataLength);
765    checkNextString(*trie, data, dataLength);
766    checkIterator(*trie, data, dataLength);
767}
768
769UCharsTrie *UCharsTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
770                                      UStringTrieBuildOption buildOption) {
771    IcuTestErrorCode errorCode(*this, "buildTrie()");
772    // Add the items to the trie builder in an interesting (not trivial, not random) order.
773    int32_t index, step;
774    if(dataLength&1) {
775        // Odd number of items.
776        index=dataLength/2;
777        step=2;
778    } else if((dataLength%3)!=0) {
779        // Not a multiple of 3.
780        index=dataLength/5;
781        step=3;
782    } else {
783        index=dataLength-1;
784        step=-1;
785    }
786    builder_->clear();
787    for(int32_t i=0; i<dataLength; ++i) {
788        builder_->add(UnicodeString(data[index].s, -1, US_INV).unescape(),
789                      data[index].value, errorCode);
790        index=(index+step)%dataLength;
791    }
792    UnicodeString trieUChars;
793    builder_->buildUnicodeString(buildOption, trieUChars, errorCode);
794    LocalPointer<UCharsTrie> trie(builder_->build(buildOption, errorCode));
795    if(!errorCode.logIfFailureAndReset("add()/build()")) {
796        builder_->add("zzz", 999, errorCode);
797        if(errorCode.reset()!=U_NO_WRITE_PERMISSION) {
798            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
799        }
800    }
801    logln("serialized trie size: %ld UChars\n", (long)trieUChars.length());
802    UnicodeString trieUChars2;
803    builder_->buildUnicodeString(buildOption, trieUChars2, errorCode);
804    if(trieUChars.getBuffer()==trieUChars2.getBuffer()) {
805        errln("builder.buildUnicodeString() before & after build() returned same array");
806    }
807    if(errorCode.isFailure()) {
808        return NULL;
809    }
810    // Tries from either build() method should be identical but
811    // UCharsTrie does not implement equals().
812    // We just return either one.
813    if((dataLength&1)!=0) {
814        return trie.orphan();
815    } else {
816        return new UCharsTrie(trieUChars2.getBuffer());
817    }
818}
819
820void UCharsTrieTest::checkFirst(UCharsTrie &trie,
821                                const StringAndValue data[], int32_t dataLength) {
822    for(int32_t i=0; i<dataLength; ++i) {
823        if(*data[i].s==0) {
824            continue;  // skip empty string
825        }
826        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
827        UChar32 c=expectedString[0];
828        UChar32 nextCp=expectedString.length()>1 ? expectedString[1] : 0;
829        UStringTrieResult firstResult=trie.first(c);
830        int32_t firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
831        UStringTrieResult nextResult=trie.next(nextCp);
832        if(firstResult!=trie.reset().next(c) ||
833           firstResult!=trie.current() ||
834           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
835           nextResult!=trie.next(nextCp)
836        ) {
837            errln("trie.first(U+%04X)!=trie.reset().next(same) for %s",
838                  c, data[i].s);
839        }
840        c=expectedString.char32At(0);
841        int32_t cLength=U16_LENGTH(c);
842        nextCp=expectedString.length()>cLength ? expectedString.char32At(cLength) : 0;
843        firstResult=trie.firstForCodePoint(c);
844        firstValue=USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1;
845        nextResult=trie.nextForCodePoint(nextCp);
846        if(firstResult!=trie.reset().nextForCodePoint(c) ||
847           firstResult!=trie.current() ||
848           firstValue!=(USTRINGTRIE_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
849           nextResult!=trie.nextForCodePoint(nextCp)
850        ) {
851            errln("trie.firstForCodePoint(U+%04X)!=trie.reset().nextForCodePoint(same) for %s",
852                  c, data[i].s);
853        }
854    }
855    trie.reset();
856}
857
858void UCharsTrieTest::checkNext(UCharsTrie &trie,
859                               const StringAndValue data[], int32_t dataLength) {
860    UCharsTrie::State state;
861    for(int32_t i=0; i<dataLength; ++i) {
862        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
863        int32_t stringLength= (i&1) ? -1 : expectedString.length();
864        UStringTrieResult result;
865        if( !USTRINGTRIE_HAS_VALUE(
866                result=trie.next(expectedString.getTerminatedBuffer(), stringLength)) ||
867            result!=trie.current()
868        ) {
869            errln("trie does not seem to contain %s", data[i].s);
870        } else if(trie.getValue()!=data[i].value) {
871            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
872                  data[i].s,
873                  (long)trie.getValue(), (long)trie.getValue(),
874                  (long)data[i].value, (long)data[i].value);
875        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
876            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
877        }
878        trie.reset();
879        stringLength=expectedString.length();
880        result=trie.current();
881        for(int32_t j=0; j<stringLength; ++j) {
882            if(!USTRINGTRIE_HAS_NEXT(result)) {
883                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
884                break;
885            }
886            if(result==USTRINGTRIE_INTERMEDIATE_VALUE) {
887                trie.getValue();
888                if(trie.current()!=USTRINGTRIE_INTERMEDIATE_VALUE) {
889                    errln("trie.getValue().current()!=USTRINGTRIE_INTERMEDIATE_VALUE before end of %s (at index %d)", data[i].s, j);
890                    break;
891                }
892            }
893            result=trie.next(expectedString[j]);
894            if(!USTRINGTRIE_MATCHES(result)) {
895                errln("trie.next()=USTRINGTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
896                break;
897            }
898            if(result!=trie.current()) {
899                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
900                break;
901            }
902        }
903        if(!USTRINGTRIE_HAS_VALUE(result)) {
904            errln("trie.next()!=hasValue at the end of %s", data[i].s);
905            continue;
906        }
907        trie.getValue();
908        if(result!=trie.current()) {
909            errln("trie.current() != current()+getValue()+current() after end of %s",
910                  data[i].s);
911        }
912        // Compare the final current() with whether next() can actually continue.
913        trie.saveState(state);
914        UBool nextContinues=FALSE;
915        for(int32_t c=0x20; c<0xe000; ++c) {
916            if(c==0x80) {
917                c=0xd800;  // Check for ASCII and surrogates but not all of the BMP.
918            }
919            if(trie.resetToState(state).next(c)) {
920                nextContinues=TRUE;
921                break;
922            }
923        }
924        if((result==USTRINGTRIE_INTERMEDIATE_VALUE)!=nextContinues) {
925            errln("(trie.current()==USTRINGTRIE_INTERMEDIATE_VALUE) contradicts "
926                  "(trie.next(some UChar)!=USTRINGTRIE_NO_MATCH) after end of %s", data[i].s);
927        }
928        trie.reset();
929    }
930}
931
932void UCharsTrieTest::checkNextWithState(UCharsTrie &trie,
933                                        const StringAndValue data[], int32_t dataLength) {
934    UCharsTrie::State noState, state;
935    for(int32_t i=0; i<dataLength; ++i) {
936        if((i&1)==0) {
937            // This should have no effect.
938            trie.resetToState(noState);
939        }
940        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
941        int32_t stringLength=expectedString.length();
942        int32_t partialLength=stringLength/3;
943        for(int32_t j=0; j<partialLength; ++j) {
944            if(!USTRINGTRIE_MATCHES(trie.next(expectedString[j]))) {
945                errln("trie.next()=USTRINGTRIE_NO_MATCH for a prefix of %s", data[i].s);
946                return;
947            }
948        }
949        trie.saveState(state);
950        UStringTrieResult resultAtState=trie.current();
951        UStringTrieResult result;
952        int32_t valueAtState=-99;
953        if(USTRINGTRIE_HAS_VALUE(resultAtState)) {
954            valueAtState=trie.getValue();
955        }
956        result=trie.next(0);  // mismatch
957        if(result!=USTRINGTRIE_NO_MATCH || result!=trie.current()) {
958            errln("trie.next(0) matched after part of %s", data[i].s);
959        }
960        if( resultAtState!=trie.resetToState(state).current() ||
961            (USTRINGTRIE_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
962        ) {
963            errln("trie.next(part of %s) changes current()/getValue() after "
964                  "saveState/next(0)/resetToState",
965                  data[i].s);
966        } else if(!USTRINGTRIE_HAS_VALUE(
967                      result=trie.next(expectedString.getTerminatedBuffer()+partialLength,
968                                       stringLength-partialLength)) ||
969                  result!=trie.current()) {
970            errln("trie.next(rest of %s) does not seem to contain %s after "
971                  "saveState/next(0)/resetToState",
972                  data[i].s, data[i].s);
973        } else if(!USTRINGTRIE_HAS_VALUE(
974                      result=trie.resetToState(state).
975                                  next(expectedString.getTerminatedBuffer()+partialLength,
976                                       stringLength-partialLength)) ||
977                  result!=trie.current()) {
978            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
979                  data[i].s);
980        } else if(trie.getValue()!=data[i].value) {
981            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
982                  data[i].s,
983                  (long)trie.getValue(), (long)trie.getValue(),
984                  (long)data[i].value, (long)data[i].value);
985        }
986        trie.reset();
987    }
988}
989
990// next(string) is also tested in other functions,
991// but here we try to go partway through the string, and then beyond it.
992void UCharsTrieTest::checkNextString(UCharsTrie &trie,
993                                     const StringAndValue data[], int32_t dataLength) {
994    for(int32_t i=0; i<dataLength; ++i) {
995        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
996        int32_t stringLength=expectedString.length();
997        if(!trie.next(expectedString.getTerminatedBuffer(), stringLength/2)) {
998            errln("trie.next(up to middle of string)=USTRINGTRIE_NO_MATCH for %s", data[i].s);
999            continue;
1000        }
1001        // Test that we stop properly at the end of the string.
1002        if(trie.next(expectedString.getTerminatedBuffer()+stringLength/2,
1003                     stringLength+1-stringLength/2)) {
1004            errln("trie.next(string+NUL)!=USTRINGTRIE_NO_MATCH for %s", data[i].s);
1005        }
1006        trie.reset();
1007    }
1008}
1009
1010void UCharsTrieTest::checkIterator(UCharsTrie &trie,
1011                                   const StringAndValue data[], int32_t dataLength) {
1012    IcuTestErrorCode errorCode(*this, "checkIterator()");
1013    UCharsTrie::Iterator iter(trie, 0, errorCode);
1014    if(errorCode.logIfFailureAndReset("UCharsTrie::Iterator(trieUChars) constructor")) {
1015        return;
1016    }
1017    checkIterator(iter, data, dataLength);
1018}
1019
1020void UCharsTrieTest::checkIterator(UCharsTrie::Iterator &iter,
1021                                   const StringAndValue data[], int32_t dataLength) {
1022    IcuTestErrorCode errorCode(*this, "checkIterator()");
1023    for(int32_t i=0; i<dataLength; ++i) {
1024        if(!iter.hasNext()) {
1025            errln("trie iterator hasNext()=FALSE for item %d: %s", (int)i, data[i].s);
1026            break;
1027        }
1028        UBool hasNext=iter.next(errorCode);
1029        if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", (int)i, data[i].s)) {
1030            break;
1031        }
1032        if(!hasNext) {
1033            errln("trie iterator next()=FALSE for item %d: %s", (int)i, data[i].s);
1034            break;
1035        }
1036        UnicodeString expectedString=UnicodeString(data[i].s, -1, US_INV).unescape();
1037        if(iter.getString()!=expectedString) {
1038            char buffer[1000];
1039            UnicodeString invString(prettify(iter.getString()));
1040            invString.extract(0, invString.length(), buffer, UPRV_LENGTHOF(buffer), US_INV);
1041            errln("trie iterator next().getString()=%s but expected %s for item %d",
1042                  buffer, data[i].s, (int)i);
1043        }
1044        if(iter.getValue()!=data[i].value) {
1045            errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
1046                  (long)iter.getValue(), (long)iter.getValue(),
1047                  (long)data[i].value, (long)data[i].value,
1048                  (int)i, data[i].s);
1049        }
1050    }
1051    if(iter.hasNext()) {
1052        errln("trie iterator hasNext()=TRUE after all items");
1053    }
1054    UBool hasNext=iter.next(errorCode);
1055    errorCode.logIfFailureAndReset("trie iterator next() after all items");
1056    if(hasNext) {
1057        errln("trie iterator next()=TRUE after all items");
1058    }
1059}
1060