1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/*
2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2012-2014, International Business Machines
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others.  All Rights Reserved.
5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationtest.cpp
7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012apr27
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/coll.h"
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/errorcode.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/localpointer.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/normalizer2.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/sortkey.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/std_string.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/strenum.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uiter.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/usetiter.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ustring.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "charstr.h"
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationfcd.h"
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h"
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationroot.h"
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationrootelements.h"
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationruleparser.h"
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationweights.h"
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cstring.h"
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "intltest.h"
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "ucbuf.h"
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uhash.h"
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uitercollationiterator.h"
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf16collationiterator.h"
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf8collationiterator.h"
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h"
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h"
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "writesrc.h"
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is wrapped in parentheses so that the cast cannot
// interact with operators next to the macro at the point of use
// (for example, "sizeof LENGTHOF(a)" would otherwise parse as sizeof(int32_t)
// followed by a call expression).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// TODO: Move to ucbuf.h
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CodePointIterator;
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey)
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CollationTest : public IntlTest {
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationTest()
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : fcd(NULL), nfd(NULL),
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              fileLineNumber(0),
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              coll(NULL) {}
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ~CollationTest() {
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        delete coll;
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestMinMax();
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestImplicits();
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestNulTerminated();
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestIllegalUTF8();
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestShortFCDData();
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestFCD();
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestCollationWeights();
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestRootElements();
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestTailoredElements();
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void TestDataDriven();
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void checkFCD(const char *name, CollationIterator &ci, CodePointIterator &cpi);
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void checkAllocWeights(CollationWeights &cw,
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           uint32_t lowerLimit, uint32_t upperLimit, int32_t n,
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           int32_t someLength, int32_t minCount);
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UnicodeString printSortKey(const uint8_t *p, int32_t length);
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UnicodeString printCollationKey(const CollationKey &key);
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Helpers & fields for data-driven test.
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UBool isCROrLF(UChar c) { return c == 0xa || c == 0xd; }
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UBool isSpace(UChar c) { return c == 9 || c == 0x20 || c == 0x3000; }
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UBool isSectionStarter(UChar c) { return c == 0x25 || c == 0x2a || c == 0x40; }  // %*@
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t skipSpaces(int32_t i) {
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(isSpace(fileLine[i])) { ++i; }
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return i;
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool readLine(UCHARBUF *f, IcuTestErrorCode &errorCode);
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, UErrorCode &errorCode);
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    Collation::Level parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode);
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void parseAndSetAttribute(IcuTestErrorCode &errorCode);
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode);
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode);
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void setRootCollator(IcuTestErrorCode &errorCode);
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void setLocaleCollator(IcuTestErrorCode &errorCode);
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const;
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool getSortKeyParts(const UChar *s, int32_t length,
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          CharString &dest, int32_t partSize,
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          IcuTestErrorCode &errorCode);
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool getCollationKey(const char *norm, const UnicodeString &line,
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          const UChar *s, int32_t length,
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          CollationKey &key, IcuTestErrorCode &errorCode);
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          const UnicodeString &prevString, const UnicodeString &s,
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          UCollationResult expectedOrder, Collation::Level expectedLevel,
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          IcuTestErrorCode &errorCode);
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode);
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const Normalizer2 *fcd, *nfd;
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString fileLine;
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t fileLineNumber;
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString fileTestName;
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    Collator *coll;
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusextern IntlTest *createCollationTest() {
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return new CollationTest();
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(exec) {
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        logln("TestSuite CollationTest: ");
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO_BEGIN;
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestMinMax);
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestImplicits);
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestNulTerminated);
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestIllegalUTF8);
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestShortFCDData);
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestFCD);
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestCollationWeights);
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestRootElements);
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestTailoredElements);
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO(TestDataDriven);
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TESTCASE_AUTO_END;
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestMinMax() {
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestMinMax");
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setRootCollator(errorCode);
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.reset();
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll);
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(rbc == NULL) {
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("the root collator is not a RuleBasedCollator");
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const UChar s[2] = { 0xfffe, 0xffff };
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector64 ces(errorCode);
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    rbc->internalGetCEs(UnicodeString(FALSE, s, 2), ces, errorCode);
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode.assertSuccess();
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(ces.size() != 2) {
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces.size());
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t ce = ces.elementAti(0);
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t expected =
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ((int64_t)Collation::MERGE_SEPARATOR_PRIMARY << 32) |
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Collation::MERGE_SEPARATOR_LOWER32;
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(ce != expected) {
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("CE(U+fffe)=%04lx != 02.02.02", (long)ce);
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ce = ces.elementAti(1);
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    expected = Collation::makeCE(Collation::MAX_PRIMARY);
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(ce != expected) {
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("CE(U+ffff)=%04lx != max..", (long)ce);
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestImplicits() {
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestImplicits");
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *cd = CollationRoot::getData(errorCode);
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logDataIfFailureAndReset("CollationRoot::getBaseData()")) {
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Implicit primary weights should be assigned for the following sets,
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // and sort in ascending order by set and then code point.
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // See http://www.unicode.org/reports/tr10/#Implicit_Weights
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // core Han Unified Ideographs
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet coreHan("[\\p{unified_ideograph}&"
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            "[\\p{Block=CJK_Unified_Ideographs}"
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            "\\p{Block=CJK_Compatibility_Ideographs}]]",
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                       errorCode);
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // all other Unified Han ideographs
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet otherHan("[\\p{unified ideograph}-"
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            "[\\p{Block=CJK_Unified_Ideographs}"
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            "\\p{Block=CJK_Compatibility_Ideographs}]]",
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errorCode);
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode);
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    unassigned.remove(0xfffe, 0xffff);  // These have special CLDR root mappings.
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logIfFailureAndReset("UnicodeSet")) {
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeSet *sets[] = { &coreHan, &otherHan, &unassigned };
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar32 prev = 0;
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t prevPrimary = 0;
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTF16CollationIterator ci(cd, FALSE, NULL, NULL, NULL);
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0; i < LENGTHOF(sets); ++i) {
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        LocalPointer<UnicodeSetIterator> iter(new UnicodeSetIterator(*sets[i]));
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(iter->next()) {
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar32 c = iter->getCodepoint();
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UnicodeString s(c);
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ci.setText(s.getBuffer(), s.getBuffer() + s.length());
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int64_t ce = ci.nextCE(errorCode);
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int64_t ce2 = ci.nextCE(errorCode);
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(errorCode.logIfFailureAndReset("CollationIterator.nextCE()")) {
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(ce == Collation::NO_CE || ce2 != Collation::NO_CE) {
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c);
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                continue;
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((ce & 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE) {
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx",
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                      (long)c, (long)(ce & 0xffffffff));
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                continue;
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t primary = (uint32_t)(ce >> 32);
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!(primary > prevPrimary)) {
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..",
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                      (long)c, (long)primary, (long)prev, (long)prevPrimary);
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            prev = c;
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            prevPrimary = primary;
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestNulTerminated() {
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestNulTerminated");
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *data = CollationRoot::getData(errorCode);
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) {
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const UChar s[] = { 0x61, 0x62, 0x61, 0x62, 0 };
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTF16CollationIterator ci1(data, FALSE, s, s, s + 2);
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTF16CollationIterator ci2(data, FALSE, s + 2, s + 2, NULL);
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0;; ++i) {
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t ce1 = ci1.nextCE(errorCode);
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t ce2 = ci2.nextCE(errorCode);
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(errorCode.logIfFailureAndReset("CollationIterator.nextCE()")) {
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ce1 != ce2) {
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i);
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ce1 == Collation::NO_CE) { break; }
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestIllegalUTF8() {
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestIllegalUTF8");
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setRootCollator(errorCode);
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.reset();
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, errorCode);
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const char *strings[] = {
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // U+FFFD
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xef\xbf\xbdz",
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // illegal byte sequences
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\x80z",  // trail byte
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xc1\x81z",  // non-shortest form
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xe0\x82\x83z",  // non-shortest form
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xed\xa0\x80z",  // lead surrogate: would be U+D800
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xed\xbf\xbfz",  // trail surrogate: would be U+DFFF
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xf0\x8f\xbf\xbfz",  // non-shortest form
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        "a\xf4\x90\x80\x80z"  // out of range: would be U+110000
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    StringPiece fffd(strings[0]);
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 1; i < LENGTHOF(strings); ++i) {
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        StringPiece illegal(strings[i]);
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UCollationResult order = coll->compareUTF8(fffd, illegal, errorCode);
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(order != UCOL_EQUAL) {
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("compareUTF8(U+FFFD, string %d with illegal UTF-8)=%d != UCOL_EQUAL",
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)i, order);
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid addLeadSurrogatesForSupplementary(const UnicodeSet &src, UnicodeSet &dest) {
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(UChar32 c = 0x10000; c < 0x110000;) {
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 next = c + 0x400;
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(src.containsSome(c, next - 1)) {
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            dest.add(U16_LEAD(c));
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        c = next;
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestShortFCDData() {
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // See CollationFCD class comments.
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestShortFCDData");
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet expectedLccc("[:^lccc=0:]", errorCode);
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode.assertSuccess();
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    expectedLccc.add(0xdc00, 0xdfff);  // add all trail surrogates
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    addLeadSurrogatesForSupplementary(expectedLccc, expectedLccc);
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet lccc;  // actual
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(UChar32 c = 0; c <= 0xffff; ++c) {
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(CollationFCD::hasLccc(c)) { lccc.add(c); }
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet diff(expectedLccc);
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    diff.removeAll(lccc);
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    diff.remove(0x10000, 0x10ffff);  // hasLccc() only works for the BMP
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString empty("[]");
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString diffString;
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    diff.toPattern(diffString, TRUE);
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    assertEquals("CollationFCD::hasLccc() expected-actual", empty, diffString);
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    diff = lccc;
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    diff.removeAll(expectedLccc);
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    diff.toPattern(diffString, TRUE);
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    assertEquals("CollationFCD::hasLccc() actual-expected", empty, diffString, TRUE);
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet expectedTccc("[:^tccc=0:]", errorCode);
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (errorCode.isSuccess()) {
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addLeadSurrogatesForSupplementary(expectedLccc, expectedTccc);
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addLeadSurrogatesForSupplementary(expectedTccc, expectedTccc);
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeSet tccc;  // actual
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(UChar32 c = 0; c <= 0xffff; ++c) {
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(CollationFCD::hasTccc(c)) { tccc.add(c); }
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        diff = expectedTccc;
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        diff.removeAll(tccc);
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        diff.remove(0x10000, 0x10ffff);  // hasTccc() only works for the BMP
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        assertEquals("CollationFCD::hasTccc() expected-actual", empty, diffString);
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        diff = tccc;
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        diff.removeAll(expectedTccc);
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        diff.toPattern(diffString, TRUE);
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        assertEquals("CollationFCD::hasTccc() actual-expected", empty, diffString);
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CodePointIterator {
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CodePointIterator(const UChar32 *cp, int32_t length) : cp(cp), length(length), pos(0) {}
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void resetToStart() { pos = 0; }
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar32 next() { return (pos < length) ? cp[pos++] : U_SENTINEL; }
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar32 previous() { return (pos > 0) ? cp[--pos] : U_SENTINEL; }
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t getLength() const { return length; }
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int getIndex() const { return (int)pos; }
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar32 *cp;
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length;
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t pos;
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::checkFCD(const char *name,
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                             CollationIterator &ci, CodePointIterator &cpi) {
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "checkFCD");
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Iterate forward to the limit.
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c1 = ci.nextCodePoint(errorCode);
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c2 = cpi.next();
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 != c2) {
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d",
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  name, (long)c1, (long)c2, cpi.getIndex());
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 < 0) { break; }
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Iterate backward most of the way.
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t n = (cpi.getLength() * 2) / 3; n > 0; --n) {
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c1 = ci.previousCodePoint(errorCode);
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c2 = cpi.previous();
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 != c2) {
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d",
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  name, (long)c1, (long)c2, cpi.getIndex());
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Forward again.
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c1 = ci.nextCodePoint(errorCode);
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c2 = cpi.next();
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 != c2) {
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d",
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  name, (long)c1, (long)c2, cpi.getIndex());
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 < 0) { break; }
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Iterate backward to the start.
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c1 = ci.previousCodePoint(errorCode);
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c2 = cpi.previous();
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 != c2) {
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d",
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  name, (long)c1, (long)c2, cpi.getIndex());
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(c1 < 0) { break; }
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestFCD() {
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestFCD");
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *data = CollationRoot::getData(errorCode);
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) {
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Input string, not FCD, NUL-terminated.
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const UChar s[] = {
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62,
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F),  // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x327, 0x308,  // ccc=202, 230
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D),  // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F),
447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D),
448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0xac01,
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0xe7,  // Character with tccc!=0 decomposed together with mis-ordered sequence.
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165),
451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0xe1,  // Character with tccc!=0 decomposed together with decomposed sequence.
452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0xf73, 0xf75,  // Tibetan composite vowels must be decomposed.
453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x4e00, 0xf81,
454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expected code points.
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const UChar32 cp[] = {
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62,
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308,
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x1D15F, 0x1D16D,
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0xac01,
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x63, 0x327, 0x1D165, 0x1D16D,
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x61,
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0xf71, 0xf71, 0xf72, 0xf74, 0x301,
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        0x4e00, 0xf71, 0xf80
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    FCDUTF16CollationIterator u16ci(data, FALSE, s, s, NULL);
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logIfFailureAndReset("FCDUTF16CollationIterator constructor")) {
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CodePointIterator cpi(cp, LENGTHOF(cp));
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkFCD("FCDUTF16CollationIterator", u16ci, cpi);
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if U_HAVE_STD_STRING
476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cpi.resetToStart();
477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    std::string utf8;
478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString(s).toUTF8String(utf8);
479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    FCDUTF8CollationIterator u8ci(data, FALSE,
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  reinterpret_cast<const uint8_t *>(utf8.c_str()), 0, -1);
481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logIfFailureAndReset("FCDUTF8CollationIterator constructor")) {
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkFCD("FCDUTF8CollationIterator", u8ci, cpi);
485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif
486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cpi.resetToStart();
488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharIterator iter;
489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uiter_setString(&iter, s, LENGTHOF(s) - 1);  // -1: without the terminating NUL
490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    FCDUIterCollationIterator uici(data, FALSE, iter, 0);
491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logIfFailureAndReset("FCDUIterCollationIterator constructor")) {
492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkFCD("FCDUIterCollationIterator", uici, cpi);
495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::checkAllocWeights(CollationWeights &cw,
498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                      uint32_t lowerLimit, uint32_t upperLimit, int32_t n,
499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                      int32_t someLength, int32_t minCount) {
500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!cw.allocWeights(lowerLimit, upperLimit, n)) {
501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE",
502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (long)lowerLimit, (long)upperLimit, (long)n);
503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t previous = lowerLimit;
506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t count = 0;  // number of weights that have someLength
507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0; i < n; ++i) {
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t w = cw.nextWeight();
509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(w == 0xffffffff) {
510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  "returns only %ld weights",
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (long)lowerLimit, (long)upperLimit, (long)n, (long)i);
513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!(previous < w && w < upperLimit)) {
516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  "number %ld -> %lx not between %lx and %lx",
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (long)lowerLimit, (long)upperLimit, (long)n,
519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (long)(i + 1), (long)w, (long)previous, (long)upperLimit);
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(CollationWeights::lengthOfWeight(w) == someLength) { ++count; }
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(count < minCount) {
525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() "
526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              "returns only %ld < %ld weights of length %d",
527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (long)lowerLimit, (long)upperLimit, (long)n,
528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (long)count, (long)minCount, (int)someLength);
529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestCollationWeights() {
533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationWeights cw;
534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Non-compressible primaries use 254 second bytes 02..FF.
536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    logln("CollationWeights.initForPrimary(non-compressible)");
537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw.initForPrimary(FALSE);
538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect 1 weight 11 and 254 weights 12xx.
539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10000000, 0x13000000, 255, 1, 1);
540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10000000, 0x13000000, 255, 2, 254);
541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202.
542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10fefe40, 0x12030300, 260, 2, 255);
543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect 254 two-byte weights from the ranges 10ff and 11xx.
544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10fefe40, 0x12030300, 600, 2, 254);
545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect 254^2=64516 three-byte weights.
546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // During computation, there should be 3 three-byte ranges
547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // 10ffff, 11xxxx, 120202.
548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The middle one should be split 64515:1,
549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // and the newly-split-off range and the last ranged lengthened.
550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516);
551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect weights 1102 & 1103.
552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10ff0000, 0x11040000, 2, 2, 2);
553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect weights 102102 & 102103.
554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x1020ff00, 0x10210400, 2, 3, 2);
555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Compressible primaries use 251 second bytes 04..FE.
557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    logln("CollationWeights.initForPrimary(compressible)");
558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw.initForPrimary(TRUE);
559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect 1 weight 11 and 251 weights 12xx.
560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10000000, 0x13000000, 252, 1, 1);
561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10000000, 0x13000000, 252, 2, 251);
562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204.
563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10fdfe40, 0x12050300, 260, 2, 252);
564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect weights 1104 & 1105.
565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x10fe0000, 0x11060000, 2, 2, 2);
566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect weights 102102 & 102103.
567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x1020ff00, 0x10210400, 2, 3, 2);
568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Secondary and tertiary weights use only bytes 3 & 4.
570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    logln("CollationWeights.initForSecondary()");
571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw.initForSecondary();
572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect weights fbxx and all four fc..ff.
573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0xfb20, 0x10000, 20, 3, 4);
574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    logln("CollationWeights.initForTertiary()");
576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw.initForTertiary();
577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Expect weights 3dxx and both 3e & 3f.
578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkAllocWeights(cw, 0x3d02, 0x4000, 10, 3, 2);
579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool isValidCE(const CollationRootElements &re, const CollationData &data,
584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uint32_t p, uint32_t s, uint32_t ctq) {
585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p1 = p >> 24;
586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p2 = (p >> 16) & 0xff;
587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p3 = (p >> 8) & 0xff;
588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p4 = p & 0xff;
589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t s1 = s >> 8;
590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t s2 = s & 0xff;
591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // ctq = Case, Tertiary, Quaternary
592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t c = (ctq & Collation::CASE_MASK) >> 14;
593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t t = ctq & Collation::ONLY_TERTIARY_MASK;
594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t t1 = t >> 8;
595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t t2 = t & 0xff;
596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t q = ctq & Collation::QUATERNARY_MASK;
597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // No leading zero bytes.
598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((p != 0 && p1 == 0) || (s != 0 && s1 == 0) || (t != 0 && t1 == 0)) {
599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // No intermediate zero bytes.
602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(p1 != 0 && p2 == 0 && (p & 0xffff) != 0) {
603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(p2 != 0 && p3 == 0 && p4 != 0) {
606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Minimum & maximum lead bytes.
609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) ||
610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (s1 != 0 && s1 <= Collation::MERGE_SEPARATOR_BYTE) ||
611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (t1 != 0 && t1 <= Collation::MERGE_SEPARATOR_BYTE)) {
612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(t1 != 0 && t1 > 0x3f) {
615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(c > 2) {
618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The valid byte range for the second primary byte depends on compressibility.
621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(p2 != 0) {
622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data.isCompressibleLeadByte(p1)) {
623fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(p2 <= Collation::PRIMARY_COMPRESSION_LOW_BYTE ||
624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    Collation::PRIMARY_COMPRESSION_HIGH_BYTE <= p2) {
625fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return FALSE;
626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(p2 <= Collation::LEVEL_SEPARATOR_BYTE) {
629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return FALSE;
630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Other bytes just need to avoid the level separator.
634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Trailing zeros are ok.
635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE == 1);
636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(p3 == Collation::LEVEL_SEPARATOR_BYTE || p4 == Collation::LEVEL_SEPARATOR_BYTE ||
637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            s2 == Collation::LEVEL_SEPARATOR_BYTE || t2 == Collation::LEVEL_SEPARATOR_BYTE) {
638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Well-formed CEs.
641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(p == 0) {
642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(s == 0) {
643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(t == 0) {
644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Completely ignorable CE.
645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Quaternary CEs are not supported.
646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(c != 0 || q != 0) {
647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return FALSE;
648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Tertiary CE.
651fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(t < re.getTertiaryBoundary() || c != 2) {
652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return FALSE;
653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Secondary CE.
657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(s < re.getSecondaryBoundary() || t == 0 || t >= re.getTertiaryBoundary()) {
658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return FALSE;
659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Primary CE.
663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(s == 0 || (Collation::COMMON_WEIGHT16 < s && s <= re.getLastCommonSecondary()) ||
664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                s >= re.getSecondaryBoundary()) {
665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(t == 0 || t >= re.getTertiaryBoundary()) {
668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool isValidCE(const CollationRootElements &re, const CollationData &data, int64_t ce) {
675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p = (uint32_t)(ce >> 32);
676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t secTer = (uint32_t)ce;
677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return isValidCE(re, data, p, secTer >> 16, secTer & 0xffff);
678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass RootElementsIterator {
681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    RootElementsIterator(const CollationData &root)
683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : data(root),
684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              elements(root.rootElements), length(root.rootElementsLength),
685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              pri(0), secTer(0),
686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              index((int32_t)elements[CollationRootElements::IX_FIRST_TERTIARY_INDEX]) {}
687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool next() {
689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(index >= length) { return FALSE; }
690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t p = elements[index];
691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(p == CollationRootElements::PRIMARY_SENTINEL) { return FALSE; }
692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((p & CollationRootElements::SEC_TER_DELTA_FLAG) != 0) {
693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++index;
694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            secTer = p & ~CollationRootElements::SEC_TER_DELTA_FLAG;
695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return TRUE;
696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((p & CollationRootElements::PRIMARY_STEP_MASK) != 0) {
698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // End of a range, enumerate the primaries in the range.
699fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t step = (int32_t)p & CollationRootElements::PRIMARY_STEP_MASK;
700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            p &= 0xffffff00;
701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(pri == p) {
702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Finished the range, return the next CE after it.
703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++index;
704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return next();
705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            U_ASSERT(pri < p);
707fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Return the next primary in this range.
708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UBool isCompressible = data.isCompressiblePrimary(pri);
709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((pri & 0xffff) == 0) {
710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                pri = Collation::incTwoBytePrimaryByOffset(pri, isCompressible, step);
711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
712fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                pri = Collation::incThreeBytePrimaryByOffset(pri, isCompressible, step);
713fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return TRUE;
715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Simple primary CE.
717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++index;
718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pri = p;
719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        secTer = Collation::COMMON_SEC_AND_TER_CE;
720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return TRUE;
721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t getPrimary() const { return pri; }
724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t getSecTer() const { return secTer; }
725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
727fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData &data;
728fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint32_t *elements;
729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length;
730fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
731fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t pri;
732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t secTer;
733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t index;
734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
735fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestRootElements() {
739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestRootElements");
740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *root = CollationRoot::getData(errorCode);
741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) {
742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
743fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationRootElements rootElements(root->rootElements, root->rootElementsLength);
745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    RootElementsIterator iter(*root);
746fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // We check each root CE for validity,
748fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // and we also verify that there is a tailoring gap between each two CEs.
749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationWeights cw1c;  // compressible primary weights
750fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationWeights cw1u;  // uncompressible primary weights
751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationWeights cw2;
752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationWeights cw3;
753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw1c.initForPrimary(TRUE);
755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw1u.initForPrimary(FALSE);
756fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw2.initForSecondary();
757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cw3.initForTertiary();
758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Note: The root elements do not include Han-implicit or unassigned-implicit CEs,
760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // nor the special merge-separator CE for U+FFFE.
761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t prevPri = 0;
762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t prevSec = 0;
763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t prevTer = 0;
764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(iter.next()) {
765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t pri = iter.getPrimary();
766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t secTer = iter.getSecTer();
767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // CollationRootElements CEs must have 0 case and quaternary bits.
768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((secTer & Collation::CASE_AND_QUATERNARY_MASK) != 0) {
769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx",
770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (long)pri, (long)secTer);
771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t sec = secTer >> 16;
773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t ter = secTer & Collation::ONLY_TERTIARY_MASK;
774fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t ctq = ter;
775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(pri == 0 && sec == 0 && ter != 0) {
776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Tertiary CEs must have uppercase bits,
777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // but they are not stored in the CollationRootElements.
778fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ctq |= 0x8000;
779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!isValidCE(rootElements, *root, pri, sec, ctq)) {
781fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("invalid root CE %08lx %08lx", (long)pri, (long)secTer);
782fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(pri != prevPri) {
784fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uint32_t newWeight = 0;
785fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(prevPri == 0 || prevPri >= Collation::FFFD_PRIMARY) {
786fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // There is currently no tailoring gap after primary ignorables,
787fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // and we forbid tailoring after U+FFFD and U+FFFF.
788fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else if(root->isCompressiblePrimary(prevPri)) {
789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(!cw1c.allocWeights(prevPri, pri, 1)) {
790fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errln("no primary/compressible tailoring gap between %08lx and %08lx",
791fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              (long)prevPri, (long)pri);
792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
793fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        newWeight = cw1c.nextWeight();
794fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
795fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
796fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(!cw1u.allocWeights(prevPri, pri, 1)) {
797fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errln("no primary/uncompressible tailoring gap between %08lx and %08lx",
798fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              (long)prevPri, (long)pri);
799fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
800fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        newWeight = cw1u.nextWeight();
801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
803fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(newWeight != 0 && !(prevPri < newWeight && newWeight < pri)) {
804fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx",
805fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          (long)prevPri, (long)newWeight, (long)pri);
806fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
807fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(sec != prevSec) {
808fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uint32_t lowerLimit =
809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    prevSec == 0 ? rootElements.getSecondaryBoundary() - 0x100 : prevSec;
810fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(!cw2.allocWeights(lowerLimit, sec, 1)) {
811fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    errln("no secondary tailoring gap between %04x and %04x", lowerLimit, sec);
812fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
813fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t newWeight = cw2.nextWeight();
814fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(!(prevSec < newWeight && newWeight < sec)) {
815fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              (long)lowerLimit, (long)newWeight, (long)sec);
817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
818fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
819fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(ter != prevTer) {
820fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uint32_t lowerLimit =
821fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    prevTer == 0 ? rootElements.getTertiaryBoundary() - 0x100 : prevTer;
822fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(!cw3.allocWeights(lowerLimit, ter, 1)) {
823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    errln("no teriary tailoring gap between %04x and %04x", lowerLimit, ter);
824fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t newWeight = cw3.nextWeight();
826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(!(prevTer < newWeight && newWeight < ter)) {
827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errln("mis-allocated secondary weight, should get %04x < %04x < %04x",
828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              (long)lowerLimit, (long)newWeight, (long)ter);
829fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
830fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
831fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
832fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errln("duplicate root CE %08lx %08lx", (long)pri, (long)secTer);
833fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
834fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
835fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prevPri = pri;
836fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prevSec = sec;
837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prevTer = ter;
838fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
839fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
840fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
841fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestTailoredElements() {
842fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestTailoredElements");
843fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *root = CollationRoot::getData(errorCode);
844fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) {
845fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
846fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
847fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationRootElements rootElements(root->rootElements, root->rootElementsLength);
848fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
849fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UHashtable *prevLocales = uhash_open(uhash_hashChars, uhash_compareChars, NULL, errorCode);
850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logIfFailureAndReset("failed to create a hash table")) {
851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uhash_setKeyDeleter(prevLocales, uprv_free);
854fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // TestRootElements() tests the root collator which does not have tailorings.
855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uhash_puti(prevLocales, uprv_strdup(""), 1, errorCode);
856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uhash_puti(prevLocales, uprv_strdup("root"), 1, errorCode);
857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uhash_puti(prevLocales, uprv_strdup("root@collation=standard"), 1, errorCode);
858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
859fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector64 ces(errorCode);
860fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalPointer<StringEnumeration> locales(Collator::getAvailableLocales());
861fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT(locales.isValid());
862fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const char *localeID = "root";
863fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    do {
864fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Locale locale(localeID);
865fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        LocalPointer<StringEnumeration> types(
866fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                Collator::getKeywordValuesForLocale("collation", locale, FALSE, errorCode));
867fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.assertSuccess();
868fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const char *type = NULL;  // default type
869fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        do {
870fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            Locale localeWithType(locale);
871fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(type != NULL) {
872fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                localeWithType.setKeywordValue("collation", type, errorCode);
873fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
874fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.assertSuccess();
875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            LocalPointer<Collator> coll(Collator::createInstance(localeWithType, errorCode));
876fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(errorCode.logIfFailureAndReset("Collator::createInstance(%s)",
877fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                              localeWithType.getName())) {
878fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                continue;
879fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
880fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, errorCode);
881fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(uhash_geti(prevLocales, actual.getName()) != 0) {
882fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                continue;
883fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
884fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uhash_puti(prevLocales, uprv_strdup(actual.getName()), 1, errorCode);
885fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.assertSuccess();
886fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            logln("TestTailoredElements(): requested %s -> actual %s",
887fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  localeWithType.getName(), actual.getName());
888fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias());
889fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(rbc == NULL) {
890fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                continue;
891fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
892fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Note: It would be better to get tailored strings such that we can
893fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // identify the prefix, and only get the CEs for the prefix+string,
894fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // not also for the prefix.
895fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // There is currently no API for that.
896fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // It would help in an unusual case where a contraction starting in the prefix
897fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // extends past its end, and we do not see the intended mapping.
898fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // For example, for a mapping p|st, if there is also a contraction ps,
899fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // then we get CEs(ps)+CEs(t), rather than CEs(p|st).
900fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            LocalPointer<UnicodeSet> tailored(coll->getTailoredSet(errorCode));
901fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.assertSuccess();
902fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UnicodeSetIterator iter(*tailored);
903fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            while(iter.next()) {
904fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                const UnicodeString &s = iter.getString();
905fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ces.removeAllElements();
906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                rbc->internalGetCEs(s, ces, errorCode);
907fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode.assertSuccess();
908fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                for(int32_t i = 0; i < ces.size(); ++i) {
909fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    int64_t ce = ces.elementAti(i);
910fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(!isValidCE(rootElements, *root, ce)) {
911fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        errln("invalid tailored CE %016llx at CE index %d from string:",
912fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              (long long)ce, (int)i);
913fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        infoln(prettify(s));
914fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
915fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
916fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
917fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } while((type = types->next(NULL, errorCode)) != NULL);
918fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } while((localeID = locales->next(NULL, errorCode)) != NULL);
919fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uhash_close(prevLocales);
920fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
921fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
922fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUnicodeString CollationTest::printSortKey(const uint8_t *p, int32_t length) {
923fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString s;
924fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0; i < length; ++i) {
925fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i > 0) { s.append((UChar)0x20); }
926fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint8_t b = p[i];
927fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(b == 0) {
928fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            s.append((UChar)0x2e);  // period
929fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(b == 1) {
930fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            s.append((UChar)0x7c);  // vertical bar
931fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
932fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            appendHex(b, 2, s);
933fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
934fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
935fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return s;
936fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
937fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
938fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUnicodeString CollationTest::printCollationKey(const CollationKey &key) {
939fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length;
940fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint8_t *p = key.getByteArray(length);
941fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return printSortKey(p, length);
942fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
944fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::readLine(UCHARBUF *f, IcuTestErrorCode &errorCode) {
945fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t lineLength;
946fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *line = ucbuf_readline(f, &lineLength, errorCode);
947fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(line == NULL || errorCode.isFailure()) {
948fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fileLine.remove();
949fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
950fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
951fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ++fileLineNumber;
952fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Strip trailing CR/LF, comments, and spaces.
953fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *comment = u_memchr(line, 0x23, lineLength);  // '#'
954fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(comment != NULL) {
955fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        lineLength = (int32_t)(comment - line);
956fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
957fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; }
958fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
959fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; }
960fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fileLine.setTo(FALSE, line, lineLength);
961fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
962fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
963fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
964fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s,
965fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                UErrorCode &errorCode) {
966fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length = fileLine.length();
967fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t i;
968fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(i = start; i < length && !isSpace(fileLine[i]); ++i) {}
969fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t pipeIndex = fileLine.indexOf((UChar)0x7c, start, i - start);  // '|'
970fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(pipeIndex >= 0) {
971fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prefix = fileLine.tempSubStringBetween(start, pipeIndex).unescape();
972fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(prefix.isEmpty()) {
973fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("empty prefix on line %d", (int)fileLineNumber);
974fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
975fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_PARSE_ERROR;
976fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
977fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
978fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = pipeIndex + 1;
979fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
980fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prefix.remove();
981fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
982fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    s = fileLine.tempSubStringBetween(start, i).unescape();
983fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(s.isEmpty()) {
984fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("empty string on line %d", (int)fileLineNumber);
985fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
986fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_PARSE_ERROR;
987fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
988fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
989fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    start = i;
990fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
991fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
992fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollation::Level CollationTest::parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode) {
993fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    Collation::Level relation;
994fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t start;
995fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(fileLine[0] == 0x3c) {  // <
996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar second = fileLine[1];
997fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = 2;
998fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        switch(second) {
999fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x31:  // <1
1000fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::PRIMARY_LEVEL;
1001fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1002fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x32:  // <2
1003fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::SECONDARY_LEVEL;
1004fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1005fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x33:  // <3
1006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::TERTIARY_LEVEL;
1007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1008fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x34:  // <4
1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::QUATERNARY_LEVEL;
1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x63:  // <c
1012fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::CASE_LEVEL;
1013fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1014fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case 0x69:  // <i
1015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::IDENTICAL_LEVEL;
1016fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1017fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        default:  // just <
1018fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            relation = Collation::NO_LEVEL;
1019fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            start = 1;
1020fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1021fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1022fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(fileLine[0] == 0x3d) {  // =
1023fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        relation = Collation::ZERO_LEVEL;
1024fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = 1;
1025fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
1026fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = 0;
1027fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1028fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(start == 0 || !isSpace(fileLine[start])) {
1029fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber);
1030fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1031fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.set(U_PARSE_ERROR);
1032fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return Collation::NO_LEVEL;
1033fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1034fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    start = skipSpaces(start);
1035fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString prefix;
1036fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    parseString(start, prefix, s, errorCode);
1037fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isSuccess() && !prefix.isEmpty()) {
1038fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber);
1039fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1040fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.set(U_PARSE_ERROR);
1041fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return Collation::NO_LEVEL;
1042fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1043fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(start < fileLine.length()) {
1044fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("unexpected line contents after test string on line %d", (int)fileLineNumber);
1045fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1046fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.set(U_PARSE_ERROR);
1047fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return Collation::NO_LEVEL;
1048fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1049fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return relation;
1050fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1051fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1052fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const struct {
1053fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const char *name;
1054fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UColAttribute attr;
1055fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} attributes[] = {
1056fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "backwards", UCOL_FRENCH_COLLATION },
1057fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "alternate", UCOL_ALTERNATE_HANDLING },
1058fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "caseFirst", UCOL_CASE_FIRST },
1059fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "caseLevel", UCOL_CASE_LEVEL },
1060fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // UCOL_NORMALIZATION_MODE is turned on and off automatically.
1061fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "strength", UCOL_STRENGTH },
1062fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated.
1063fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "numeric", UCOL_NUMERIC_COLLATION }
1064fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
1065fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1066fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const struct {
1067fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const char *name;
1068fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UColAttributeValue value;
1069fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} attributeValues[] = {
1070fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "default", UCOL_DEFAULT },
1071fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "primary", UCOL_PRIMARY },
1072fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "secondary", UCOL_SECONDARY },
1073fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "tertiary", UCOL_TERTIARY },
1074fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "quaternary", UCOL_QUATERNARY },
1075fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "identical", UCOL_IDENTICAL },
1076fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "off", UCOL_OFF },
1077fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "on", UCOL_ON },
1078fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "shifted", UCOL_SHIFTED },
1079fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "non-ignorable", UCOL_NON_IGNORABLE },
1080fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "lower", UCOL_LOWER_FIRST },
1081fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { "upper", UCOL_UPPER_FIRST }
1082fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
1083fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1084fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) {
1085fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t start = skipSpaces(1);
1086fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t equalPos = fileLine.indexOf(0x3d);
1087fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(equalPos < 0) {
1088fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(fileLine.compare(start, 7, UNICODE_STRING("reorder", 7)) == 0) {
1089fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseAndSetReorderCodes(start + 7, errorCode);
1090fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1091fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1092fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("missing '=' on line %d", (int)fileLineNumber);
1093fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1094fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.set(U_PARSE_ERROR);
1095fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1096fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1097fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1098fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString attrString = fileLine.tempSubStringBetween(start, equalPos);
1099fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString valueString = fileLine.tempSubString(equalPos+1);
1100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(attrString == UNICODE_STRING("maxVariable", 11)) {
1101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UColReorderCode max;
1102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(valueString == UNICODE_STRING("space", 5)) {
1103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            max = UCOL_REORDER_CODE_SPACE;
1104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(valueString == UNICODE_STRING("punct", 5)) {
1105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            max = UCOL_REORDER_CODE_PUNCTUATION;
1106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(valueString == UNICODE_STRING("symbol", 6)) {
1107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            max = UCOL_REORDER_CODE_SYMBOL;
1108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(valueString == UNICODE_STRING("currency", 8)) {
1109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            max = UCOL_REORDER_CODE_CURRENCY;
1110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
1111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("invalid attribute value name on line %d", (int)fileLineNumber);
1112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.set(U_PARSE_ERROR);
1114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        coll->setMaxVariable(max, errorCode);
1117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(errorCode.isFailure()) {
1118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("setMaxVariable() failed on line %d: %s",
1119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)fileLineNumber, errorCode.errorName());
1120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fileLine.remove();
1124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UColAttribute attr;
1128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0;; ++i) {
1129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i == LENGTHOF(attributes)) {
1130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("invalid attribute name on line %d", (int)fileLineNumber);
1131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.set(U_PARSE_ERROR);
1133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(attrString == UnicodeString(attributes[i].name, -1, US_INV)) {
1136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            attr = attributes[i].attr;
1137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UColAttributeValue value;
1142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0;; ++i) {
1143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i == LENGTHOF(attributeValues)) {
1144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("invalid attribute value name on line %d", (int)fileLineNumber);
1145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.set(U_PARSE_ERROR);
1147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(valueString == UnicodeString(attributeValues[i].name, -1, US_INV)) {
1150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            value = attributeValues[i].value;
1151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll->setAttribute(attr, value, errorCode);
1156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
1157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("illegal attribute=value combination on line %d: %s",
1158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, errorCode.errorName());
1159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fileLine.remove();
1163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode) {
1166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UVector32 reorderCodes(errorCode);
1167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(start < fileLine.length()) {
1168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = skipSpaces(start);
1169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t limit = start;
1170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        while(limit < fileLine.length() && !isSpace(fileLine[limit])) { ++limit; }
1171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CharString name;
1172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        name.appendInvariantChars(fileLine.tempSubStringBetween(start, limit), errorCode);
1173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t code = CollationRuleParser::getReorderCode(name.data());
1174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(code < -1) {
1175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("invalid reorder code '%s' on line %d", name.data(), (int)fileLineNumber);
1176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.set(U_PARSE_ERROR);
1178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderCodes.addElement(code, errorCode);
1181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = limit;
1182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode);
1184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
1185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("setReorderCodes() failed on line %d: %s", (int)fileLineNumber, errorCode.errorName());
1186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fileLine.remove();
1190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) {
1193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString rules;
1194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(readLine(f, errorCode)) {
1195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(fileLine.isEmpty()) { continue; }
1196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(isSectionStarter(fileLine[0])) { break; }
1197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        rules.append(fileLine.unescape());
1198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return; }
1200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    logln(rules);
1201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UParseError parseError;
1203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString reason;
1204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete coll;
1205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll = new RuleBasedCollator(rules, parseError, reason, errorCode);
1206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(coll == NULL) {
1207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("unable to allocate a new collator");
1208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode.set(U_MEMORY_ALLOCATION_ERROR);
1209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
1212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("RuleBasedCollator(rules) failed - %s", errorCode.errorName());
1213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(UnicodeString("  reason: ") + reason);
1214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(parseError.offset >= 0) { infoln("  rules offset: %d", (int)parseError.offset); }
1215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
1216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(UnicodeString("  snippet: ...") +
1217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                parseError.preContext + "(!)" + parseError.postContext + "...");
1218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
1220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        assertEquals("no error reason when RuleBasedCollator(rules) succeeds",
1221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                     UnicodeString(), reason);
1222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::setRootCollator(IcuTestErrorCode &errorCode) {
1226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return; }
1227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete coll;
1228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll = Collator::createInstance(Locale::getRoot(), errorCode);
1229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
1230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dataerrln("unable to create a root collator");
1231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) {
1236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return; }
1237dbc22bd174be483711cea006f3189d8289835830ccornelius    int32_t at = fileLine.indexOf((UChar)0x40, 9);  // @ is not invariant
1238dbc22bd174be483711cea006f3189d8289835830ccornelius    if(at >= 0) {
1239dbc22bd174be483711cea006f3189d8289835830ccornelius        fileLine.setCharAt(at, (UChar)0x2a);  // *
1240dbc22bd174be483711cea006f3189d8289835830ccornelius    }
1241dbc22bd174be483711cea006f3189d8289835830ccornelius    CharString localeID;
1242dbc22bd174be483711cea006f3189d8289835830ccornelius    localeID.appendInvariantChars(fileLine.tempSubString(9), errorCode);
1243dbc22bd174be483711cea006f3189d8289835830ccornelius    if(at >= 0) {
1244dbc22bd174be483711cea006f3189d8289835830ccornelius        localeID.data()[at - 9] = '@';
1245dbc22bd174be483711cea006f3189d8289835830ccornelius    }
1246dbc22bd174be483711cea006f3189d8289835830ccornelius    Locale locale(localeID.data());
1247dbc22bd174be483711cea006f3189d8289835830ccornelius    if(fileLine.length() == 9 || errorCode.isFailure() || locale.isBogus()) {
1248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("invalid language tag on line %d", (int)fileLineNumber);
1249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(errorCode.isSuccess()) { errorCode.set(U_PARSE_ERROR); }
1251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    logln("creating a collator for locale ID %s", locale.getName());
1255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    Collator *newColl = Collator::createInstance(locale, errorCode);
1256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
1257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dataerrln("unable to create a collator for locale %s on line %d",
1258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  locale.getName(), (int)fileLineNumber);
1259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete coll;
1263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll = newColl;
1264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const {
1267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode) || !fcd->isNormalized(s, errorCode)) { return TRUE; }
1268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // In some sequences with Tibetan composite vowel signs,
1269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // even if the string passes the FCD check,
1270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // those composites must be decomposed.
1271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81.
1272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t index = 0;
1273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while((index = s.indexOf((UChar)0xf71, index)) >= 0) {
1274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(++index < s.length()) {
1275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar c = s[index];
1276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(c == 0xf73 || c == 0xf75 || c == 0xf81) { return TRUE; }
1277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return FALSE;
1280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::getSortKeyParts(const UChar *s, int32_t length,
1283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     CharString &dest, int32_t partSize,
1284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     IcuTestErrorCode &errorCode) {
1285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return FALSE; }
1286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t part[32];
1287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT(partSize <= LENGTHOF(part));
1288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharIterator iter;
1289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uiter_setString(&iter, s, length);
1290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t state[2] = { 0, 0 };
1291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
1292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t partLength = coll->internalNextSortKeyPart(&iter, state, part, partSize, errorCode);
1293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UBool done = partLength < partSize;
1294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(done) {
1295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // At the end, append the next byte as well which should be 00.
1296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++partLength;
1297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dest.append(reinterpret_cast<char *>(part), partLength, errorCode);
1299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(done) {
1300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return errorCode.isSuccess();
1301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line,
1306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     const UChar *s, int32_t length,
1307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     CollationKey &key, IcuTestErrorCode &errorCode) {
1308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return FALSE; }
1309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    coll->getCollationKey(s, length, key, errorCode);
1310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) {
1311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("Collator(%s).getCollationKey() failed: %s",
1313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              norm, errorCode.errorName());
1314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(line);
1315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t keyLength;
1318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint8_t *keyBytes = key.getByteArray(keyLength);
1319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(keyLength == 0 || keyBytes[keyLength - 1] != 0) {
1320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("Collator(%s).getCollationKey() wrote an empty or unterminated key",
1322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              norm);
1323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(line);
1324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t numLevels = coll->getAttribute(UCOL_STRENGTH, errorCode);
1329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(numLevels < UCOL_IDENTICAL) {
1330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++numLevels;
1331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
1332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        numLevels = 5;
1333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON) {
1335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ++numLevels;
1336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode.assertSuccess();
1338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t numLevelSeparators = 0;
1339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0; i < (keyLength - 1); ++i) {
1340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint8_t b = keyBytes[i];
1341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(b == 0) {
1342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("Collator(%s).getCollationKey() contains a 00 byte", norm);
1344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(line);
1345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(b == 1) { ++numLevelSeparators; }
1349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(numLevelSeparators != (numLevels - 1)) {
1351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("Collator(%s).getCollationKey() has %d level separators for %d levels",
1353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              norm, (int)numLevelSeparators, (int)numLevels);
1354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(line);
1355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // If s contains U+FFFE, check that merged segments make the same key.
1360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalMemory<uint8_t> mergedKey;
1361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t mergedKeyLength = 0;
1362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t mergedKeyCapacity = 0;
1363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t sLength = (length >= 0) ? length : u_strlen(s);
1364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t segmentStart = 0;
1365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t i = 0;;) {
1366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i == sLength) {
1367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(segmentStart == 0) {
1368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // s does not contain any U+FFFE.
1369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                break;
1370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
1371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(s[i] != 0xfffe) {
1372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++i;
1373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            continue;
1374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Get the sort key for another segment and merge it into mergedKey.
1376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CollationKey key1(mergedKey.getAlias(), mergedKeyLength);  // copies the bytes
1377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CollationKey key2;
1378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        coll->getCollationKey(s + segmentStart, i - segmentStart, key2, errorCode);
1379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t key1Length, key2Length;
1380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint8_t *key1Bytes = key1.getByteArray(key1Length);
1381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint8_t *key2Bytes = key2.getByteArray(key2Length);
1382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint8_t *dest;
1383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t minCapacity = key1Length + key2Length;
1384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(key1Length > 0) { --minCapacity; }
1385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(minCapacity <= mergedKeyCapacity) {
1386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            dest = mergedKey.getAlias();
1387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
1388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(minCapacity <= 200) {
1389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                mergedKeyCapacity = 200;
1390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(minCapacity <= 2 * mergedKeyCapacity) {
1391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                mergedKeyCapacity *= 2;
1392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
1393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                mergedKeyCapacity = minCapacity;
1394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
1395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            dest = mergedKey.allocateInsteadAndReset(mergedKeyCapacity);
1396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(dest != NULL || mergedKeyCapacity == 0);
1398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(key1Length == 0) {
1399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // key2 is the sort key for the first segment.
1400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memcpy(dest, key2Bytes, key2Length);
1401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            mergedKeyLength = key2Length;
1402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
1403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            mergedKeyLength =
1404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ucol_mergeSortkeys(key1Bytes, key1Length, key2Bytes, key2Length,
1405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                   dest, mergedKeyCapacity);
1406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(i == sLength) { break; }
1408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        segmentStart = ++i;
1409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(segmentStart != 0 &&
1411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (mergedKeyLength != keyLength ||
1412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memcmp(mergedKey.getAlias(), keyBytes, keyLength) != 0)) {
1413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("Collator(%s).getCollationKey(with U+FFFE) != "
1415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              "ucol_mergeSortkeys(segments)",
1416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              norm);
1417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(line);
1418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printSortKey(mergedKey.getAlias(), mergedKeyLength));
1420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Check that internalNextSortKeyPart() makes the same key, with several part sizes.
1424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const int32_t partSizes[] = { 32, 3, 1 };
1425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(int32_t psi = 0; psi < LENGTHOF(partSizes); ++psi) {
1426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t partSize = partSizes[psi];
1427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CharString parts;
1428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!getSortKeyParts(s, length, parts, 32, errorCode)) {
1429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s",
1431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  norm, (int)partSize, errorCode.errorName());
1432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(line);
1433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) {
1436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)",
1438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  norm, (int)partSize);
1439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(line);
1440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length()));
1442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
1446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
1449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
1451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Replaces unpaired surrogates with U+FFFD.
1452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns s if no replacement was made, otherwise buffer.
1453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
1454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusconst UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buffer) {
1455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t i = 0;
1456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(i < s.length()) {
1457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = s.char32At(i);
1458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_IS_SURROGATE(c)) {
1459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(buffer.length() < i) {
1460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                buffer.append(s, buffer.length(), i - buffer.length());
1461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
1462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer.append((UChar)0xfffd);
1463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        i += U16_LENGTH(c);
1465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(buffer.isEmpty()) {
1467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return s;
1468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(buffer.length() < i) {
1470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        buffer.append(s, buffer.length(), i - buffer.length());
1471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return buffer;
1473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine,
1478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     const UnicodeString &prevString, const UnicodeString &s,
1479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     UCollationResult expectedOrder, Collation::Level expectedLevel,
1480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                     IcuTestErrorCode &errorCode) {
1481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return FALSE; }
1482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Get the sort keys first, for error debug output.
1484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationKey prevKey;
1485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!getCollationKey(norm, prevFileLine, prevString.getBuffer(), prevString.length(),
1486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        prevKey, errorCode)) {
1487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationKey key;
1490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!getCollationKey(norm, fileLine, s.getBuffer(), s.length(), key, errorCode)) { return FALSE; }
1491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCollationResult order = coll->compare(prevString, s, errorCode);
1493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != expectedOrder || errorCode.isFailure()) {
1494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)",
1496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
1497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(prevFileLine);
1498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(prevKey));
1500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    order = coll->compare(s, prevString, errorCode);
1504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != -expectedOrder || errorCode.isFailure()) {
1505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)",
1507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, norm, order, -expectedOrder, errorCode.errorName());
1508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(prevFileLine);
1509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(prevKey));
1511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Test NUL-termination if the strings do not contain NUL characters.
1515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool containNUL = prevString.indexOf((UChar)0) >= 0 || s.indexOf((UChar)0) >= 0;
1516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!containNUL) {
1517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        order = coll->compare(prevString.getBuffer(), -1, s.getBuffer(), -1, errorCode);
1518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(order != expectedOrder || errorCode.isFailure()) {
1519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
1522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(prevFileLine);
1523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(prevKey));
1525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        order = coll->compare(s.getBuffer(), -1, prevString.getBuffer(), -1, errorCode);
1529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(order != -expectedOrder || errorCode.isFailure()) {
1530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)fileLineNumber, norm, order, -expectedOrder, errorCode.errorName());
1533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(prevFileLine);
1534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(prevKey));
1536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if U_HAVE_STD_STRING
1542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // compare(UTF-16) treats unpaired surrogates like unassigned code points.
1543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Unpaired surrogates cannot be converted to UTF-8.
1544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Create valid UTF-16 strings if necessary, and use those for
1545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // both the expected compare() result and for the input to compare(UTF-8).
1546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString prevBuffer, sBuffer;
1547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeString &prevValid = surrogatesToFFFD(prevString, prevBuffer);
1548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeString &sValid = surrogatesToFFFD(s, sBuffer);
1549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    std::string prevUTF8, sUTF8;
1550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString(prevValid).toUTF8String(prevUTF8);
1551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString(sValid).toUTF8String(sUTF8);
1552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCollationResult expectedUTF8Order;
1553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(&prevValid == &prevString && &sValid == &s) {
1554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        expectedUTF8Order = expectedOrder;
1555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
1556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        expectedUTF8Order = coll->compare(prevValid, sValid, errorCode);
1557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    order = coll->compareUTF8(prevUTF8, sUTF8, errorCode);
1560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != expectedUTF8Order || errorCode.isFailure()) {
1561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)",
1563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, norm, order, expectedUTF8Order, errorCode.errorName());
1564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(prevFileLine);
1565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(prevKey));
1567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    order = coll->compareUTF8(sUTF8, prevUTF8, errorCode);
1571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != -expectedUTF8Order || errorCode.isFailure()) {
1572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)",
1574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, norm, order, -expectedUTF8Order, errorCode.errorName());
1575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(prevFileLine);
1576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(prevKey));
1578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Test NUL-termination if the strings do not contain NUL characters.
1582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!containNUL) {
1583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        order = coll->internalCompareUTF8(prevUTF8.c_str(), -1, sUTF8.c_str(), -1, errorCode);
1584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(order != expectedUTF8Order || errorCode.isFailure()) {
1585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)",
1587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)fileLineNumber, norm, order, expectedUTF8Order, errorCode.errorName());
1588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(prevFileLine);
1589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(prevKey));
1591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        order = coll->internalCompareUTF8(sUTF8.c_str(), -1, prevUTF8.c_str(), -1, errorCode);
1595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(order != -expectedUTF8Order || errorCode.isFailure()) {
1596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)",
1598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)fileLineNumber, norm, order, -expectedUTF8Order, errorCode.errorName());
1599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(prevFileLine);
1600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(prevKey));
1602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif
1607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharIterator leftIter;
1609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharIterator rightIter;
1610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uiter_setString(&leftIter, prevString.getBuffer(), prevString.length());
1611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uiter_setString(&rightIter, s.getBuffer(), s.length());
1612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    order = coll->compare(leftIter, rightIter, errorCode);
1613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != expectedOrder || errorCode.isFailure()) {
1614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("line %d Collator(%s).compare(UCharIterator: previous, current) "
1616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              "wrong order: %d != %d (%s)",
1617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
1618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(prevFileLine);
1619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(prevKey));
1621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1623fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1625fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    order = prevKey.compareTo(key, errorCode);
1626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != expectedOrder || errorCode.isFailure()) {
1627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileTestName);
1628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)",
1629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName());
1630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(prevFileLine);
1631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(fileLine);
1632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(prevKey));
1633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        infoln(printCollationKey(key));
1634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
1635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) {
1637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t prevKeyLength;
1638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength);
1639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t keyLength;
1640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint8_t *bytes = key.getByteArray(keyLength);
1641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t level = Collation::PRIMARY_LEVEL;
1642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0;; ++i) {
1643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint8_t b = prevBytes[i];
1644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(b != bytes[i]) { break; }
1645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(b == Collation::LEVEL_SEPARATOR_BYTE) {
1646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++level;
1647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(level == Collation::CASE_LEVEL &&
1648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_OFF) {
1649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ++level;
1650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
1651fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
1652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(level != expectedLevel) {
1654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileTestName);
1655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d",
1656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                  (int)fileLineNumber, norm, order, level, expectedLevel);
1657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(prevFileLine);
1658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(prevKey));
1660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(printCollationKey(key));
1661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return FALSE;
1662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
1665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode) {
1668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.isFailure()) { return; }
1669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString prevFileLine = UNICODE_STRING("(none)", 6);
1670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString prevString, s;
1671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    prevString.getTerminatedBuffer();  // Ensure NUL-termination.
1672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(readLine(f, errorCode)) {
1673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(fileLine.isEmpty()) { continue; }
1674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(isSectionStarter(fileLine[0])) { break; }
1675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Collation::Level relation = parseRelationAndString(s, errorCode);
1676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(errorCode.isFailure()) {
1677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.reset();
1678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
1679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UCollationResult expectedOrder = (relation == Collation::ZERO_LEVEL) ? UCOL_EQUAL : UCOL_LESS;
1681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Collation::Level expectedLevel = relation;
1682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        s.getTerminatedBuffer();  // Ensure NUL-termination.
1683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UBool isOk = TRUE;
1684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!needsNormalization(prevString, errorCode) && !needsNormalization(s, errorCode)) {
1685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, errorCode);
1686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            isOk = checkCompareTwo("normalization=on", prevFileLine, prevString, s,
1687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                   expectedOrder, expectedLevel, errorCode);
1688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(isOk) {
1690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, errorCode);
1691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            isOk = checkCompareTwo("normalization=off", prevFileLine, prevString, s,
1692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                   expectedOrder, expectedLevel, errorCode);
1693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(isOk && (!nfd->isNormalized(prevString, errorCode) || !nfd->isNormalized(s, errorCode))) {
1695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UnicodeString pn = nfd->normalize(prevString, errorCode);
1696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UnicodeString n = nfd->normalize(s, errorCode);
1697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            pn.getTerminatedBuffer();
1698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            n.getTerminatedBuffer();
1699fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.assertSuccess();
1700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            isOk = checkCompareTwo("NFD input", prevFileLine, pn, n,
1701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                   expectedOrder, expectedLevel, errorCode);
1702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!isOk) {
1704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode.reset();  // already reported
1705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prevFileLine = fileLine;
1707fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prevString = s;
1708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        prevString.getTerminatedBuffer();  // Ensure NUL-termination.
1709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1712fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestDataDriven() {
1713fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    IcuTestErrorCode errorCode(*this, "TestDataDriven");
1714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    fcd = Normalizer2Factory::getFCDInstance(errorCode);
1716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    nfd = Normalizer2Factory::getNFDInstance(errorCode);
1717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) {
1718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CharString path(getSourceTestData(errorCode), errorCode);
1722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    path.appendPathPart("collationtest.txt", errorCode);
1723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const char *codePage = "UTF-8";
1724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, errorCode));
1725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) {
1726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
1727fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1728fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(errorCode.isSuccess()) {
1729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Read a new line if necessary.
1730fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Sub-parsers leave the first line set that they do not handle.
1731fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(fileLine.isEmpty()) {
1732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!readLine(f.getAlias(), errorCode)) { break; }
1733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            continue;
1734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1735fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!isSectionStarter(fileLine[0])) {
1736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("syntax error on line %d", (int)fileLineNumber);
1737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(fileLine.startsWith(UNICODE_STRING("** test: ", 9))) {
1741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fileTestName = fileLine;
1742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            logln(fileLine);
1743fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fileLine.remove();
1744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(fileLine == UNICODE_STRING("@ root", 6)) {
1745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setRootCollator(errorCode);
1746fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fileLine.remove();
1747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(fileLine.startsWith(UNICODE_STRING("@ locale ", 9))) {
1748fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setLocaleCollator(errorCode);
1749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fileLine.remove();
1750fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(fileLine == UNICODE_STRING("@ rules", 7)) {
1751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buildTailoring(f.getAlias(), errorCode);
1752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(fileLine[0] == 0x25 && isSpace(fileLine[1])) {  // %
1753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            parseAndSetAttribute(errorCode);
1754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(fileLine == UNICODE_STRING("* compare", 9)) {
1755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            checkCompareStrings(f.getAlias(), errorCode);
1756fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
1757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errln("syntax error on line %d", (int)fileLineNumber);
1758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            infoln(fileLine);
1759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
1760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
1761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
1762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
1763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
1765