10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
58de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert*   Copyright (C) 1999-2016, International Business Machines
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Date        Name        Description
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   12/09/99    aliu        Ported from Java.
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "thcoll.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/coll.h"
2054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "unicode/localpointer.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/sortkey.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h"
248de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "cmemory.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "filestrm.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "textfile.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The TestDictionary test expects a file of this name, with this
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * encoding, to be present in the directory $ICU/source/test/testdata.
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//#define TEST_FILE           "th18057.txt"
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is the most failures we show in TestDictionary.  If this number
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is < 0, we show all failures.
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_FAILURES_TO_SHOW -1
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationThaiTest::CollationThaiTest() {
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    coll = Collator::createInstance(Locale("th", "TH", ""), status);
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (coll && U_SUCCESS(status)) {
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        //coll->setStrength(Collator::TERTIARY);
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete coll;
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        coll = 0;
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationThaiTest::~CollationThaiTest() {
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    delete coll;
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::runIndexedTest(int32_t index, UBool exec, const char* &name,
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                       char* /*par*/) {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((!coll) && exec) {
6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho      dataerrln(__FILE__ " cannot test - failed to create collator.");
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      name = "some test";
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      return;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch (index) {
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        TESTCASE(0,TestDictionary);
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        TESTCASE(1,TestCornerCases);
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        TESTCASE(2,TestNamesList);
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        TESTCASE(3,TestInvalidThai);
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        TESTCASE(4,TestReordering);
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default: name = ""; break;
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Read the external names list, and confirms that the collator
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * gets the same results when comparing lines one to another
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * using regular and iterative comparison.
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::TestNamesList(void) {
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (coll == 0) {
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("Error: could not construct Thai collator");
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TextFile names("TestNames_Thai.txt", "UTF16LE", ec);
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(ec)) {
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        logln("Can't open TestNames_Thai.txt: %s; skipping test",
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              u_errorName(ec));
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Loop through each word in the dictionary and compare it to the previous
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // word.  They should be in sorted order.
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString lastWord, word;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //int32_t failed = 0;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t wordCount = 0;
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (names.readLineSkippingComments(word, ec, FALSE) && U_SUCCESS(ec)) {
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Show the first 8 words being compared, so we can see what's happening
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++wordCount;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (wordCount <= 8) {
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString str;
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            logln((UnicodeString)"Word " + wordCount + ": " + IntlTest::prettify(word, str));
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (lastWord.length() > 0) {
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            Collator::EComparisonResult result = coll->compare(lastWord, word);
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            doTest(coll, lastWord, word, result);
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        lastWord = word;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    assertSuccess("readLine", ec);
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    logln((UnicodeString)"Words checked: " + wordCount);
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Read the external dictionary file, which is already in proper
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sorted order, and confirm that the collator compares each line as
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * preceding the following line.
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::TestDictionary(void) {
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (coll == 0) {
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("Error: could not construct Thai collator");
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TextFile riwords("riwords.txt", "UTF8", ec);
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(ec)) {
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        logln("Can't open riwords.txt: %s; skipping test",
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              u_errorName(ec));
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Loop through each word in the dictionary and compare it to the previous
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // word.  They should be in sorted order.
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString lastWord, word;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t failed = 0;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t wordCount = 0;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (riwords.readLineSkippingComments(word, ec, FALSE) && U_SUCCESS(ec)) {
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Show the first 8 words being compared, so we can see what's happening
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++wordCount;
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (wordCount <= 8) {
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString str;
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            logln((UnicodeString)"Word " + wordCount + ": " + IntlTest::prettify(word, str));
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (lastWord.length() > 0) {
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t result = coll->compare(lastWord, word);
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
16054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            if (result > 0) {
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                failed++;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UnicodeString str;
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UnicodeString msg =
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UnicodeString("--------------------------------------------\n")
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        + riwords.getLineNumber()
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        + " compare(" + IntlTest::prettify(lastWord, str);
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    msg += UnicodeString(", ")
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        + IntlTest::prettify(word, str) + ") returned " + result
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        + ", expected -1\n";
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UErrorCode status = U_ZERO_ERROR;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    CollationKey k1, k2;
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    coll->getCollationKey(lastWord, k1, status);
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    coll->getCollationKey(word, k2, status);
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if (U_FAILURE(status)) {
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        return;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    msg.append("key1: ").append(prettify(k1, str)).append("\n");
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    msg.append("key2: ").append(prettify(k2, str));
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    errln(msg);
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        lastWord = word;
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    assertSuccess("readLine", ec);
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (failed != 0) {
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (failed > MAX_FAILURES_TO_SHOW) {
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln((UnicodeString)"Too many failures; only the first " +
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  MAX_FAILURES_TO_SHOW + " failures were shown");
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln((UnicodeString)"Summary: " + failed + " of " + (riwords.getLineNumber() - 1) +
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              " comparisons failed");
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    logln((UnicodeString)"Words checked: " + wordCount);
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::TestCornerCases(void) {
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char* TESTS[] = {
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Shorter words precede longer
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01",                               "<",    "\\u0e01\\u0e01",
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Tone marks are considered after letters (i.e. are primary ignorable)
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01\\u0e32",                        "<",    "\\u0e01\\u0e49\\u0e32",
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // ditto for other over-marks
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01\\u0e32",                        "<",    "\\u0e01\\u0e32\\u0e4c",
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // commonly used mark-in-context order.
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // In effect, marks are sorted after each syllable.
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01\\u0e32\\u0e01\\u0e49\\u0e32",   "<",    "\\u0e01\\u0e48\\u0e32\\u0e01\\u0e49\\u0e32",
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Hyphens and other punctuation follow whitespace but come before letters
22254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        "\\u0e01\\u0e32",                        "=",    "\\u0e01\\u0e32-",
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01\\u0e32-",                       "<",    "\\u0e01\\u0e32\\u0e01\\u0e32",
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Doubler follows an indentical word without the doubler
22654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        "\\u0e01\\u0e32",                        "=",    "\\u0e01\\u0e32\\u0e46",
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01\\u0e32\\u0e46",                 "<",    "\\u0e01\\u0e32\\u0e01\\u0e32",
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // \\u0e45 after either \\u0e24 or \\u0e26 is treated as a single
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // combining character, similar to "c < ch" in traditional spanish.
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // TODO: beef up this case
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e24\\u0e29\\u0e35",                 "<",    "\\u0e24\\u0e45\\u0e29\\u0e35",
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e26\\u0e29\\u0e35",                 "<",    "\\u0e26\\u0e45\\u0e29\\u0e35",
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Vowels reorder, should compare \\u0e2d and \\u0e34
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e40\\u0e01\\u0e2d",                 "<",    "\\u0e40\\u0e01\\u0e34",
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Tones are compared after the rest of the word (e.g. primary ignorable)
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01\\u0e32\\u0e01\\u0e48\\u0e32",   "<",    "\\u0e01\\u0e49\\u0e32\\u0e01\\u0e32",
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Periods are ignored entirely
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        "\\u0e01.\\u0e01.",                      "<",    "\\u0e01\\u0e32",
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    };
2458de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    const int32_t TESTS_length = UPRV_LENGTHOF(TESTS);
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (coll == 0) {
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        errln("Error: could not construct Thai collator");
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    compareArray(*coll, TESTS, TESTS_length);
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------------------
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Internal utilities
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------------------
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::compareArray(Collator& c, const char* tests[],
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     int32_t testsLength) {
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t i = 0; i < testsLength; i += 3) {
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        Collator::EComparisonResult expect;
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (tests[i+1][0] == '<') {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          expect = Collator::LESS;
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if (tests[i+1][0] == '>') {
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          expect = Collator::GREATER;
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if (tests[i+1][0] == '=') {
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          expect = Collator::EQUAL;
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // expect = Integer.decode(tests[i+1]).intValue();
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln((UnicodeString)"Error: unknown operator " + tests[i+1]);
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return;
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeString s1, s2;
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        parseChars(s1, tests[i]);
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        parseChars(s2, tests[i+2]);
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        doTest(&c, s1, s2, expect);
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t result = c.compare(s1, s2);
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (sign(result) != sign(expect))
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString t1, t2;
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln(UnicodeString("") +
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  i/3 + ": compare(" + IntlTest::prettify(s1, t1)
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  + " , " + IntlTest::prettify(s2, t2)
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  + ") got " + result + "; expected " + expect);
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            CollationKey k1, k2;
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c.getCollationKey(s1, k1, status);
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c.getCollationKey(s2, k2, status);
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (U_FAILURE(status)) {
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln((UnicodeString)"  key1: " + prettify(k1, t1) );
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln((UnicodeString)"  key2: " + prettify(k2, t2) );
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Collator.compare worked OK; now try the collation keys
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            CollationKey k1, k2;
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c.getCollationKey(s1, k1, status);
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c.getCollationKey(s2, k2, status);
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (U_FAILURE(status)) {
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result = k1.compareTo(k2);
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (sign(result) != sign(expect)) {
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UnicodeString t1, t2;
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errln(UnicodeString("") +
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      i/3 + ": key(" + IntlTest::prettify(s1, t1)
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      + ").compareTo(key(" + IntlTest::prettify(s2, t2)
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      + ")) got " + result + "; expected " + expect);
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                errln((UnicodeString)"  " + prettify(k1, t1) + " vs. " + prettify(k2, t2));
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint8_t CollationThaiTest::sign(int32_t i) {
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (i < 0) return -1;
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (i > 0) return 1;
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set a UnicodeString corresponding to the given string.  Use
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeString and the default converter, unless we see the sequence
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "\\u", in which case we interpret the subsequent escape.
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& CollationThaiTest::parseChars(UnicodeString& result,
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             const char* chars) {
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result = CharsToUnicodeString(chars);
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCollator *thaiColl = NULL;
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int U_CALLCONV
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStrCmp(const void *p1, const void *p2) {
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  return ucol_strcoll(thaiColl, *(UChar **) p1, -1,  *(UChar **)p2, -1);
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LINES 6
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::TestInvalidThai(void) {
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  const char *tests[LINES] = {
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "\\u0E44\\u0E01\\u0E44\\u0E01",
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "\\u0E44\\u0E01\\u0E01\\u0E44",
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "\\u0E01\\u0E44\\u0E01\\u0E44",
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "\\u0E01\\u0E01\\u0E44\\u0E44",
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "\\u0E44\\u0E44\\u0E01\\u0E01",
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    "\\u0E01\\u0E44\\u0E44\\u0E01",
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  };
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UChar strings[LINES][20];
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UChar *toSort[LINES];
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  int32_t i = 0, j = 0, len = 0;
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UErrorCode coll_status = U_ZERO_ERROR;
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UnicodeString iteratorText;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  thaiColl = ucol_open ("th_TH", &coll_status);
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if (U_FAILURE(coll_status)) {
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errln("Error opening Thai collator: %s", u_errorName(coll_status));
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return;
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  CollationElementIterator* c = ((RuleBasedCollator *)coll)->createCollationElementIterator( iteratorText );
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
3828de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert  for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    len = u_unescape(tests[i], strings[i], 20);
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    strings[i][len] = 0;
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    toSort[i] = strings[i];
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  qsort (toSort, LINES, sizeof (UChar *), StrCmp);
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  for (i=0; i < LINES; i++)
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  {
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    logln("%i", i);
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      for (j=i+1; j < LINES; j++) {
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          if (ucol_strcoll (thaiColl, toSort[i], -1, toSort[j], -1) == UCOL_GREATER)
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          {
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              // inconsistency ordering found!
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            errln("Inconsistent ordering between strings %i and %i", i, j);
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          }
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      iteratorText.setTo(toSort[i]);
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      c->setText(iteratorText, coll_status);
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      backAndForth(*c);
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  ucol_close(thaiColl);
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  delete c;
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationThaiTest::TestReordering(void) {
41154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // Until UCA 4.1, the collation code swapped Thai/Lao prevowels with the following consonants,
41254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // resulting in consonant+prevowel == prevowel+consonant.
41354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // From UCA 5.0 on, there are order-reversing contractions for prevowel+consonant.
41454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // From UCA 5.0 until UCA 6.1, there was a tertiary difference between
41554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // consonant+prevowel and prevowel+consonant.
41654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // In UCA 6.2, they compare equal again.
41754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // The test was modified to using a collator with strength=secondary,
41854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  // ignoring possible tertiary differences.
41954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  const char *tests[] = {
42054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41c\\u0301",       "=", "\\u0E41\\u0107", // composition
42154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\U0001D7CE",    "<", "\\u0E41\\U0001D7CF", // supplementaries
42254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\U0001D15F",    "=", "\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary
42354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\U0002F802",    "=", "\\u0E41\\u4E41", // supplementary composition decomps to BMP
42454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\u0301",        "=", "\\u0E41\\u0301", // unsafe (just checking backwards iteration)
42554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\u0301\\u0316", "=", "\\u0E41\\u0316\\u0301",
42654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
42754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0e24\\u0e41",        "=", "\\u0e41\\u0e24", // exiting contraction bug
42854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0e3f\\u0e3f\\u0e24\\u0e41", "=", "\\u0e3f\\u0e3f\\u0e41\\u0e24",
42954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
43054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41c\\u0301",       "=", "abc\\u0E41\\u0107", // composition
43154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\U0001D000",    "<", "abc\\u0E41\\U0001D001", // supplementaries
43254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\U0001D15F",    "=", "abc\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary
43354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\U0002F802",    "=", "abc\\u0E41\\u4E41", // supplementary composition decomps to BMP
43454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\u0301",        "=", "abc\\u0E41\\u0301", // unsafe (just checking backwards iteration)
43554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\u0301\\u0316", "=", "abc\\u0E41\\u0316\\u0301",
43654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
43754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41c\\u0301abc",       "=", "\\u0E41\\u0107abc", // composition
43854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\U0001D000abc",    "<", "\\u0E41\\U0001D001abc", // supplementaries
43954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\U0001D15Fabc",    "=", "\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary
44054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\U0002F802abc",    "=", "\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
44154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\u0301abc",        "=", "\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
44254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "\\u0E41\\u0301\\u0316abc", "=", "\\u0E41\\u0316\\u0301abc",
44354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
44454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41c\\u0301abc",       "=", "abc\\u0E41\\u0107abc", // composition
44554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\U0001D000abc",    "<", "abc\\u0E41\\U0001D001abc", // supplementaries
44654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\U0001D15Fabc",    "=", "abc\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary
44754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\U0002F802abc",    "=", "abc\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
44854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\u0301abc",        "=", "abc\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
44954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    "abc\\u0E41\\u0301\\u0316abc", "=", "abc\\u0E41\\u0316\\u0301abc",
45054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  };
45154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
45254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  LocalPointer<Collator> coll2(coll->clone());
45354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  UErrorCode status = U_ZERO_ERROR;
45454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  coll2->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
45554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  if(U_FAILURE(status)) {
45654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    errln("Unable to set the Thai collator clone to secondary strength");
45754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    return;
45854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  }
4598de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert  compareArray(*coll2, tests, UPRV_LENGTHOF(tests));
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  const char *rule = "& c < ab";
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  const char *testcontraction[] = { "\\u0E41ab", ">", "\\u0E41c"}; // After UCA 4.1 Thai are normal so won't break a contraction
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  UnicodeString rules;
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  parseChars(rules, rule);
4651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert  LocalPointer<RuleBasedCollator> rcoll(new RuleBasedCollator(rules, status), status);
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(U_SUCCESS(status)) {
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    compareArray(*rcoll, testcontraction, 3);
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    errln("Couldn't instantiate collator from rules");
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
476