16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 1999-2012, International Business Machines
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Date        Name        Description
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   12/09/99    aliu        Ported from Java.
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_COLLATION
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "thcoll.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/coll.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/localpointer.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/sortkey.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "filestrm.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "textfile.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The TestDictionary test expects a file of this name, with this
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * encoding, to be present in the directory $ICU/source/test/testdata.
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//#define TEST_FILE           "th18057.txt"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is the most failures we show in TestDictionary.  If this number
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is < 0, we show all failures.
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAX_FAILURES_TO_SHOW -1
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCollationThaiTest::CollationThaiTest() {
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    coll = Collator::createInstance(Locale("th", "TH", ""), status);
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (coll && U_SUCCESS(status)) {
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //coll->setStrength(Collator::TERTIARY);
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete coll;
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        coll = 0;
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCollationThaiTest::~CollationThaiTest() {
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete coll;
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::runIndexedTest(int32_t index, UBool exec, const char* &name,
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                       char* /*par*/) {
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((!coll) && exec) {
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      dataerrln(__FILE__ " cannot test - failed to create collator.");
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      name = "some test";
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return;
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (index) {
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        TESTCASE(0,TestDictionary);
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        TESTCASE(1,TestCornerCases);
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        TESTCASE(2,TestNamesList);
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        TESTCASE(3,TestInvalidThai);
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        TESTCASE(4,TestReordering);
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        default: name = ""; break;
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Read the external names list, and confirms that the collator
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * gets the same results when comparing lines one to another
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * using regular and iterative comparison.
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::TestNamesList(void) {
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (coll == 0) {
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error: could not construct Thai collator");
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode ec = U_ZERO_ERROR;
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    TextFile names("TestNames_Thai.txt", "UTF16LE", ec);
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(ec)) {
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("Can't open TestNames_Thai.txt: %s; skipping test",
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              u_errorName(ec));
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Loop through each word in the dictionary and compare it to the previous
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // word.  They should be in sorted order.
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString lastWord, word;
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //int32_t failed = 0;
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t wordCount = 0;
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (names.readLineSkippingComments(word, ec, FALSE) && U_SUCCESS(ec)) {
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Show the first 8 words being compared, so we can see what's happening
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++wordCount;
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (wordCount <= 8) {
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString str;
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            logln((UnicodeString)"Word " + wordCount + ": " + IntlTest::prettify(word, str));
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (lastWord.length() > 0) {
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            Collator::EComparisonResult result = coll->compare(lastWord, word);
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            doTest(coll, lastWord, word, result);
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastWord = word;
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    assertSuccess("readLine", ec);
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln((UnicodeString)"Words checked: " + wordCount);
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Read the external dictionary file, which is already in proper
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * sorted order, and confirm that the collator compares each line as
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * preceding the following line.
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::TestDictionary(void) {
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (coll == 0) {
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error: could not construct Thai collator");
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode ec = U_ZERO_ERROR;
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    TextFile riwords("riwords.txt", "UTF8", ec);
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(ec)) {
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("Can't open riwords.txt: %s; skipping test",
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              u_errorName(ec));
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Loop through each word in the dictionary and compare it to the previous
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // word.  They should be in sorted order.
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString lastWord, word;
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t failed = 0;
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t wordCount = 0;
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (riwords.readLineSkippingComments(word, ec, FALSE) && U_SUCCESS(ec)) {
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Show the first 8 words being compared, so we can see what's happening
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        ++wordCount;
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (wordCount <= 8) {
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString str;
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            logln((UnicodeString)"Word " + wordCount + ": " + IntlTest::prettify(word, str));
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (lastWord.length() > 0) {
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            int32_t result = coll->compare(lastWord, word);
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (result > 0) {
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                failed++;
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UnicodeString str;
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UnicodeString msg =
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UnicodeString("--------------------------------------------\n")
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        + riwords.getLineNumber()
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        + " compare(" + IntlTest::prettify(lastWord, str);
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    msg += UnicodeString(", ")
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        + IntlTest::prettify(word, str) + ") returned " + result
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        + ", expected -1\n";
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UErrorCode status = U_ZERO_ERROR;
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    CollationKey k1, k2;
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    coll->getCollationKey(lastWord, k1, status);
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    coll->getCollationKey(word, k2, status);
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if (U_FAILURE(status)) {
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        return;
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    msg.append("key1: ").append(prettify(k1, str)).append("\n");
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    msg.append("key2: ").append(prettify(k2, str));
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    errln(msg);
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        lastWord = word;
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    assertSuccess("readLine", ec);
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (failed != 0) {
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (failed > MAX_FAILURES_TO_SHOW) {
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln((UnicodeString)"Too many failures; only the first " +
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  MAX_FAILURES_TO_SHOW + " failures were shown");
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln((UnicodeString)"Summary: " + failed + " of " + (riwords.getLineNumber() - 1) +
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              " comparisons failed");
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln((UnicodeString)"Words checked: " + wordCount);
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::TestCornerCases(void) {
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const char* TESTS[] = {
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Shorter words precede longer
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01",                               "<",    "\\u0e01\\u0e01",
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Tone marks are considered after letters (i.e. are primary ignorable)
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32",                        "<",    "\\u0e01\\u0e49\\u0e32",
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // ditto for other over-marks
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32",                        "<",    "\\u0e01\\u0e32\\u0e4c",
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // commonly used mark-in-context order.
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // In effect, marks are sorted after each syllable.
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32\\u0e01\\u0e49\\u0e32",   "<",    "\\u0e01\\u0e48\\u0e32\\u0e01\\u0e49\\u0e32",
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Hyphens and other punctuation follow whitespace but come before letters
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32",                        "=",    "\\u0e01\\u0e32-",
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32-",                       "<",    "\\u0e01\\u0e32\\u0e01\\u0e32",
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Doubler follows an indentical word without the doubler
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32",                        "=",    "\\u0e01\\u0e32\\u0e46",
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32\\u0e46",                 "<",    "\\u0e01\\u0e32\\u0e01\\u0e32",
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // \\u0e45 after either \\u0e24 or \\u0e26 is treated as a single
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // combining character, similar to "c < ch" in traditional spanish.
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // TODO: beef up this case
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e24\\u0e29\\u0e35",                 "<",    "\\u0e24\\u0e45\\u0e29\\u0e35",
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e26\\u0e29\\u0e35",                 "<",    "\\u0e26\\u0e45\\u0e29\\u0e35",
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Vowels reorder, should compare \\u0e2d and \\u0e34
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e40\\u0e01\\u0e2d",                 "<",    "\\u0e40\\u0e01\\u0e34",
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Tones are compared after the rest of the word (e.g. primary ignorable)
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01\\u0e32\\u0e01\\u0e48\\u0e32",   "<",    "\\u0e01\\u0e49\\u0e32\\u0e01\\u0e32",
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Periods are ignored entirely
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        "\\u0e01.\\u0e01.",                      "<",    "\\u0e01\\u0e32",
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    };
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const int32_t TESTS_length = (int32_t)(sizeof(TESTS)/sizeof(TESTS[0]));
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (coll == 0) {
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("Error: could not construct Thai collator");
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    compareArray(*coll, TESTS, TESTS_length);
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Internal utilities
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::compareArray(Collator& c, const char* tests[],
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                     int32_t testsLength) {
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (int32_t i = 0; i < testsLength; i += 3) {
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        Collator::EComparisonResult expect;
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (tests[i+1][0] == '<') {
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          expect = Collator::LESS;
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (tests[i+1][0] == '>') {
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          expect = Collator::GREATER;
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if (tests[i+1][0] == '=') {
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          expect = Collator::EQUAL;
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // expect = Integer.decode(tests[i+1]).intValue();
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln((UnicodeString)"Error: unknown operator " + tests[i+1]);
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString s1, s2;
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        parseChars(s1, tests[i]);
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        parseChars(s2, tests[i+2]);
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        doTest(&c, s1, s2, expect);
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UErrorCode status = U_ZERO_ERROR;
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int32_t result = c.compare(s1, s2);
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (sign(result) != sign(expect))
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            UnicodeString t1, t2;
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln(UnicodeString("") +
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  i/3 + ": compare(" + IntlTest::prettify(s1, t1)
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  + " , " + IntlTest::prettify(s2, t2)
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  + ") got " + result + "; expected " + expect);
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            CollationKey k1, k2;
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c.getCollationKey(s1, k1, status);
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c.getCollationKey(s2, k2, status);
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(status)) {
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln((UnicodeString)"  key1: " + prettify(k1, t1) );
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln((UnicodeString)"  key2: " + prettify(k2, t2) );
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        else
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // Collator.compare worked OK; now try the collation keys
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            CollationKey k1, k2;
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c.getCollationKey(s1, k1, status);
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            c.getCollationKey(s2, k2, status);
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (U_FAILURE(status)) {
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln((UnicodeString)"Fail: getCollationKey returned " + u_errorName(status));
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            result = k1.compareTo(k2);
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if (sign(result) != sign(expect)) {
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UnicodeString t1, t2;
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln(UnicodeString("") +
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      i/3 + ": key(" + IntlTest::prettify(s1, t1)
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      + ").compareTo(key(" + IntlTest::prettify(s2, t2)
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      + ")) got " + result + "; expected " + expect);
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                errln((UnicodeString)"  " + prettify(k1, t1) + " vs. " + prettify(k2, t2));
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint8_t CollationThaiTest::sign(int32_t i) {
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (i < 0) return -1;
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (i > 0) return 1;
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return 0;
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set a UnicodeString corresponding to the given string.  Use
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UnicodeString and the default converter, unless we see the sequence
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * "\\u", in which case we interpret the subsequent escape.
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString& CollationThaiTest::parseChars(UnicodeString& result,
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                             const char* chars) {
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result = CharsToUnicodeString(chars);
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUCollator *thaiColl = NULL;
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int U_CALLCONV
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgStrCmp(const void *p1, const void *p2) {
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return ucol_strcoll(thaiColl, *(UChar **) p1, -1,  *(UChar **)p2, -1);
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LINES 6
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::TestInvalidThai(void) {
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  const char *tests[LINES] = {
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E44\\u0E01\\u0E44\\u0E01",
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E44\\u0E01\\u0E01\\u0E44",
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E01\\u0E44\\u0E01\\u0E44",
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E01\\u0E01\\u0E44\\u0E44",
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E44\\u0E44\\u0E01\\u0E01",
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E01\\u0E44\\u0E44\\u0E01",
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  };
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UChar strings[LINES][20];
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UChar *toSort[LINES];
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  int32_t i = 0, j = 0, len = 0;
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UErrorCode coll_status = U_ZERO_ERROR;
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString iteratorText;
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  thaiColl = ucol_open ("th_TH", &coll_status);
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if (U_FAILURE(coll_status)) {
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errln("Error opening Thai collator: %s", u_errorName(coll_status));
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  CollationElementIterator* c = ((RuleBasedCollator *)coll)->createCollationElementIterator( iteratorText );
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  for(i = 0; i < (int32_t)(sizeof(tests)/sizeof(tests[0])); i++) {
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    len = u_unescape(tests[i], strings[i], 20);
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    strings[i][len] = 0;
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    toSort[i] = strings[i];
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  qsort (toSort, LINES, sizeof (UChar *), StrCmp);
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  for (i=0; i < LINES; i++)
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  {
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("%i", i);
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      for (j=i+1; j < LINES; j++) {
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          if (ucol_strcoll (thaiColl, toSort[i], -1, toSort[j], -1) == UCOL_GREATER)
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          {
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              // inconsistency ordering found!
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errln("Inconsistent ordering between strings %i and %i", i, j);
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          }
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      }
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      iteratorText.setTo(toSort[i]);
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      c->setText(iteratorText, coll_status);
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      backAndForth(*c);
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  ucol_close(thaiColl);
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  delete c;
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CollationThaiTest::TestReordering(void) {
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Until UCA 4.1, the collation code swapped Thai/Lao prevowels with the following consonants,
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // resulting in consonant+prevowel == prevowel+consonant.
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // From UCA 5.0 on, there are order-reversing contractions for prevowel+consonant.
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // From UCA 5.0 until UCA 6.1, there was a tertiary difference between
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // consonant+prevowel and prevowel+consonant.
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // In UCA 6.2, they compare equal again.
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // The test was modified to using a collator with strength=secondary,
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // ignoring possible tertiary differences.
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  const char *tests[] = {
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41c\\u0301",       "=", "\\u0E41\\u0107", // composition
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\U0001D7CE",    "<", "\\u0E41\\U0001D7CF", // supplementaries
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\U0001D15F",    "=", "\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\U0002F802",    "=", "\\u0E41\\u4E41", // supplementary composition decomps to BMP
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\u0301",        "=", "\\u0E41\\u0301", // unsafe (just checking backwards iteration)
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\u0301\\u0316", "=", "\\u0E41\\u0316\\u0301",
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0e24\\u0e41",        "=", "\\u0e41\\u0e24", // exiting contraction bug
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0e3f\\u0e3f\\u0e24\\u0e41", "=", "\\u0e3f\\u0e3f\\u0e41\\u0e24",
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41c\\u0301",       "=", "abc\\u0E41\\u0107", // composition
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\U0001D000",    "<", "abc\\u0E41\\U0001D001", // supplementaries
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\U0001D15F",    "=", "abc\\u0E41\\U0001D158\\U0001D165", // supplementary composition decomps to supplementary
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\U0002F802",    "=", "abc\\u0E41\\u4E41", // supplementary composition decomps to BMP
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\u0301",        "=", "abc\\u0E41\\u0301", // unsafe (just checking backwards iteration)
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\u0301\\u0316", "=", "abc\\u0E41\\u0316\\u0301",
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41c\\u0301abc",       "=", "\\u0E41\\u0107abc", // composition
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\U0001D000abc",    "<", "\\u0E41\\U0001D001abc", // supplementaries
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\U0001D15Fabc",    "=", "\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\U0002F802abc",    "=", "\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\u0301abc",        "=", "\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "\\u0E41\\u0301\\u0316abc", "=", "\\u0E41\\u0316\\u0301abc",
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41c\\u0301abc",       "=", "abc\\u0E41\\u0107abc", // composition
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\U0001D000abc",    "<", "abc\\u0E41\\U0001D001abc", // supplementaries
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\U0001D15Fabc",    "=", "abc\\u0E41\\U0001D158\\U0001D165abc", // supplementary composition decomps to supplementary
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\U0002F802abc",    "=", "abc\\u0E41\\u4E41abc", // supplementary composition decomps to BMP
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\u0301abc",        "=", "abc\\u0E41\\u0301abc", // unsafe (just checking backwards iteration)
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    "abc\\u0E41\\u0301\\u0316abc", "=", "abc\\u0E41\\u0316\\u0301abc",
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  };
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  LocalPointer<Collator> coll2(coll->clone());
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UErrorCode status = U_ZERO_ERROR;
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  coll2->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(U_FAILURE(status)) {
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errln("Unable to set the Thai collator clone to secondary strength");
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return;
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  compareArray(*coll2, tests, sizeof(tests)/sizeof(tests[0]));
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  const char *rule = "& c < ab";
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  const char *testcontraction[] = { "\\u0E41ab", ">", "\\u0E41c"}; // After UCA 4.1 Thai are normal so won't break a contraction
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString rules;
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  parseChars(rules, rule);
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  LocalPointer<RuleBasedCollator> rcoll(new RuleBasedCollator(rules, status));
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(U_SUCCESS(status)) {
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    compareArray(*rcoll, testcontraction, 3);
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  } else {
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errln("Couldn't instantiate collator from rules");
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_COLLATION */
472