17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
3aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert * Copyright (C) 2002-2015, International Business Machines Corporation and
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Port From:   ICU4C v2.1 : collate/CollationRegressionTest
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Source File: $ICU4CRoot/source/test/intltest/regcoll.cpp
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert **/
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.collator;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.BufferedReader;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Arrays;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Comparator;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestFmwk;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestUtil;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CollationElementIterator;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CollationKey;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RuleBasedCollator;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class CollationThaiTest extends TestFmwk {
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    final int MAX_FAILURES_TO_SHOW = -1;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static void main(String[] args) throws Exception {
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        new CollationThaiTest().run(args);
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort",
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestCornerCases() {
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String TESTS[] = {
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Shorter words precede longer
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01",                               "<",    "\u0e01\u0e01",
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Tone marks are considered after letters (i.e. are primary ignorable)
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32",                        "<",    "\u0e01\u0e49\u0e32",
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // ditto for other over-marks
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32",                        "<",    "\u0e01\u0e32\u0e4c",
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // commonly used mark-in-context order.
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // In effect, marks are sorted after each syllable.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32\u0e01\u0e49\u0e32",   "<",    "\u0e01\u0e48\u0e32\u0e01\u0e49\u0e32",
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Hyphens and other punctuation follow whitespace but come before letters
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32",                        "=",    "\u0e01\u0e32-",
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32-",                       "<",    "\u0e01\u0e32\u0e01\u0e32",
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Doubler follows an indentical word without the doubler
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32",                        "=",    "\u0e01\u0e32\u0e46",
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32\u0e46",                 "<",    "\u0e01\u0e32\u0e01\u0e32",
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // \u0e45 after either \u0e24 or \u0e26 is treated as a single
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // combining character, similar to "c < ch" in traditional spanish.
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO: beef up this case
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e24\u0e29\u0e35",                 "<",    "\u0e24\u0e45\u0e29\u0e35",
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e26\u0e29\u0e35",                 "<",    "\u0e26\u0e45\u0e29\u0e35",
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Vowels reorder, should compare \u0e2d and \u0e34
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e40\u0e01\u0e2d",                 "<",    "\u0e40\u0e01\u0e34",
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Tones are compared after the rest of the word (e.g. primary ignorable)
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01\u0e32\u0e01\u0e48\u0e32",   "<",    "\u0e01\u0e49\u0e32\u0e01\u0e32",
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Periods are ignored entirely
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0e01.\u0e01.",                      "<",    "\u0e01\u0e32",
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedCollator coll = null;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coll = getThaiCollator();
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (Exception e) {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            warnln("could not construct Thai collator");
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        compareArray(coll, TESTS);
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void compareArray(RuleBasedCollator c, String[] tests) {
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < tests.length; i += 3) {
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int expect = 0;
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (tests[i+1].equals("<")) {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                expect = -1;
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (tests[i+1].equals(">")) {
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                expect = 1;
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (tests[i+1].equals("=")) {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                expect = 0;
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // expect = Integer.decode(tests[i+1]).intValue();
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("Error: unknown operator " + tests[i+1]);
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String s1 = tests[i];
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String s2 = tests[i+2];
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationTest.doTest(this, c, s1, s2, expect);
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int sign(int i ) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (i < 0) return -1;
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (i > 0) return 1;
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 0;
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Read the external dictionary file, which is already in proper
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * sorted order, and confirm that the collator compares each line as
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * preceding the following line.
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestDictionary() {
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedCollator coll = null;
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            coll = getThaiCollator();
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (Exception e) {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            warnln("could not construct Thai collator");
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
128aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Read in a dictionary of Thai words
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int line = 0;
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int failed = 0;
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int wordCount = 0;
133aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert        BufferedReader in = null;
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
135aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            String fileName = "riwords.txt";
136aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            in = TestUtil.getDataReader(fileName, "UTF-8");
137aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
138aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            //
139aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            // Loop through each word in the dictionary and compare it to the previous
140aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            // word. They should be in sorted order.
141aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            //
142aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            String lastWord = "";
143aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            String word = in.readLine();
144aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            while (word != null) {
145aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                line++;
146aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
147aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                // Skip comments and blank lines
148aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                if (word.length() == 0 || word.charAt(0) == 0x23) {
149aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    word = in.readLine();
150aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    continue;
151aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                }
152aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
153aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                // Show the first 8 words being compared, so we can see what's happening
154aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                ++wordCount;
155aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                if (wordCount <= 8) {
156aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    logln("Word " + wordCount + ": " + word);
157aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                }
158aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
159aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                if (lastWord.length() > 0) {
160aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    // CollationTest.doTest isn't really set up to handle situations where
161aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    // the result can be equal or greater than the previous, so have to skip for now.
162aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    // Not a big deal, since we're still testing to make sure everything sorts out
163aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    // right, just not looking at the colation keys in detail...
164aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    // CollationTest.doTest(this, coll, lastWord, word, -1);
165aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    int result = coll.compare(lastWord, word);
166aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
167aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    if (result > 0) {
168aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                        failed++;
169aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                        if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) {
170aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                            String msg = "--------------------------------------------\n" + line + " compare("
171aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                                    + lastWord + ", " + word + ") returned " + result + ", expected -1\n";
172aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                            CollationKey k1, k2;
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            k1 = coll.getCollationKey(lastWord);
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            k2 = coll.getCollationKey(word);
175aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                            msg += "key1: " + CollationTest.prettify(k1) + "\n" + "key2: " + CollationTest.prettify(k2);
176aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                            errln(msg);
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
180aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                lastWord = word;
181aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                word = in.readLine();
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (IOException e) {
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln("IOException " + e.getMessage());
185aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert        } finally {
186aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            if (in == null) {
187aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                errln("Error: could not open test file. Aborting test.");
188aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                return;
189aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            } else {
190aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                try {
191aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                    in.close();
192aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                } catch (IOException ignored) {
193aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert                }
194aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert            }
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
196aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (failed != 0) {
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (failed > MAX_FAILURES_TO_SHOW) {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                errln("Too many failures; only the first " +
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                      MAX_FAILURES_TO_SHOW + " failures were shown");
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln("Summary: " + failed + " of " + (line - 1) +
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                  " comparisons failed");
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        logln("Words checked: " + wordCount);
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestInvalidThai()
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String tests[] = { "\u0E44\u0E01\u0E44\u0E01",
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           "\u0E44\u0E01\u0E01\u0E44",
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           "\u0E01\u0E44\u0E01\u0E44",
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           "\u0E01\u0E01\u0E44\u0E44",
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           "\u0E44\u0E44\u0E01\u0E01",
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           "\u0E01\u0E44\u0E44\u0E01",
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         };
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedCollator collator;
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StrCmp comparator;
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator = getThaiCollator();
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            comparator = new StrCmp();
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (Exception e) {
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            warnln("could not construct Thai collator");
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Arrays.sort(tests, comparator);
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < tests.length; i ++)
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int j = i + 1; j < tests.length; j ++) {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (collator.compare(tests[i], tests[j]) > 0) {
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // inconsistency ordering found!
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    errln("Inconsistent ordering between strings " + i
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                          + " and " + j);
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationElementIterator iterator
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                = collator.getCollationElementIterator(tests[i]);
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CollationTest.backAndForth(this, iterator);
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void TestReordering()
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String tests[] = {
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41c\u0301",      "=", "\u0E41\u0107", // composition
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\uD835\uDFCE", "<", "\u0E41\uD835\uDFCF", // supplementaries
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\uD834\uDD5F", "=", "\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\uD87E\uDC02", "=", "\u0E41\u4E41", // supplementary composition decomps to BMP
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\u0301",       "=", "\u0E41\u0301", // unsafe (just checking backwards iteration)
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\u0301\u0316", "=", "\u0E41\u0316\u0301",
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41c\u0301",      "=", "abc\u0E41\u0107", // composition
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\uD834\uDC00", "<", "abc\u0E41\uD834\uDC01", // supplementaries
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\uD834\uDD5F", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\uD87E\uDC02", "=", "abc\u0E41\u4E41", // supplementary composition decomps to BMP
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\u0301",       "=", "abc\u0E41\u0301", // unsafe (just checking backwards iteration)
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\u0301\u0316", "=", "abc\u0E41\u0316\u0301",
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41c\u0301abc",      "=", "\u0E41\u0107abc", // composition
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\uD834\uDC00abc", "<", "\u0E41\uD834\uDC01abc", // supplementaries
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\uD834\uDD5Fabc", "=", "\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\uD87E\uDC02abc", "=", "\u0E41\u4E41abc", // supplementary composition decomps to BMP
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\u0301abc",       "=", "\u0E41\u0301abc", // unsafe (just checking backwards iteration)
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u0E41\u0301\u0316abc", "=", "\u0E41\u0316\u0301abc",
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41c\u0301abc",      "=", "abc\u0E41\u0107abc", // composition
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\uD834\uDC00abc", "<", "abc\u0E41\uD834\uDC01abc", // supplementaries
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\uD834\uDD5Fabc", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\uD87E\uDC02abc", "=", "abc\u0E41\u4E41abc", // supplementary composition decomps to BMP
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\u0301abc",       "=", "abc\u0E41\u0301abc", // unsafe (just checking backwards iteration)
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "abc\u0E41\u0301\u0316abc", "=", "abc\u0E41\u0316\u0301abc",
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        RuleBasedCollator collator;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator = (RuleBasedCollator)getThaiCollator();
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (Exception e) {
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            warnln("could not construct Thai collator");
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        compareArray(collator, tests);
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String rule = "& c < ab";
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String testcontraction[] = { "\u0E41ab", ">", "\u0E41c"};
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator = new RuleBasedCollator(rule);
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch (Exception e) {
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            errln("Error: could not construct collator with rule " + rule);
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        compareArray(collator, testcontraction);
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private inner class -------------------------------------------------
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class StrCmp implements Comparator<String>
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int compare(String string1, String string2)
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return collator.compare(string1, string2);
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StrCmp() throws Exception
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            collator = getThaiCollator();
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Collator collator;
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private data members ------------------------------------------------
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static RuleBasedCollator m_collator_;
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private methods -----------------------------------------------------
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static RuleBasedCollator getThaiCollator() throws Exception
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    {
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (m_collator_ == null) {
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            m_collator_ = (RuleBasedCollator)Collator.getInstance(
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                new Locale("th", "TH", ""));
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return m_collator_;
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
330