17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 3aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert * Copyright (C) 2002-2015, International Business Machines Corporation and 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Port From: ICU4C v2.1 : collate/CollationRegressionTest 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Source File: $ICU4CRoot/source/test/intltest/regcoll.cpp 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert **/ 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.collator; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.BufferedReader; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Arrays; 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Comparator; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestFmwk; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestUtil; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CollationElementIterator; 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CollationKey; 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RuleBasedCollator; 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class CollationThaiTest extends TestFmwk { 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final int MAX_FAILURES_TO_SHOW = -1; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void main(String[] args) throws Exception { 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert new CollationThaiTest().run(args); 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Odd corner conditions taken from "How to Sort Thai Without Rewriting Sort", 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * by Doug Cooper, http://seasrc.th.net/paper/thaisort.zip 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestCornerCases() { 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String TESTS[] = { 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Shorter words precede longer 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01", "<", "\u0e01\u0e01", 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Tone marks are considered after letters (i.e. are primary ignorable) 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32", "<", "\u0e01\u0e49\u0e32", 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ditto for other over-marks 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32", "<", "\u0e01\u0e32\u0e4c", 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // commonly used mark-in-context order. 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // In effect, marks are sorted after each syllable. 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32\u0e01\u0e49\u0e32", "<", "\u0e01\u0e48\u0e32\u0e01\u0e49\u0e32", 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Hyphens and other punctuation follow whitespace but come before letters 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32", "=", "\u0e01\u0e32-", 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32-", "<", "\u0e01\u0e32\u0e01\u0e32", 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Doubler follows an indentical word without the doubler 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32", "=", "\u0e01\u0e32\u0e46", 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32\u0e46", "<", "\u0e01\u0e32\u0e01\u0e32", 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // \u0e45 after either \u0e24 or \u0e26 is treated as a single 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // combining character, similar to "c < ch" in traditional spanish. 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: beef up this case 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e24\u0e29\u0e35", "<", "\u0e24\u0e45\u0e29\u0e35", 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e26\u0e29\u0e35", "<", "\u0e26\u0e45\u0e29\u0e35", 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Vowels reorder, should compare \u0e2d and \u0e34 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e40\u0e01\u0e2d", "<", "\u0e40\u0e01\u0e34", 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Tones are compared after the rest of the word (e.g. primary ignorable) 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01\u0e32\u0e01\u0e48\u0e32", "<", "\u0e01\u0e49\u0e32\u0e01\u0e32", 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Periods are ignored entirely 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0e01.\u0e01.", "<", "\u0e01\u0e32", 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator coll = null; 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert coll = getThaiCollator(); 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert warnln("could not construct Thai collator"); 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert compareArray(coll, TESTS); 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert void compareArray(RuleBasedCollator c, String[] tests) { 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < tests.length; i += 3) { 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int expect = 0; 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (tests[i+1].equals("<")) { 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert expect = -1; 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (tests[i+1].equals(">")) { 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert expect = 1; 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (tests[i+1].equals("=")) { 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert expect = 0; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // expect = Integer.decode(tests[i+1]).intValue(); 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Error: unknown operator " + tests[i+1]); 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String s1 = tests[i]; 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String s2 = tests[i+2]; 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationTest.doTest(this, c, s1, s2, expect); 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int sign(int i ) { 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (i < 0) return -1; 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (i > 0) return 1; 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Read the external dictionary file, which is already in proper 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * sorted order, and confirm that the collator compares each line as 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * preceding the following line. 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestDictionary() { 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator coll = null; 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert coll = getThaiCollator(); 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert warnln("could not construct Thai collator"); 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 128aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Read in a dictionary of Thai words 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int line = 0; 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int failed = 0; 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int wordCount = 0; 133aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert BufferedReader in = null; 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 135aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert String fileName = "riwords.txt"; 136aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert in = TestUtil.getDataReader(fileName, "UTF-8"); 137aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 138aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // 139aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // Loop through each word in the dictionary and compare it to the previous 140aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // word. They should be in sorted order. 141aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // 142aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert String lastWord = ""; 143aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert String word = in.readLine(); 144aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert while (word != null) { 145aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert line++; 146aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 147aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // Skip comments and blank lines 148aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert if (word.length() == 0 || word.charAt(0) == 0x23) { 149aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert word = in.readLine(); 150aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert continue; 151aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } 152aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 153aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // Show the first 8 words being compared, so we can see what's happening 154aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert ++wordCount; 155aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert if (wordCount <= 8) { 156aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert logln("Word " + wordCount + ": " + word); 157aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } 158aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 159aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert if (lastWord.length() > 0) { 160aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // CollationTest.doTest isn't really set up to handle situations where 161aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // the result can be equal or greater than the previous, so have to skip for now. 162aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // Not a big deal, since we're still testing to make sure everything sorts out 163aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // right, just not looking at the colation keys in detail... 164aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert // CollationTest.doTest(this, coll, lastWord, word, -1); 165aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert int result = coll.compare(lastWord, word); 166aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 167aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert if (result > 0) { 168aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert failed++; 169aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW) { 170aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert String msg = "--------------------------------------------\n" + line + " compare(" 171aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert + lastWord + ", " + word + ") returned " + result + ", expected -1\n"; 172aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert CollationKey k1, k2; 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert k1 = coll.getCollationKey(lastWord); 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert k2 = coll.getCollationKey(word); 175aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert msg += "key1: " + CollationTest.prettify(k1) + "\n" + "key2: " + CollationTest.prettify(k2); 176aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert errln(msg); 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 180aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert lastWord = word; 181aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert word = in.readLine(); 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (IOException e) { 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("IOException " + e.getMessage()); 185aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } finally { 186aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert if (in == null) { 187aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert errln("Error: could not open test file. Aborting test."); 188aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert return; 189aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } else { 190aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert try { 191aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert in.close(); 192aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } catch (IOException ignored) { 193aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } 194aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert } 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 196aacdd6f022693689b3bf76f70670711f3254a441Fredrik Roubert 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (failed != 0) { 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (failed > MAX_FAILURES_TO_SHOW) { 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Too many failures; only the first " + 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert MAX_FAILURES_TO_SHOW + " failures were shown"); 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Summary: " + failed + " of " + (line - 1) + 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert " comparisons failed"); 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln("Words checked: " + wordCount); 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestInvalidThai() 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String tests[] = { "\u0E44\u0E01\u0E44\u0E01", 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E44\u0E01\u0E01\u0E44", 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E01\u0E44\u0E01\u0E44", 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E01\u0E01\u0E44\u0E44", 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E44\u0E44\u0E01\u0E01", 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E01\u0E44\u0E44\u0E01", 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator collator; 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StrCmp comparator; 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = getThaiCollator(); 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert comparator = new StrCmp(); 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert warnln("could not construct Thai collator"); 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Arrays.sort(tests, comparator); 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < tests.length; i ++) 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int j = i + 1; j < tests.length; j ++) { 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (collator.compare(tests[i], tests[j]) > 0) { 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // inconsistency ordering found! 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Inconsistent ordering between strings " + i 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + " and " + j); 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationElementIterator iterator 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert = collator.getCollationElementIterator(tests[i]); 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CollationTest.backAndForth(this, iterator); 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestReordering() 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String tests[] = { 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41c\u0301", "=", "\u0E41\u0107", // composition 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\uD835\uDFCE", "<", "\u0E41\uD835\uDFCF", // supplementaries 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\uD834\uDD5F", "=", "\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\uD87E\uDC02", "=", "\u0E41\u4E41", // supplementary composition decomps to BMP 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\u0301", "=", "\u0E41\u0301", // unsafe (just checking backwards iteration) 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\u0301\u0316", "=", "\u0E41\u0316\u0301", 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41c\u0301", "=", "abc\u0E41\u0107", // composition 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\uD834\uDC00", "<", "abc\u0E41\uD834\uDC01", // supplementaries 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\uD834\uDD5F", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65", // supplementary composition decomps to supplementary 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\uD87E\uDC02", "=", "abc\u0E41\u4E41", // supplementary composition decomps to BMP 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\u0301", "=", "abc\u0E41\u0301", // unsafe (just checking backwards iteration) 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\u0301\u0316", "=", "abc\u0E41\u0316\u0301", 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41c\u0301abc", "=", "\u0E41\u0107abc", // composition 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\uD834\uDC00abc", "<", "\u0E41\uD834\uDC01abc", // supplementaries 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\uD834\uDD5Fabc", "=", "\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\uD87E\uDC02abc", "=", "\u0E41\u4E41abc", // supplementary composition decomps to BMP 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\u0301abc", "=", "\u0E41\u0301abc", // unsafe (just checking backwards iteration) 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0E41\u0301\u0316abc", "=", "\u0E41\u0316\u0301abc", 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41c\u0301abc", "=", "abc\u0E41\u0107abc", // composition 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\uD834\uDC00abc", "<", "abc\u0E41\uD834\uDC01abc", // supplementaries 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\uD834\uDD5Fabc", "=", "abc\u0E41\uD834\uDD58\uD834\uDD65abc", // supplementary composition decomps to supplementary 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\uD87E\uDC02abc", "=", "abc\u0E41\u4E41abc", // supplementary composition decomps to BMP 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\u0301abc", "=", "abc\u0E41\u0301abc", // unsafe (just checking backwards iteration) 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "abc\u0E41\u0301\u0316abc", "=", "abc\u0E41\u0316\u0301abc", 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator collator; 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = (RuleBasedCollator)getThaiCollator(); 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert warnln("could not construct Thai collator"); 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert compareArray(collator, tests); 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String rule = "& c < ab"; 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String testcontraction[] = { "\u0E41ab", ">", "\u0E41c"}; 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = new RuleBasedCollator(rule); 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Error: could not construct collator with rule " + rule); 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert compareArray(collator, testcontraction); 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private inner class ------------------------------------------------- 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final class StrCmp implements Comparator<String> 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int compare(String string1, String string2) 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return collator.compare(string1, string2); 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StrCmp() throws Exception 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator = getThaiCollator(); 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collator collator; 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private data members ------------------------------------------------ 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static RuleBasedCollator m_collator_; 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // private methods ----------------------------------------------------- 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static RuleBasedCollator getThaiCollator() throws Exception 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (m_collator_ == null) { 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert m_collator_ = (RuleBasedCollator)Collator.getInstance( 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert new Locale("th", "TH", "")); 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return m_collator_; 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 330