1/********************************************************************
2 * Copyright (c) 2002-2014, International Business Machines Corporation and
3 * others. All Rights Reserved.
4 ********************************************************************/
5
6/**
 * UCAConformanceTest performs conformance tests defined in the data
 * files. ICU ships with stub data files, as the whole test is too
 * long. To run the whole test, download the test files.
10 */
11
12package com.ibm.icu.dev.test.collator;
13
14import java.io.BufferedReader;
15import java.io.IOException;
16
17import com.ibm.icu.dev.test.TestFmwk;
18import com.ibm.icu.dev.test.TestUtil;
19import com.ibm.icu.lang.UCharacter;
20import com.ibm.icu.text.Collator;
21import com.ibm.icu.text.RawCollationKey;
22import com.ibm.icu.text.RuleBasedCollator;
23import com.ibm.icu.text.UTF16;
24import com.ibm.icu.util.ULocale;
25import com.ibm.icu.util.VersionInfo;
26
27public class UCAConformanceTest extends TestFmwk {
28
29    /**
30     * @param args
31     */
32    public static void main(String[] args) {
33        new UCAConformanceTest().run(args);
34    }
35
36    public UCAConformanceTest() {
37    }
38    @Override
39    protected void init() throws Exception{
40        UCA = (RuleBasedCollator)Collator.getInstance(ULocale.ROOT);
41
42        comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
43    }
44    private RuleBasedCollator UCA;
45    private RuleBasedCollator rbUCA;
46    private UTF16.StringComparator comparer;
47    private boolean isAtLeastUCA62 =
48        UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0;
49
50    public void TestTableNonIgnorable() {
51        setCollNonIgnorable(UCA);
52        openTestFile("NON_IGNORABLE");
53        conformanceTest(UCA);
54    }
55
56    public void TestTableShifted() {
57        setCollShifted(UCA);
58        openTestFile("SHIFTED");
59        conformanceTest(UCA);
60    }
61
62    public void TestRulesNonIgnorable() {
63        if(logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { return; }
64        initRbUCA();
65        if(rbUCA == null) { return; }
66
67        setCollNonIgnorable(rbUCA);
68        openTestFile("NON_IGNORABLE");
69        conformanceTest(rbUCA);
70    }
71
72    public void TestRulesShifted() {
73        logln("This test is currently disabled, as it is impossible to "+
74        "wholly represent fractional UCA using tailoring rules.");
75        return;
76        /*
77        initRbUCA();
78        if(rbUCA == null) { return; }
79
80        setCollShifted(rbUCA);
81        openTestFile("SHIFTED");
82        testConformance(rbUCA);
83        */
84    }
85    BufferedReader in;
86    private void openTestFile(String type)
87    {
88        String collationTest = "CollationTest_";
89        String ext = ".txt";
90        try {
91            in = TestUtil.getDataReader(collationTest+type+"_SHORT"+ext);
92        } catch (Exception e) {
93            try {
94                in = TestUtil.getDataReader(collationTest+type+ext);
95            } catch (Exception e1) {
96                try {
97                    in = TestUtil.getDataReader(collationTest+type+"_STUB"+ext);
98                    logln( "INFO: Working with the stub file.\n"+
99                            "If you need the full conformance test, please\n"+
100                            "download the appropriate data files from:\n"+
101                            "http://unicode.org/cldr/trac/browser/trunk/common/uca");
102                } catch (Exception e11) {
103                    errln("ERROR: Could not find any of the test files");
104                }
105            }
106        }
107    }
108
109    private void setCollNonIgnorable(RuleBasedCollator coll)
110    {
111        if(coll != null) {
112            coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
113            coll.setLowerCaseFirst(false);
114            coll.setCaseLevel(false);
115            coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY);
116            coll.setAlternateHandlingShifted(false);
117        }
118    }
119
120    private void setCollShifted(RuleBasedCollator coll)
121    {
122        if(coll != null) {
123            coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
124            coll.setLowerCaseFirst(false);
125            coll.setCaseLevel(false);
126            coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY);
127            coll.setAlternateHandlingShifted(true);
128        }
129    }
130
131
132
133    private void initRbUCA()
134    {
135        if(rbUCA == null) {
136            String ucarules = UCA.getRules(true);
137            try {
138                rbUCA = new RuleBasedCollator(ucarules);
139            } catch(Exception e) {
140                errln("Failure creating UCA rule-based collator: " + e);
141            }
142        }
143    }
144
145    private String parseString(String line) {
146        int i = 0, value;
147        StringBuilder result = new StringBuilder(), buffer = new StringBuilder();
148
149        for(;;) {
150            while(i < line.length() && Character.isWhitespace(line.charAt(i))) {
151                i++;
152            }
153            while(i < line.length() && Character.isLetterOrDigit(line.charAt(i))) {
154                buffer.append(line.charAt(i));
155                i++;
156            }
157            if(buffer.length() == 0) {
158                // We hit something that was not whitespace/letter/digit.
159                // Should be ';' or end of string.
160                return result.toString();
161            }
162            /* read one code point */
163            value = Integer.parseInt(buffer.toString(), 16);
164            buffer.setLength(0);
165            result.appendCodePoint(value);
166        }
167
168    }
169
170    private static final int IS_SHIFTED = 1;
171    private static final int FROM_RULES = 2;
172
173    private static boolean skipLineBecauseOfBug(String s, int flags) {
174        // Add temporary exceptions here if there are ICU bugs, until we can fix them.
175        // For examples see the ICU 52 version of this file.
176        return false;
177    }
178
179    private static int normalizeResult(int result) {
180        return result < 0 ? -1 : result == 0 ? 0 : 1;
181    }
182
183    private void conformanceTest(RuleBasedCollator coll) {
184        if(in == null || coll == null) {
185            return;
186        }
187        int skipFlags = 0;
188        if(coll.isAlternateHandlingShifted()) {
189            skipFlags |= IS_SHIFTED;
190        }
191        if(coll == rbUCA) {
192            skipFlags |= FROM_RULES;
193        }
194
195        logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
196        boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;
197
198        int lineNo = 0;
199
200        String line = null, oldLine = null, buffer = null, oldB = null;
201        RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
202        RawCollationKey oldSk = null, newSk = sk1;
203
204        try {
205            while ((line = in.readLine()) != null) {
206                lineNo++;
207                if(line.length() == 0 || line.charAt(0) == '#') {
208                    continue;
209                }
210                buffer = parseString(line);
211
212                if(skipLineBecauseOfBug(buffer, skipFlags)) {
213                    logln("Skipping line " + lineNo + " because of a known bug");
214                    continue;
215                }
216
217                if(withSortKeys) {
218                    coll.getRawCollationKey(buffer, newSk);
219                }
220                if(oldSk != null) {
221                    boolean ok = true;
222                    int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
223                    int cmpres = coll.compare(oldB, buffer);
224                    int cmpres2 = coll.compare(buffer, oldB);
225
226                    if(cmpres != -cmpres2) {
227                        errln(String.format(
228                                "Compare result not symmetrical on line %i: " +
229                                "previous vs. current (%d) / current vs. previous (%d)",
230                                lineNo, cmpres, cmpres2));
231                        ok = false;
232                    }
233
234                    // TODO: Compare with normalization turned off if the input passes the FCD test.
235
236                    if(withSortKeys && cmpres != normalizeResult(skres)) {
237                        errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres + ") on line " + lineNo);
238                        ok = false;
239                    }
240
241                    int res = cmpres;
242                    if(res == 0 && !isAtLeastUCA62) {
243                        // Up to UCA 6.1, the collation test files use a custom tie-breaker,
244                        // comparing the raw input strings.
245                        res = comparer.compare(oldB, buffer);
246                        // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
247                        // comparing the NFD versions of the input strings,
248                        // which we do via setting strength=identical.
249                    }
250                    if(res > 0) {
251                        errln("Line " + lineNo + " is not greater or equal than previous line");
252                        ok = false;
253                    }
254
255                    if(!ok) {
256                        errln("  Previous data line " + oldLine);
257                        errln("  Current data line  " + line);
258                        if(withSortKeys) {
259                            errln("  Previous key: " + CollationTest.prettify(oldSk));
260                            errln("  Current key:  " + CollationTest.prettify(newSk));
261                        }
262                    }
263                }
264
265                oldSk = newSk;
266                oldB = buffer;
267                oldLine = line;
268                if(oldSk == sk1) {
269                    newSk = sk2;
270                } else {
271                    newSk = sk1;
272                }
273            }
274        } catch (Exception e) {
275            errln("Unexpected exception "+e);
276        } finally {
277            try {
278                in.close();
279            } catch (IOException ignored) {
280            }
281            in = null;
282        }
283    }
284}
285