1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2002-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7/**
 * UCAConformanceTest performs the conformance tests defined in the data
 * files. ICU ships with stub data files, as the whole test is too
 * long. To run the whole test, download the full test files.
11 */
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_COLLATION
16
17#include "ucaconf.h"
18#include "unicode/ustring.h"
19#include "cmemory.h"
20#include "cstring.h"
21#include "uparse.h"
22
23UCAConformanceTest::UCAConformanceTest() :
24rbUCA(NULL),
25testFile(NULL),
26status(U_ZERO_ERROR)
27{
28    UCA = (RuleBasedCollator *)Collator::createInstance(Locale::getRoot(), status);
29    if(U_FAILURE(status)) {
30        dataerrln("Error - UCAConformanceTest: Unable to open UCA collator! - %s", u_errorName(status));
31    }
32
33    const char *srcDir = IntlTest::getSourceTestData(status);
34    if (U_FAILURE(status)) {
35        dataerrln("Could not open test data %s", u_errorName(status));
36        return;
37    }
38    uprv_strcpy(testDataPath, srcDir);
39    uprv_strcat(testDataPath, "CollationTest_");
40
41    UVersionInfo uniVersion;
42    static const UVersionInfo v62 = { 6, 2, 0, 0 };
43    u_getUnicodeVersion(uniVersion);
44    isAtLeastUCA62 = uprv_memcmp(uniVersion, v62, 4) >= 0;
45}
46
47UCAConformanceTest::~UCAConformanceTest()
48{
49    delete UCA;
50    delete rbUCA;
51    if (testFile) {
52        fclose(testFile);
53    }
54}
55
56void UCAConformanceTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
57{
58    if(exec) {
59        logln("TestSuite UCAConformanceTest: ");
60    }
61    TESTCASE_AUTO_BEGIN;
62    TESTCASE_AUTO(TestTableNonIgnorable);
63    TESTCASE_AUTO(TestTableShifted);
64    TESTCASE_AUTO(TestRulesNonIgnorable);
65    TESTCASE_AUTO(TestRulesShifted);
66    TESTCASE_AUTO_END;
67}
68
69void UCAConformanceTest::initRbUCA()
70{
71    if(!rbUCA) {
72        UnicodeString ucarules;
73        if (UCA) {
74            UCA->getRules(UCOL_FULL_RULES, ucarules);
75            rbUCA = new RuleBasedCollator(ucarules, status);
76            if (U_FAILURE(status)) {
77                dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status));
78                return;
79            }
80        } else {
81            dataerrln("Failure creating UCA rule-based collator: %s", u_errorName(status));
82            return;
83        }
84    }
85}
86
87void UCAConformanceTest::setCollNonIgnorable(Collator *coll)
88{
89    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
90    coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
91    coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
92    coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_TERTIARY, status);
93    coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, status);
94}
95
96void UCAConformanceTest::setCollShifted(Collator *coll)
97{
98    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
99    coll->setAttribute(UCOL_CASE_FIRST, UCOL_OFF, status);
100    coll->setAttribute(UCOL_CASE_LEVEL, UCOL_OFF, status);
101    coll->setAttribute(UCOL_STRENGTH, isAtLeastUCA62 ? UCOL_IDENTICAL : UCOL_QUATERNARY, status);
102    coll->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
103}
104
105void UCAConformanceTest::openTestFile(const char *type)
106{
107    const char *ext = ".txt";
108    if(testFile) {
109        fclose(testFile);
110    }
111    char buffer[1024];
112    uprv_strcpy(buffer, testDataPath);
113    uprv_strcat(buffer, type);
114    int32_t bufLen = (int32_t)uprv_strlen(buffer);
115
116    // we try to open 3 files:
117    // path/CollationTest_type.txt
118    // path/CollationTest_type_SHORT.txt
119    // path/CollationTest_type_STUB.txt
120    // we are going to test with the first one that we manage to open.
121
122    uprv_strcpy(buffer+bufLen, ext);
123
124    testFile = fopen(buffer, "rb");
125
126    if(testFile == 0) {
127        uprv_strcpy(buffer+bufLen, "_SHORT");
128        uprv_strcat(buffer, ext);
129        testFile = fopen(buffer, "rb");
130
131        if(testFile == 0) {
132            uprv_strcpy(buffer+bufLen, "_STUB");
133            uprv_strcat(buffer, ext);
134            testFile = fopen(buffer, "rb");
135
136            if (testFile == 0) {
137                *(buffer+bufLen) = 0;
138                dataerrln("Could not open any of the conformance test files, tried opening base %s\n", buffer);
139                return;
140            } else {
141                infoln(
142                    "INFO: Working with the stub file.\n"
143                    "If you need the full conformance test, please\n"
144                    "download the appropriate data files from:\n"
145                    "http://source.icu-project.org/repos/icu/tools/trunk/unicodetools/com/ibm/text/data/");
146            }
147        }
148    }
149}
150
151static const uint32_t IS_SHIFTED = 1;
152static const uint32_t FROM_RULES = 2;
153
154static UBool
155skipLineBecauseOfBug(const UChar *s, int32_t length, uint32_t flags) {
156    // TODO: Fix ICU ticket #8052
157    if(length >= 3 &&
158            (s[0] == 0xfb2 || s[0] == 0xfb3) &&
159            s[1] == 0x334 &&
160            (s[2] == 0xf73 || s[2] == 0xf75 || s[2] == 0xf81)) {
161        return TRUE;
162    }
163    // TODO: Fix ICU ticket #9361
164    if((flags & IS_SHIFTED) != 0 && length >= 2 && s[0] == 0xfffe) {
165        return TRUE;
166    }
167    // TODO: Fix tailoring builder, ICU ticket #9593.
168    UChar c;
169    if((flags & FROM_RULES) != 0 && length >= 2 && ((c = s[1]) == 0xedc || c == 0xedd)) {
170        return TRUE;
171    }
172    return FALSE;
173}
174
175static UCollationResult
176normalizeResult(int32_t result) {
177    return result<0 ? UCOL_LESS : result==0 ? UCOL_EQUAL : UCOL_GREATER;
178}
179
180void UCAConformanceTest::testConformance(const Collator *coll)
181{
182    if(testFile == 0) {
183        return;
184    }
185    uint32_t skipFlags = 0;
186    if(coll->getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED) {
187        skipFlags |= IS_SHIFTED;
188    }
189    if(coll == rbUCA) {
190        skipFlags |= FROM_RULES;
191    }
192
193    int32_t line = 0;
194
195    UChar b1[1024], b2[1024];
196    UChar *buffer = b1, *oldB = NULL;
197
198    char lineB1[1024], lineB2[1024];
199    char *lineB = lineB1, *oldLineB = lineB2;
200
201    uint8_t sk1[1024], sk2[1024];
202    uint8_t *oldSk = NULL, *newSk = sk1;
203
204    int32_t oldLen = 0;
205    int32_t oldBlen = 0;
206    uint32_t first = 0;
207
208    while (fgets(lineB, 1024, testFile) != NULL) {
209        // remove trailing whitespace
210        u_rtrim(lineB);
211
212        line++;
213        if(*lineB == 0 || lineB[0] == '#') {
214            continue;
215        }
216        int32_t buflen = u_parseString(lineB, buffer, 1024, &first, &status);
217        if(U_FAILURE(status)) {
218            errln("Error parsing line %ld (%s): %s\n",
219                  (long)line, u_errorName(status), lineB);
220            status = U_ZERO_ERROR;
221        }
222        buffer[buflen] = 0;
223
224        if(skipLineBecauseOfBug(buffer, buflen, skipFlags)) {
225            logln("Skipping line %i because of a known bug", line);
226            continue;
227        }
228
229        int32_t resLen = coll->getSortKey(buffer, buflen, newSk, 1024);
230
231        if(oldSk != NULL) {
232            int32_t skres = strcmp((char *)oldSk, (char *)newSk);
233            int32_t cmpres = coll->compare(oldB, oldBlen, buffer, buflen, status);
234            int32_t cmpres2 = coll->compare(buffer, buflen, oldB, oldBlen, status);
235
236            if(cmpres != -cmpres2) {
237                errln("Compare result not symmetrical on line %i", line);
238            }
239
240            if(cmpres != normalizeResult(skres)) {
241                errln("Difference between coll->compare (%d) and sortkey compare (%d) on line %i",
242                      cmpres, skres, line);
243                errln("  Previous data line %s", oldLineB);
244                errln("  Current data line  %s", lineB);
245            }
246
247            int32_t res = cmpres;
248            if(res == 0 && !isAtLeastUCA62) {
249                // Up to UCA 6.1, the collation test files use a custom tie-breaker,
250                // comparing the raw input strings.
251                res = u_strcmpCodePointOrder(oldB, buffer);
252                // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
253                // comparing the NFD versions of the input strings,
254                // which we do via setting strength=identical.
255            }
256            if(res > 0) {
257                errln("Line %i is not greater or equal than previous line", line);
258                errln("  Previous data line %s", oldLineB);
259                errln("  Current data line  %s", lineB);
260                UnicodeString oldS, newS;
261                prettify(CollationKey(oldSk, oldLen), oldS);
262                prettify(CollationKey(newSk, resLen), newS);
263                errln("  Previous key: "+oldS);
264                errln("  Current key:  "+newS);
265            }
266        }
267
268        // swap buffers
269        oldLineB = lineB;
270        oldB = buffer;
271        oldSk = newSk;
272        if(lineB == lineB1) {
273            lineB = lineB2;
274            buffer = b2;
275            newSk = sk2;
276        } else {
277            lineB = lineB1;
278            buffer = b1;
279            newSk = sk1;
280        }
281        oldLen = resLen;
282        oldBlen = buflen;
283    }
284}
285
286void UCAConformanceTest::TestTableNonIgnorable(/* par */) {
287    if (U_FAILURE(status)) {
288        dataerrln("Error running UCA Conformance Test: %s", u_errorName(status));
289        return;
290    }
291    setCollNonIgnorable(UCA);
292    openTestFile("NON_IGNORABLE");
293    testConformance(UCA);
294}
295
296void UCAConformanceTest::TestTableShifted(/* par */) {
297    if (U_FAILURE(status)) {
298        dataerrln("Error running UCA Conformance Test: %s", u_errorName(status));
299        return;
300    }
301    setCollShifted(UCA);
302    openTestFile("SHIFTED");
303    testConformance(UCA);
304}
305
306void UCAConformanceTest::TestRulesNonIgnorable(/* par */) {
307    initRbUCA();
308
309    if(U_SUCCESS(status)) {
310        setCollNonIgnorable(rbUCA);
311        openTestFile("NON_IGNORABLE");
312        testConformance(rbUCA);
313    }
314}
315
316void UCAConformanceTest::TestRulesShifted(/* par */) {
317    logln("This test is currently disabled, as it is impossible to "
318        "wholly represent fractional UCA using tailoring rules.");
319    return;
320
321    initRbUCA();
322
323    if(U_SUCCESS(status)) {
324        setCollShifted(rbUCA);
325        openTestFile("SHIFTED");
326        testConformance(rbUCA);
327    }
328}
329
330#endif /* #if !UCONFIG_NO_COLLATION */
331