1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9#include "unicode/utypes.h"
10
11#if !UCONFIG_NO_COLLATION
12
13#include "unicode/coll.h"
14#include "unicode/tblcoll.h"
15#include "unicode/unistr.h"
16#include "unicode/sortkey.h"
17#include "g7coll.h"
18#include "sfwdchit.h"
19#include "cmemory.h"
20
21static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = {
22    {  0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
23        0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000},                    /* 9 */
24    { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000},                                                    /* 1 */
25    { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000},                                    /* 2 */
26    { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000},                           /* 3 */
27    { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 4 */
28    { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 5 */
29    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000},                                                    /* 6 */
30    { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000},                                            /* 7 */
31    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000},                                   /* 8 */
32    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
33      0x0062  /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                                    /* 12 */
34    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000},                                                    /* 10 */
35    { 0x0050  /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000},                                                    /* 11 */
36    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
37        0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                /* 13 */
38    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
39        0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000},  /* 0 */
40    {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000},                                                    /* 14 */
41    /* Additional tests */
42    { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 },                                 /* 15 */
43    { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 },                  /* 16 */
44    { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 },                                                    /* 17 */
45    { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 },                                 /* 18 */
46    { 0x003f /*'?'*/, 0x0000 },                                                                                /* 19 */
47    { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 },                  /* 20 */
48    { 0x0023 /*'#'*/, 0x0000 },                                                                                /* 21 */
49    { 0x0026 /*'&'*/, 0x0000 },                                                                                /* 22 */
50    {  0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
51                0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 24 */
52    { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
53                0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 23 */
54    { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000},                   /* 25 */
55    { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 27 */
56    { 0x0063 /*'c'*/, 0x006f  /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                                /* 28 */
57    { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 26 */
58    { 0x007a /*'z'*/, 0x0065  /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000}                    /* 29 */
59};
60
61static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = {
62    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
63    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
64    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
65    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
66    { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
67    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
68    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
69    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
70    /* new table collation with rules "& Z < p, P"  loop to FIXEDTESTSET */
71    { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
72    /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
73    { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
74    /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&'  " loop to TOTALTESTSET */
75    { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
76    /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */  /* loop to TOTALTESTSET */
77    { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
78};
79
80G7CollationTest::~G7CollationTest() {}
81
82void G7CollationTest::TestG7Locales(/* char* par */)
83{
84    int32_t i;
85    const Locale locales[8] = {
86        Locale("en", "US", ""),
87        Locale("en", "GB", ""),
88        Locale("en", "CA", ""),
89        Locale("fr", "FR", ""),
90        Locale("fr", "CA", ""),
91        Locale("de", "DE", ""),
92        Locale("it", "IT", ""),
93        Locale("ja", "JP", "")
94    };
95
96    for (i = 0; i < UPRV_LENGTHOF(locales); i++)
97    {
98        UnicodeString dispName;
99        UErrorCode status = U_ZERO_ERROR;
100
101        const Locale &locale = locales[i];
102        LocalPointer<Collator> myCollation(Collator::createInstance(locale, status));
103        if(U_FAILURE(status)) {
104          errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
105          return;
106        }
107        myCollation->setStrength(Collator::QUATERNARY);
108        myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
109        if (U_FAILURE(status)) {
110            errln("Locale %s creation failed - %s", locale.getName(), u_errorName(status));
111            continue;
112        }
113
114        const UnicodeString &rules = ((RuleBasedCollator*)myCollation.getAlias())->getRules();
115        if (rules.isEmpty() &&
116                (locale == Locale::getCanadaFrench() || locale == Locale::getJapanese())) {
117            dataerrln("%s Collator missing rule string", locale.getName());
118            if (logKnownIssue("10671", "TestG7Locales does not test ignore-punctuation")) {
119                continue;
120            }
121        } else {
122            status = U_ZERO_ERROR;
123            RuleBasedCollator *tblColl1 = new RuleBasedCollator(rules, status);
124            if (U_FAILURE(status)) {
125                errln("Recreate %s collation failed - %s", locale.getName(), u_errorName(status));
126                continue;
127            }
128            myCollation.adoptInstead(tblColl1);
129        }
130
131        UnicodeString msg;
132
133        msg += "Locale ";
134        msg += locales[i].getDisplayName(dispName);
135        msg += "tests start :";
136        logln(msg);
137
138        int32_t j, n;
139        for (j = 0; j < FIXEDTESTSET; j++)
140        {
141            for (n = j+1; n < FIXEDTESTSET; n++)
142            {
143                doTest(myCollation.getAlias(), testCases[results[i][j]], testCases[results[i][n]], Collator::LESS);
144            }
145        }
146    }
147}
148
149void G7CollationTest::TestDemo1(/* char* par */)
150{
151    logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\"");
152    UErrorCode status = U_ZERO_ERROR;
153    Collator *col = Collator::createInstance("en_US", status);
154    if(U_FAILURE(status)) {
155      delete col;
156      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
157      return;
158    }
159    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
160    UnicodeString newRules(" & Z < p, P");
161    newRules.insert(0, baseRules);
162    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
163
164    if (U_FAILURE(status))
165    {
166        errln( "Demo Test 1 Table Collation object creation failed.");
167        return;
168    }
169
170    int32_t j, n;
171    for (j = 0; j < FIXEDTESTSET; j++)
172    {
173        for (n = j+1; n < FIXEDTESTSET; n++)
174        {
175            doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS);
176        }
177    }
178
179    delete myCollation;
180    delete col;
181}
182
183void G7CollationTest::TestDemo2(/* char* par */)
184{
185    logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
186    UErrorCode status = U_ZERO_ERROR;
187    Collator *col = Collator::createInstance("en_US", status);
188    if(U_FAILURE(status)) {
189      delete col;
190      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
191      return;
192    }
193    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
194    UnicodeString newRules("& C < ch , cH, Ch, CH");
195    newRules.insert(0, baseRules);
196    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
197
198    if (U_FAILURE(status))
199    {
200        errln("Demo Test 2 Table Collation object creation failed.");
201        return;
202    }
203
204    int32_t j, n;
205    for (j = 0; j < TOTALTESTSET; j++)
206    {
207        for (n = j+1; n < TOTALTESTSET; n++)
208        {
209            doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS);
210        }
211    }
212
213    delete myCollation;
214    delete col;
215}
216
217void G7CollationTest::TestDemo3(/* char* par */)
218{
219    logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\"");
220    UErrorCode status = U_ZERO_ERROR;
221    Collator *col = Collator::createInstance("en_US", status);
222    if(U_FAILURE(status)) {
223      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
224      delete col;
225      return;
226    }
227    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
228    UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
229    newRules.insert(0, baseRules);
230    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
231
232    if (U_FAILURE(status))
233    {
234        errln("Demo Test 3 Table Collation object creation failed.");
235        return;
236    }
237
238    int32_t j, n;
239    for (j = 0; j < TOTALTESTSET; j++)
240    {
241        for (n = j+1; n < TOTALTESTSET; n++)
242        {
243            doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS);
244        }
245    }
246
247    delete myCollation;
248    delete col;
249}
250
251void G7CollationTest::TestDemo4(/* char* par */)
252{
253    logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"");
254    UErrorCode status = U_ZERO_ERROR;
255    Collator *col = Collator::createInstance("en_US", status);
256    if(U_FAILURE(status)) {
257      delete col;
258      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
259      return;
260    }
261
262    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
263    UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
264    newRules.insert(0, baseRules);
265    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
266
267    int32_t j, n;
268    for (j = 0; j < TOTALTESTSET; j++)
269    {
270        for (n = j+1; n < TOTALTESTSET; n++)
271        {
272            doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS);
273        }
274    }
275
276    delete myCollation;
277    delete col;
278}
279
280void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
281{
282    if (exec) logln("TestSuite G7CollationTest: ");
283    switch (index) {
284        case 0: name = "TestG7Locales"; if (exec)   TestG7Locales(/* par */); break;
285        case 1: name = "TestDemo1"; if (exec)   TestDemo1(/* par */); break;
286        case 2: name = "TestDemo2"; if (exec)   TestDemo2(/* par */); break;
287        case 3: name = "TestDemo3"; if (exec)   TestDemo3(/* par */); break;
288        case 4: name = "TestDemo4"; if (exec)   TestDemo4(/* par */); break;
289        default: name = ""; break;
290    }
291}
292
293#endif /* #if !UCONFIG_NO_COLLATION */
294