1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_COLLATION
10
11#include "unicode/coll.h"
12#include "unicode/tblcoll.h"
13#include "unicode/unistr.h"
14#include "unicode/sortkey.h"
15#include "g7coll.h"
16#include "sfwdchit.h"
17
18
19static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = {
20    {  0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
21        0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000},                    /* 9 */
22    { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000},                                                    /* 1 */
23    { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000},                                    /* 2 */
24    { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000},                           /* 3 */
25    { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 4 */
26    { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 5 */
27    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000},                                                    /* 6 */
28    { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000},                                            /* 7 */
29    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000},                                   /* 8 */
30    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
31      0x0062  /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                                    /* 12 */
32    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000},                                                    /* 10 */
33    { 0x0050  /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000},                                                    /* 11 */
34    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
35        0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                /* 13 */
36    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
37        0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000},  /* 0 */
38    {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000},                                                    /* 14 */
39    /* Additional tests */
40    { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 },                                 /* 15 */
41    { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 },                  /* 16 */
42    { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 },                                                    /* 17 */
43    { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 },                                 /* 18 */
44    { 0x003f /*'?'*/, 0x0000 },                                                                                /* 19 */
45    { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 },                  /* 20 */
46    { 0x0023 /*'#'*/, 0x0000 },                                                                                /* 21 */
47    { 0x0026 /*'&'*/, 0x0000 },                                                                                /* 22 */
48    {  0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
49                0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 24 */
50    { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
51                0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 23 */
52    { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000},                   /* 25 */
53    { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 27 */
54    { 0x0063 /*'c'*/, 0x006f  /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                                /* 28 */
55    { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 26 */
56    { 0x007a /*'z'*/, 0x0065  /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000}                    /* 29 */
57};
58
59static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = {
60    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
61    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
62    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
63    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
64    { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
65    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
66    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
67    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
68    /* new table collation with rules "& Z < p, P"  loop to FIXEDTESTSET */
69    { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
70    /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
71    { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
72    /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&'  " loop to TOTALTESTSET */
73    { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
74    /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */  /* loop to TOTALTESTSET */
75    { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
76};
77
78G7CollationTest::~G7CollationTest() {}
79
80void G7CollationTest::TestG7Locales(/* char* par */)
81{
82    int32_t i;
83    const Locale locales[8] = {
84        Locale("en", "US", ""),
85        Locale("en", "GB", ""),
86        Locale("en", "CA", ""),
87        Locale("fr", "FR", ""),
88        Locale("fr", "CA", ""),
89        Locale("de", "DE", ""),
90        Locale("it", "IT", ""),
91        Locale("ja", "JP", "")
92    };
93
94
95    for (i = 0; i < 8; i++)
96    {
97        Collator *myCollation= 0;
98        UnicodeString dispName;
99        UErrorCode status = U_ZERO_ERROR;
100        RuleBasedCollator* tblColl1 = 0;
101
102        myCollation = Collator::createInstance(locales[i], status);
103        if(U_FAILURE(status)) {
104          delete myCollation;
105          errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
106          return;
107        }
108        myCollation->setStrength(Collator::QUATERNARY);
109        myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
110        if (U_FAILURE(status))
111        {
112            UnicodeString msg;
113
114            msg += "Locale ";
115            msg += locales[i].getDisplayName(dispName);
116            msg += "creation failed.";
117
118            errln(msg);
119            continue;
120        }
121
122//        const UnicodeString& defRules = ((RuleBasedCollator*)myCollation)->getRules();
123        status = U_ZERO_ERROR;
124        tblColl1 = new RuleBasedCollator(((RuleBasedCollator*)myCollation)->getRules(), status);
125        if (U_FAILURE(status))
126        {
127            UnicodeString msg, name;
128
129            msg += "Recreate ";
130            msg += locales[i].getDisplayName(name);
131            msg += "collation failed.";
132
133            errln(msg);
134            continue;
135        }
136
137        UnicodeString msg;
138
139        msg += "Locale ";
140        msg += locales[i].getDisplayName(dispName);
141        msg += "tests start :";
142        logln(msg);
143
144        int32_t j, n;
145        for (j = 0; j < FIXEDTESTSET; j++)
146        {
147            for (n = j+1; n < FIXEDTESTSET; n++)
148            {
149                doTest(tblColl1, testCases[results[i][j]], testCases[results[i][n]], Collator::LESS);
150            }
151        }
152
153        delete myCollation;
154        delete tblColl1;
155    }
156}
157
158void G7CollationTest::TestDemo1(/* char* par */)
159{
160    logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\"");
161    UErrorCode status = U_ZERO_ERROR;
162    Collator *col = Collator::createInstance("en_US", status);
163    if(U_FAILURE(status)) {
164      delete col;
165      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
166      return;
167    }
168    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
169    UnicodeString newRules(" & Z < p, P");
170    newRules.insert(0, baseRules);
171    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
172
173    if (U_FAILURE(status))
174    {
175        errln( "Demo Test 1 Table Collation object creation failed.");
176        return;
177    }
178
179    int32_t j, n;
180    for (j = 0; j < FIXEDTESTSET; j++)
181    {
182        for (n = j+1; n < FIXEDTESTSET; n++)
183        {
184            doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS);
185        }
186    }
187
188    delete myCollation;
189    delete col;
190}
191
192void G7CollationTest::TestDemo2(/* char* par */)
193{
194    logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
195    UErrorCode status = U_ZERO_ERROR;
196    Collator *col = Collator::createInstance("en_US", status);
197    if(U_FAILURE(status)) {
198      delete col;
199      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
200      return;
201    }
202    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
203    UnicodeString newRules("& C < ch , cH, Ch, CH");
204    newRules.insert(0, baseRules);
205    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
206
207    if (U_FAILURE(status))
208    {
209        errln("Demo Test 2 Table Collation object creation failed.");
210        return;
211    }
212
213    int32_t j, n;
214    for (j = 0; j < TOTALTESTSET; j++)
215    {
216        for (n = j+1; n < TOTALTESTSET; n++)
217        {
218            doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS);
219        }
220    }
221
222    delete myCollation;
223    delete col;
224}
225
226void G7CollationTest::TestDemo3(/* char* par */)
227{
228    logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\"");
229    UErrorCode status = U_ZERO_ERROR;
230    Collator *col = Collator::createInstance("en_US", status);
231    if(U_FAILURE(status)) {
232      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
233      delete col;
234      return;
235    }
236    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
237    UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
238    newRules.insert(0, baseRules);
239    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
240
241    if (U_FAILURE(status))
242    {
243        errln("Demo Test 3 Table Collation object creation failed.");
244        return;
245    }
246
247    int32_t j, n;
248    for (j = 0; j < TOTALTESTSET; j++)
249    {
250        for (n = j+1; n < TOTALTESTSET; n++)
251        {
252            doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS);
253        }
254    }
255
256    delete myCollation;
257    delete col;
258}
259
260void G7CollationTest::TestDemo4(/* char* par */)
261{
262    logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"");
263    UErrorCode status = U_ZERO_ERROR;
264    Collator *col = Collator::createInstance("en_US", status);
265    if(U_FAILURE(status)) {
266      delete col;
267      errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
268      return;
269    }
270
271    const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
272    UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
273    newRules.insert(0, baseRules);
274    RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
275
276    int32_t j, n;
277    for (j = 0; j < TOTALTESTSET; j++)
278    {
279        for (n = j+1; n < TOTALTESTSET; n++)
280        {
281            doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS);
282        }
283    }
284
285    delete myCollation;
286    delete col;
287}
288
289void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
290{
291    if (exec) logln("TestSuite G7CollationTest: ");
292    switch (index) {
293        case 0: name = "TestG7Locales"; if (exec)   TestG7Locales(/* par */); break;
294        case 1: name = "TestDemo1"; if (exec)   TestDemo1(/* par */); break;
295        case 2: name = "TestDemo2"; if (exec)   TestDemo2(/* par */); break;
296        case 3: name = "TestDemo3"; if (exec)   TestDemo3(/* par */); break;
297        case 4: name = "TestDemo4"; if (exec)   TestDemo4(/* par */); break;
298        default: name = ""; break;
299    }
300}
301
302#endif /* #if !UCONFIG_NO_COLLATION */
303