1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CG7COLL.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API
13*********************************************************************************/
14/**
15 * G7CollationTest is a third level test class.  This test performs the examples
16 * mentioned on the IBM Java international demos web site.
17 * Sample Rules: & Z < p , P
18 * Effect :  Making P sort after Z.
19 *
 * Sample Rules: & C < ch , cH, Ch, CH
21 * Effect : As well as adding sequences of characters that act as a single character (this is
22 * known as contraction), you can also add characters that act like a sequence of
23 * characters (this is known as expansion).
24 *
25 * Sample Rules: & Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'
26 * Effect : Expansion and contraction can actually be combined.
27 *
28 * Sample Rules: & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-'
29 * Effect : sorted sequence as the following,
30 * aardvark
31 * a-rdvark
32 * abbot
33 * coop
34 * co-p
35 * cop
36 */
37#include <stdlib.h>
38#include <string.h>
39#include <stdio.h>
40
41#include "unicode/utypes.h"
42
43#if !UCONFIG_NO_COLLATION
44
45#include "unicode/ucol.h"
46#include "unicode/uloc.h"
47#include "cintltst.h"
48#include "cg7coll.h"
49#include "ccolltst.h"
50#include "callcoll.h"
51#include "unicode/ustring.h"
52
/*
 * Element count of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the cast cannot be split apart by
 * an operator that binds tighter than a cast (e.g. `sizeof LENGTHOF(a)`).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
54
/*
 * Locale IDs exercised by TestG7Locales. Entry i is checked against row i
 * of the results[] table, so the order here matters.
 */
const char *locales[8] = {
    "en_US", "en_GB", "en_CA",   /* English  */
    "fr_FR", "fr_CA",            /* French   */
    "de_DE",                     /* German   */
    "it_IT",                     /* Italian  */
    "ja_JP"                      /* Japanese */
};
65
66
67
68const static UChar testCases[][MAX_TOKEN_LEN] = {
69    {  0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
70        0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000},                    /* 9 */
71    { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000},                                                    /* 1 */
72    { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000},                                    /* 2 */
73    { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000},                           /* 3 */
74    { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 4 */
75    { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 5 */
76    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000},                                                    /* 6 */
77    { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000},                                            /* 7 */
78    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000},                                   /* 8 */
79    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
80      0x0062  /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                                    /* 12 */
81    { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000},                                                    /* 10 */
82    { 0x0050  /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000},                                                    /* 11 */
83    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
84        0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                /* 13 */
85    { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
86        0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000},  /* 0 */
87    {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000},                                                    /* 14 */
88    /* Additional tests */
89    { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 },                                 /* 15 */
90    { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 },                  /* 16 */
91    { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 },                                                    /* 17 */
92    { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 },                                 /* 18 */
93    { 0x003f /*'?'*/, 0x0000 },                                                                                /* 19 */
94    { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 },                  /* 20 */
95    { 0x0023 /*'#'*/, 0x0000 },                                                                                /* 21 */
96    { 0x0026 /*'&'*/, 0x0000 },                                                                                /* 22 */
97    {  0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
98                0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 24 */
99    { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
100                0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 23 */
101    { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000},                   /* 25 */
102    { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 27 */
103    { 0x0063 /*'c'*/, 0x006f  /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                                /* 28 */
104    { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 26 */
105    { 0x007a /*'z'*/, 0x0065  /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000}                    /* 29 */
106};
107
108const static int32_t results[TESTLOCALES][TOTALTESTSET] = {
109    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
110    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
111    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
112    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
113    { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
114    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
115    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
116    { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
117    /* new table collation with rules "& Z < p, P"  loop to FIXEDTESTSET */
118    { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
119    /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
120    { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
121    /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&'  " loop to TOTALTESTSET */
122    { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
123    /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */  /* loop to TOTALTESTSET */
124    { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
125};
126
127void addRuleBasedCollTest(TestNode** root)
128{
129    addTest(root, &TestG7Locales, "tscoll/cg7coll/TestG7Locales");
130    addTest(root, &TestDemo1, "tscoll/cg7coll/TestDemo1");
131    addTest(root, &TestDemo2, "tscoll/cg7coll/TestDemo2");
132    addTest(root, &TestDemo3, "tscoll/cg7coll/TestDemo3");
133    addTest(root, &TestDemo4, "tscoll/cg7coll/TestDemo4");
134
135
136}
137
138static void TestG7Locales()
139{
140    UCollator *myCollation;
141    UErrorCode status = U_ZERO_ERROR;
142    const UChar *defRules;
143    int32_t i, rlen, j, n;
144    log_verbose("Testing  ucol_openRules for all the locales\n");
145    for (i = 0; i < LENGTHOF(locales); i++)
146    {
147        const char *locale = locales[i];
148        status = U_ZERO_ERROR;
149        myCollation = ucol_open(locale, &status);
150        ucol_setAttribute(myCollation, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
151        ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
152
153        if (U_FAILURE(status))
154        {
155            log_err_status(status, "Error in creating collator in %s:  %s\n", locale, myErrorName(status));
156            ucol_close(myCollation);
157            continue;
158        }
159
160        defRules = ucol_getRules(myCollation, &rlen);
161        if (rlen == 0 && (strcmp(locale, "fr_CA") == 0 || strcmp(locale, "ja_JP") == 0)) {
162            log_data_err("%s UCollator missing rule string\n", locale);
163            if (log_knownIssue("10671", "TestG7Locales does not test ignore-punctuation")) {
164                ucol_close(myCollation);
165                continue;
166            }
167        } else {
168            UCollator *tblColl1;
169            status = U_ZERO_ERROR;
170            tblColl1 = ucol_openRules(defRules, rlen, UCOL_OFF,
171                    UCOL_DEFAULT_STRENGTH,NULL, &status);
172            ucol_close(myCollation);
173            if (U_FAILURE(status))
174            {
175                log_err_status(status, "Error in creating collator in %s:  %s\n", locale, myErrorName(status));
176                continue;
177            }
178            myCollation = tblColl1;
179        }
180
181        log_verbose("Locale  %s\n", locales[i]);
182        log_verbose("  tests start...\n");
183
184        j = 0;
185        n = 0;
186        for (j = 0; j < FIXEDTESTSET; j++)
187        {
188            for (n = j+1; n < FIXEDTESTSET; n++)
189            {
190                doTest(myCollation, testCases[results[i][j]], testCases[results[i][n]], UCOL_LESS);
191            }
192        }
193
194        ucol_close(myCollation);
195    }
196}
197
198static void TestDemo1()
199{
200    UCollator *myCollation;
201    int32_t j, n;
202    static const char rules[] = "& Z < p, P";
203    int32_t len=(int32_t)strlen(rules);
204    UChar temp[sizeof(rules)];
205    UErrorCode status = U_ZERO_ERROR;
206    u_uastrcpy(temp, rules);
207
208    log_verbose("Demo Test 1 : Create a new table collation with rules \" & Z < p, P \" \n");
209
210    myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
211
212    if (U_FAILURE(status))
213    {
214        log_err_status(status, "Demo Test 1 Rule collation object creation failed. : %s\n", myErrorName(status));
215        return;
216    }
217
218    for (j = 0; j < FIXEDTESTSET; j++)
219    {
220        for (n = j+1; n < FIXEDTESTSET; n++)
221        {
222            doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], UCOL_LESS);
223        }
224    }
225
226    ucol_close(myCollation);
227}
228
229static void TestDemo2()
230{
231    UCollator *myCollation;
232    int32_t j, n;
233    static const char rules[] = "& C < ch , cH, Ch, CH";
234    int32_t len=(int32_t)strlen(rules);
235    UChar temp[sizeof(rules)];
236    UErrorCode status = U_ZERO_ERROR;
237    u_uastrcpy(temp, rules);
238
239    log_verbose("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
240
241    myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
242
243    if (U_FAILURE(status))
244    {
245        log_err_status(status, "Demo Test 2 Rule collation object creation failed.: %s\n", myErrorName(status));
246        return;
247    }
248    for (j = 0; j < TOTALTESTSET; j++)
249    {
250        for (n = j+1; n < TOTALTESTSET; n++)
251        {
252            doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], UCOL_LESS);
253        }
254    }
255    ucol_close(myCollation);
256}
257
258static void TestDemo3()
259{
260    UCollator *myCollation;
261    int32_t j, n;
262    static const char rules[] = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
263    int32_t len=(int32_t)strlen(rules);
264    UChar temp[sizeof(rules)];
265    UErrorCode status = U_ZERO_ERROR;
266    u_uastrcpy(temp, rules);
267
268    log_verbose("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\" \n");
269
270    myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
271
272    if (U_FAILURE(status))
273    {
274        log_err_status(status, "Demo Test 3 Rule collation object creation failed.: %s\n", myErrorName(status));
275        return;
276    }
277
278    for (j = 0; j < TOTALTESTSET; j++)
279    {
280        for (n = j+1; n < TOTALTESTSET; n++)
281        {
282            doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], UCOL_LESS);
283        }
284    }
285    ucol_close(myCollation);
286}
287
288static void TestDemo4()
289{
290    UCollator *myCollation;
291    int32_t j, n;
292    static const char rules[] = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
293    int32_t len=(int32_t)strlen(rules);
294    UChar temp[sizeof(rules)];
295    UErrorCode status = U_ZERO_ERROR;
296    u_uastrcpy(temp, rules);
297
298    log_verbose("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"\n");
299
300    myCollation = ucol_openRules(temp, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
301
302    if (U_FAILURE(status))
303    {
304        log_err_status(status, "Demo Test 4 Rule collation object creation failed.: %s\n", myErrorName(status));
305        return;
306    }
307    for (j = 0; j < TOTALTESTSET; j++)
308    {
309        for (n = j+1; n < TOTALTESTSET; n++)
310        {
311            doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], UCOL_LESS);
312        }
313    }
314    ucol_close(myCollation);
315}
316
317#endif /* #if !UCONFIG_NO_COLLATION */
318