1/********************************************************************
2 * Copyright (c) 1997-2013, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************/
5/*****************************************************************************
6*
7* File CAPITEST.C
8*
9* Modification History:
10*        Name                     Description
11*     Madhu Katragadda             Ported for C API
12*     Brian Rower                  Added TestOpenVsOpenRules
13******************************************************************************
14*//* C API TEST For COLLATOR */
15
16#include "unicode/utypes.h"
17
18#if !UCONFIG_NO_COLLATION
19
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include "unicode/uloc.h"
24#include "unicode/ulocdata.h"
25#include "unicode/ustring.h"
26#include "unicode/ures.h"
27#include "unicode/ucoleitr.h"
28#include "cintltst.h"
29#include "capitst.h"
30#include "ccolltst.h"
31#include "putilimp.h"
32#include "cmemory.h"
33#include "cstring.h"
34
35static void TestAttribute(void);
36static void TestDefault(void);
37static void TestDefaultKeyword(void);
38static void TestBengaliSortKey(void);
39        int TestBufferSize();    /* defined in "colutil.c" */
40
41
42static char* U_EXPORT2 ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t len) {
43    uint32_t position = 0;
44    uint8_t b;
45
46    if (position + 1 < len)
47        position += sprintf(buffer + position, "[");
48    while ((b = *sortkey++) != 0) {
49        if (b == 1 && position + 5 < len) {
50            position += sprintf(buffer + position, "%02X . ", b);
51        } else if (b != 1 && position + 3 < len) {
52            position += sprintf(buffer + position, "%02X ", b);
53        }
54    }
55    if (position + 3 < len)
56        position += sprintf(buffer + position, "%02X]", b);
57    return buffer;
58}
59
60void addCollAPITest(TestNode** root)
61{
62    /* WEIVTODO: return tests here */
63    addTest(root, &TestProperty,      "tscoll/capitst/TestProperty");
64    addTest(root, &TestRuleBasedColl, "tscoll/capitst/TestRuleBasedColl");
65    addTest(root, &TestCompare,       "tscoll/capitst/TestCompare");
66    addTest(root, &TestSortKey,       "tscoll/capitst/TestSortKey");
67    addTest(root, &TestHashCode,      "tscoll/capitst/TestHashCode");
68    addTest(root, &TestElemIter,      "tscoll/capitst/TestElemIter");
69    addTest(root, &TestGetAll,        "tscoll/capitst/TestGetAll");
70    /*addTest(root, &TestGetDefaultRules, "tscoll/capitst/TestGetDefaultRules");*/
71    addTest(root, &TestDecomposition, "tscoll/capitst/TestDecomposition");
72    addTest(root, &TestSafeClone, "tscoll/capitst/TestSafeClone");
73    addTest(root, &TestCloneBinary, "tscoll/capitst/TestCloneBinary");
74    addTest(root, &TestGetSetAttr, "tscoll/capitst/TestGetSetAttr");
75    addTest(root, &TestBounds, "tscoll/capitst/TestBounds");
76    addTest(root, &TestGetLocale, "tscoll/capitst/TestGetLocale");
77    addTest(root, &TestSortKeyBufferOverrun, "tscoll/capitst/TestSortKeyBufferOverrun");
78    addTest(root, &TestAttribute, "tscoll/capitst/TestAttribute");
79    addTest(root, &TestGetTailoredSet, "tscoll/capitst/TestGetTailoredSet");
80    addTest(root, &TestMergeSortKeys, "tscoll/capitst/TestMergeSortKeys");
81    addTest(root, &TestShortString, "tscoll/capitst/TestShortString");
82    /* BEGIN android-removed
83       To save space, Android does not include the collation tailoring rules.
84       We skip the tailing tests for collations. */
85    /* addTest(root, &TestGetContractionsAndUnsafes, "tscoll/capitst/TestGetContractionsAndUnsafes"); */
86    /* END android-removed */
87    addTest(root, &TestOpenBinary, "tscoll/capitst/TestOpenBinary");
88    addTest(root, &TestDefault, "tscoll/capitst/TestDefault");
89    addTest(root, &TestDefaultKeyword, "tscoll/capitst/TestDefaultKeyword");
90    /* BEGIN android-removed
91       To save space, Android does not build full collation tables and tailing rules.
92       We skip the related tests. */
93    /* addTest(root, &TestOpenVsOpenRules, "tscoll/capitst/TestOpenVsOpenRules"); */
94    /* addTest(root, &TestGetKeywordValuesForLocale, "tscoll/capitst/TestGetKeywordValuesForLocale"); */
95    /* END android-removed */
96    addTest(root, &TestBengaliSortKey, "tscoll/capitst/TestBengaliSortKey");
97    addTest(root, &TestGetKeywordValuesForLocale, "tscoll/capitst/TestGetKeywordValuesForLocale");
98}
99
100void TestGetSetAttr(void) {
101  UErrorCode status = U_ZERO_ERROR;
102  UCollator *coll = ucol_open(NULL, &status);
103  struct attrTest {
104    UColAttribute att;
105    UColAttributeValue val[5];
106    uint32_t valueSize;
107    UColAttributeValue nonValue;
108  } attrs[] = {
109    {UCOL_FRENCH_COLLATION, {UCOL_ON, UCOL_OFF}, 2, UCOL_SHIFTED},
110    {UCOL_ALTERNATE_HANDLING, {UCOL_NON_IGNORABLE, UCOL_SHIFTED}, 2, UCOL_OFF},/* attribute for handling variable elements*/
111    {UCOL_CASE_FIRST, {UCOL_OFF, UCOL_LOWER_FIRST, UCOL_UPPER_FIRST}, 3, UCOL_SHIFTED},/* who goes first, lower case or uppercase */
112    {UCOL_CASE_LEVEL, {UCOL_ON, UCOL_OFF}, 2, UCOL_SHIFTED},/* do we have an extra case level */
113    {UCOL_NORMALIZATION_MODE, {UCOL_ON, UCOL_OFF}, 2, UCOL_SHIFTED},/* attribute for normalization */
114    {UCOL_DECOMPOSITION_MODE, {UCOL_ON, UCOL_OFF}, 2, UCOL_SHIFTED},
115    {UCOL_STRENGTH,         {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL}, 5, UCOL_SHIFTED},/* attribute for strength */
116    {UCOL_HIRAGANA_QUATERNARY_MODE, {UCOL_ON, UCOL_OFF}, 2, UCOL_SHIFTED},/* when turned on, this attribute */
117  };
118  UColAttribute currAttr;
119  UColAttributeValue value;
120  uint32_t i = 0, j = 0;
121
122  if (coll == NULL) {
123    log_err_status(status, "Unable to open collator. %s\n", u_errorName(status));
124    return;
125  }
126  for(i = 0; i<sizeof(attrs)/sizeof(attrs[0]); i++) {
127    currAttr = attrs[i].att;
128    ucol_setAttribute(coll, currAttr, UCOL_DEFAULT, &status);
129    if(U_FAILURE(status)) {
130      log_err_status(status, "ucol_setAttribute with the default value returned error: %s\n", u_errorName(status));
131      break;
132    }
133    value = ucol_getAttribute(coll, currAttr, &status);
134    if(U_FAILURE(status)) {
135      log_err("ucol_getAttribute returned error: %s\n", u_errorName(status));
136      break;
137    }
138    for(j = 0; j<attrs[i].valueSize; j++) {
139      ucol_setAttribute(coll, currAttr, attrs[i].val[j], &status);
140      if(U_FAILURE(status)) {
141        log_err("ucol_setAttribute with the value %i returned error: %s\n", attrs[i].val[j], u_errorName(status));
142        break;
143      }
144    }
145    status = U_ZERO_ERROR;
146    ucol_setAttribute(coll, currAttr, attrs[i].nonValue, &status);
147    if(U_SUCCESS(status)) {
148      log_err("ucol_setAttribute with the bad value didn't return an error\n");
149      break;
150    }
151    status = U_ZERO_ERROR;
152
153    ucol_setAttribute(coll, currAttr, value, &status);
154    if(U_FAILURE(status)) {
155      log_err("ucol_setAttribute with the default valuereturned error: %s\n", u_errorName(status));
156      break;
157    }
158  }
159  status = U_ZERO_ERROR;
160  value = ucol_getAttribute(coll, UCOL_ATTRIBUTE_COUNT, &status);
161  if(U_SUCCESS(status)) {
162    log_err("ucol_getAttribute for UCOL_ATTRIBUTE_COUNT didn't return an error\n");
163  }
164  status = U_ZERO_ERROR;
165  ucol_setAttribute(coll, UCOL_ATTRIBUTE_COUNT, UCOL_DEFAULT, &status);
166  if(U_SUCCESS(status)) {
167    log_err("ucol_setAttribute for UCOL_ATTRIBUTE_COUNT didn't return an error\n");
168  }
169  status = U_ZERO_ERROR;
170  ucol_close(coll);
171}
172
173
/* Reports "ERROR :  <message>" through log_err() when condition is zero;
 * does nothing for any non-zero condition. */
static void doAssert(int condition, const char *message)
{
    if (condition != 0) {
        return;
    }
    log_err("ERROR :  %s\n", message);
}
180
181#define UTF8_BUF_SIZE 128
182
183static void doStrcoll(const UCollator* coll, const UChar* src, int32_t srcLen, const UChar* tgt, int32_t tgtLen,
184                    UCollationResult expected, const char *message) {
185    UErrorCode err = U_ZERO_ERROR;
186    char srcU8[UTF8_BUF_SIZE], tgtU8[UTF8_BUF_SIZE];
187    int32_t srcU8Len = -1, tgtU8Len = -1;
188    int32_t len = 0;
189
190    if (ucol_strcoll(coll, src, srcLen, tgt, tgtLen) != expected) {
191        log_err("ERROR :  %s\n", message);
192    }
193
194    u_strToUTF8(srcU8, UTF8_BUF_SIZE, &len, src, srcLen, &err);
195    if (U_FAILURE(err) || len >= UTF8_BUF_SIZE) {
196        log_err("ERROR : UTF-8 conversion error\n");
197        return;
198    }
199    if (srcLen >= 0) {
200        srcU8Len = len;
201    }
202    u_strToUTF8(tgtU8, UTF8_BUF_SIZE, &len, tgt, tgtLen, &err);
203    if (U_FAILURE(err) || len >= UTF8_BUF_SIZE) {
204        log_err("ERROR : UTF-8 conversion error\n");
205        return;
206    }
207    if (tgtLen >= 0) {
208        tgtU8Len = len;
209    }
210
211    if (ucol_strcollUTF8(coll, srcU8, srcU8Len, tgtU8, tgtU8Len, &err) != expected
212        || U_FAILURE(err)) {
213        log_err("ERROR: %s (strcollUTF8)\n", message);
214    }
215}
216
217#if 0
218/* We don't have default rules, at least not in the previous sense */
/*
 * Disabled test (the surrounding #if 0 compiles it out).
 * Builds a collator from the array returned by ucol_getDefaultRulesArray(),
 * clones its rule data, and compares that data byte-for-byte with the
 * "%%Collation" binary resource read from the "root" bundle.
 *
 * NOTE(review): ucol_openRules() is called here with five arguments, while
 * the live tests in this file pass six -- this code presumably predates an
 * API change and would need updating before being re-enabled.
 */
void TestGetDefaultRules(){
    uint32_t size=0;
    UErrorCode status=U_ZERO_ERROR;
    UCollator *coll=NULL;
    int32_t len1 = 0, len2=0;
    uint8_t *binColData = NULL;

    UResourceBundle *res = NULL;
    UResourceBundle *binColl = NULL;
    uint8_t *binResult = NULL;


    const UChar * defaultRulesArray=ucol_getDefaultRulesArray(&size);
    log_verbose("Test the function ucol_getDefaultRulesArray()\n");

    /* Open a collator from the default rules and clone its rule data (length in len1). */
    coll = ucol_openRules(defaultRulesArray, size, UCOL_ON, UCOL_PRIMARY, &status);
    if(U_SUCCESS(status) && coll !=NULL) {
        binColData = (uint8_t*)ucol_cloneRuleData(coll, &len1, &status);

    }


    /* Fetch the reference binary from the root bundle (length in len2). */
    status=U_ZERO_ERROR;
    res=ures_open(NULL, "root", &status);
    if(U_FAILURE(status)){
        log_err("ERROR: Failed to get resource for \"root Locale\" with %s", myErrorName(status));
        /* NOTE(review): returns without releasing coll or binColData. */
        return;
    }
    binColl=ures_getByKey(res, "%%Collation", binColl, &status);
    if(U_SUCCESS(status)){
        binResult=(uint8_t*)ures_getBinary(binColl,  &len2, &status);
        if(U_FAILURE(status)){
            log_err("ERROR: ures_getBinary() failed\n");
        }
    }else{
        log_err("ERROR: ures_getByKey(locale(default), %%Collation) failed");
    }


    /* Both the length and the contents must match. */
    if(len1 != len2){
        log_err("Error: ucol_getDefaultRulesArray() failed to return the correct length.\n");
    }
    /* NOTE(review): binColData and binResult are still NULL here if the steps
       above failed; memcmp() is called on them regardless. */
    if(memcmp(binColData, binResult, len1) != 0){
        log_err("Error: ucol_getDefaultRulesArray() failed\n");
    }

    free(binColData);
    ures_close(binColl);
    ures_close(res);
    ucol_close(coll);

}
271#endif
272
273/* Collator Properties
274 ucol_open, ucol_strcoll,  getStrength/setStrength
275 getDecomposition/setDecomposition, getDisplayName*/
276void TestProperty()
277{
278    UCollator *col, *ruled;
279    UChar *disName;
280    int32_t len = 0;
281    UChar source[12], target[12];
282    int32_t tempLength;
283    UErrorCode status = U_ZERO_ERROR;
284    /*
285     * Expected version of the English collator.
286     * Currently, the major/minor version numbers change when the builder code
287     * changes,
288     * number 2 is from the tailoring data version and
289     * number 3 is the UCA version.
290     * This changes with every UCA version change, and the expected value
291     * needs to be adjusted.
292     * Same in intltest/apicoll.cpp.
293     */
294    UVersionInfo currVersionArray = {0x31, 0xC0, 0x05, 0x2A};  /* from ICU 4.4/UCA 5.2 */
295    UVersionInfo versionArray = {0, 0, 0, 0};
296    UVersionInfo versionUCAArray = {0, 0, 0, 0};
297    UVersionInfo versionUCDArray = {0, 0, 0, 0};
298
299    log_verbose("The property tests begin : \n");
300    log_verbose("Test ucol_strcoll : \n");
301    col = ucol_open("en_US", &status);
302    if (U_FAILURE(status)) {
303        log_err_status(status, "Default Collator creation failed.: %s\n", myErrorName(status));
304        return;
305    }
306
307    ucol_getVersion(col, versionArray);
308    /* Check for a version greater than some value rather than equality
309     * so that we need not update the expected version each time. */
310    if (uprv_memcmp(versionArray, currVersionArray, 4)<0) {
311      log_err("Testing ucol_getVersion() - unexpected result: %02x.%02x.%02x.%02x\n",
312              versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
313    } else {
314      log_verbose("ucol_getVersion() result: %02x.%02x.%02x.%02x\n",
315                  versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
316    }
317
318    /* Assume that the UCD and UCA versions are the same,
319     * rather than hardcoding (and updating each time) a particular UCA version. */
320    u_getUnicodeVersion(versionUCDArray);
321    ucol_getUCAVersion(col, versionUCAArray);
322    if (0!=uprv_memcmp(versionUCAArray, versionUCDArray, 4)) {
323      log_err("Testing ucol_getUCAVersion() - unexpected result: %hu.%hu.%hu.%hu\n",
324              versionUCAArray[0], versionUCAArray[1], versionUCAArray[2], versionUCAArray[3]);
325    }
326
327    u_uastrcpy(source, "ab");
328    u_uastrcpy(target, "abc");
329
330    doStrcoll(col, source, u_strlen(source), target, u_strlen(target), UCOL_LESS, "ab < abc comparison failed");
331
332    u_uastrcpy(source, "ab");
333    u_uastrcpy(target, "AB");
334
335    doStrcoll(col, source, u_strlen(source), target, u_strlen(target), UCOL_LESS, "ab < AB comparison failed");
336
337    u_uastrcpy(source, "blackbird");
338    u_uastrcpy(target, "black-bird");
339
340    doStrcoll(col, source, u_strlen(source), target, u_strlen(target), UCOL_GREATER, "black-bird > blackbird comparison failed");
341
342    u_uastrcpy(source, "black bird");
343    u_uastrcpy(target, "black-bird");
344
345    doStrcoll(col, source, u_strlen(source), target, u_strlen(target), UCOL_LESS, "black bird < black-bird comparison failed");
346
347    u_uastrcpy(source, "Hello");
348    u_uastrcpy(target, "hello");
349
350    doStrcoll(col, source, u_strlen(source), target, u_strlen(target), UCOL_GREATER, "Hello > hello comparison failed");
351
352    log_verbose("Test ucol_strcoll ends.\n");
353
354    log_verbose("testing ucol_getStrength() method ...\n");
355    doAssert( (ucol_getStrength(col) == UCOL_TERTIARY), "collation object has the wrong strength");
356    doAssert( (ucol_getStrength(col) != UCOL_PRIMARY), "collation object's strength is primary difference");
357
358    log_verbose("testing ucol_setStrength() method ...\n");
359    ucol_setStrength(col, UCOL_SECONDARY);
360    doAssert( (ucol_getStrength(col) != UCOL_TERTIARY), "collation object's strength is secondary difference");
361    doAssert( (ucol_getStrength(col) != UCOL_PRIMARY), "collation object's strength is primary difference");
362    doAssert( (ucol_getStrength(col) == UCOL_SECONDARY), "collation object has the wrong strength");
363
364
365    log_verbose("Get display name for the default collation in German : \n");
366
367    len=ucol_getDisplayName("en_US", "de_DE", NULL, 0,  &status);
368    if(status==U_BUFFER_OVERFLOW_ERROR){
369        status=U_ZERO_ERROR;
370        disName=(UChar*)malloc(sizeof(UChar) * (len+1));
371        ucol_getDisplayName("en_US", "de_DE", disName, len+1,  &status);
372        log_verbose("the display name for default collation in german: %s\n", austrdup(disName) );
373        free(disName);
374    }
375    if(U_FAILURE(status)){
376        log_err("ERROR: in getDisplayName: %s\n", myErrorName(status));
377        return;
378    }
379    log_verbose("Default collation getDisplayName ended.\n");
380
381    /* BEGIN android-removed
382       To save space, Android does not include the collation tailoring rules.
383       Skip the related tests.
384
385    ruled = ucol_open("da_DK", &status);
386    log_verbose("ucol_getRules() testing ...\n");
387    ucol_getRules(ruled, &tempLength);
388    doAssert( tempLength != 0, "getRules() result incorrect" );
389    log_verbose("getRules tests end.\n");
390    {
391        UChar *buffer = (UChar *)malloc(200000*sizeof(UChar));
392        int32_t bufLen = 200000;
393        buffer[0] = '\0';
394        log_verbose("ucol_getRulesEx() testing ...\n");
395        tempLength = ucol_getRulesEx(col,UCOL_TAILORING_ONLY,buffer,bufLen );
396        doAssert( tempLength == 0x00, "getRulesEx() result incorrect" );
397        log_verbose("getRules tests end.\n");
398
399        log_verbose("ucol_getRulesEx() testing ...\n");
400        tempLength=ucol_getRulesEx(col,UCOL_FULL_RULES,buffer,bufLen );
401        doAssert( tempLength != 0, "getRulesEx() result incorrect" );
402        log_verbose("getRules tests end.\n");
403        free(buffer);
404    }
405    ucol_close(ruled);
406    ucol_close(col);
407
408    END android-removed */
409
410    log_verbose("open an collator for french locale");
411    col = ucol_open("fr_FR", &status);
412    if (U_FAILURE(status)) {
413       log_err("ERROR: Creating French collation failed.: %s\n", myErrorName(status));
414        return;
415    }
416    ucol_setStrength(col, UCOL_PRIMARY);
417    log_verbose("testing ucol_getStrength() method again ...\n");
418    doAssert( (ucol_getStrength(col) != UCOL_TERTIARY), "collation object has the wrong strength");
419    doAssert( (ucol_getStrength(col) == UCOL_PRIMARY), "collation object's strength is not primary difference");
420
421    log_verbose("testing French ucol_setStrength() method ...\n");
422    ucol_setStrength(col, UCOL_TERTIARY);
423    doAssert( (ucol_getStrength(col) == UCOL_TERTIARY), "collation object's strength is not tertiary difference");
424    doAssert( (ucol_getStrength(col) != UCOL_PRIMARY), "collation object's strength is primary difference");
425    doAssert( (ucol_getStrength(col) != UCOL_SECONDARY), "collation object's strength is secondary difference");
426    ucol_close(col);
427
428    log_verbose("Get display name for the french collation in english : \n");
429    len=ucol_getDisplayName("fr_FR", "en_US", NULL, 0,  &status);
430    if(status==U_BUFFER_OVERFLOW_ERROR){
431        status=U_ZERO_ERROR;
432        disName=(UChar*)malloc(sizeof(UChar) * (len+1));
433        ucol_getDisplayName("fr_FR", "en_US", disName, len+1,  &status);
434        log_verbose("the display name for french collation in english: %s\n", austrdup(disName) );
435        free(disName);
436    }
437    if(U_FAILURE(status)){
438        log_err("ERROR: in getDisplayName: %s\n", myErrorName(status));
439        return;
440    }
441    log_verbose("Default collation getDisplayName ended.\n");
442
443}
444
445/* Test RuleBasedCollator and getRules*/
446void TestRuleBasedColl()
447{
448    UCollator *col1, *col2, *col3, *col4;
449    UCollationElements *iter1, *iter2;
450    UChar ruleset1[60];
451    UChar ruleset2[50];
452    UChar teststr[10];
453    UChar teststr2[10];
454    const UChar *rule1, *rule2, *rule3, *rule4;
455    int32_t tempLength;
456    UErrorCode status = U_ZERO_ERROR;
457    u_uastrcpy(ruleset1, "&9 < a, A < b, B < c, C; ch, cH, Ch, CH < d, D, e, E");
458    u_uastrcpy(ruleset2, "&9 < a, A < b, B < c, C < d, D, e, E");
459
460
461    col1 = ucol_openRules(ruleset1, u_strlen(ruleset1), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL,&status);
462    if (U_FAILURE(status)) {
463        log_err_status(status, "RuleBased Collator creation failed.: %s\n", myErrorName(status));
464        return;
465    }
466    else
467        log_verbose("PASS: RuleBased Collator creation passed\n");
468
469    status = U_ZERO_ERROR;
470    col2 = ucol_openRules(ruleset2, u_strlen(ruleset2),  UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
471    if (U_FAILURE(status)) {
472        log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status));
473        return;
474    }
475    else
476        log_verbose("PASS: RuleBased Collator creation passed\n");
477
478
479    status = U_ZERO_ERROR;
480    col3= ucol_open(NULL, &status);
481    if (U_FAILURE(status)) {
482        log_err("Default Collator creation failed.: %s\n", myErrorName(status));
483        return;
484    }
485    else
486        log_verbose("PASS: Default Collator creation passed\n");
487
488    rule1 = ucol_getRules(col1, &tempLength);
489    rule2 = ucol_getRules(col2, &tempLength);
490    rule3 = ucol_getRules(col3, &tempLength);
491
492    doAssert((u_strcmp(rule1, rule2) != 0), "Default collator getRules failed");
493    doAssert((u_strcmp(rule2, rule3) != 0), "Default collator getRules failed");
494    doAssert((u_strcmp(rule1, rule3) != 0), "Default collator getRules failed");
495
496    col4=ucol_openRules(rule2, u_strlen(rule2), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
497    if (U_FAILURE(status)) {
498        log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status));
499        return;
500    }
501    rule4= ucol_getRules(col4, &tempLength);
502    doAssert((u_strcmp(rule2, rule4) == 0), "Default collator getRules failed");
503
504    ucol_close(col1);
505    ucol_close(col2);
506    ucol_close(col3);
507    ucol_close(col4);
508
509    /* tests that modifier ! is always ignored */
510    u_uastrcpy(ruleset1, "!&a<b");
511    teststr[0] = 0x0e40;
512    teststr[1] = 0x0e01;
513    teststr[2] = 0x0e2d;
514    col1 = ucol_openRules(ruleset1, u_strlen(ruleset1), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
515    if (U_FAILURE(status)) {
516        log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status));
517        return;
518    }
519    col2 = ucol_open("en_US", &status);
520    if (U_FAILURE(status)) {
521        log_err("en_US Collator creation failed.: %s\n", myErrorName(status));
522        return;
523    }
524    iter1 = ucol_openElements(col1, teststr, 3, &status);
525    iter2 = ucol_openElements(col2, teststr, 3, &status);
526    if(U_FAILURE(status)) {
527        log_err("ERROR: CollationElement iterator creation failed.: %s\n", myErrorName(status));
528        return;
529    }
530    while (TRUE) {
531        /* testing with en since thai has its own tailoring */
532        uint32_t ce = ucol_next(iter1, &status);
533        uint32_t ce2 = ucol_next(iter2, &status);
534        if(U_FAILURE(status)) {
535            log_err("ERROR: CollationElement iterator creation failed.: %s\n", myErrorName(status));
536            return;
537        }
538        if (ce2 != ce) {
539             log_err("! modifier test failed");
540        }
541        if (ce == UCOL_NULLORDER) {
542            break;
543        }
544    }
545    ucol_closeElements(iter1);
546    ucol_closeElements(iter2);
547    ucol_close(col1);
548    ucol_close(col2);
549    /* test that we can start a rule without a & or < */
550    u_uastrcpy(ruleset1, "< z < a");
551    col1 = ucol_openRules(ruleset1, u_strlen(ruleset1), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
552    if (U_FAILURE(status)) {
553        log_err("RuleBased Collator creation failed.: %s\n", myErrorName(status));
554        return;
555    }
556    u_uastrcpy(teststr, "z");
557    u_uastrcpy(teststr2, "a");
558    if (ucol_greaterOrEqual(col1, teststr, 1, teststr2, 1)) {
559        log_err("Rule \"z < a\" fails");
560    }
561    ucol_close(col1);
562}
563
564void TestCompare()
565{
566    UErrorCode status = U_ZERO_ERROR;
567    UCollator *col;
568    UChar* test1;
569    UChar* test2;
570
571    log_verbose("The compare tests begin : \n");
572    status=U_ZERO_ERROR;
573    col = ucol_open("en_US", &status);
574    if(U_FAILURE(status)) {
575        log_err_status(status, "ucal_open() collation creation failed.: %s\n", myErrorName(status));
576        return;
577    }
578    test1=(UChar*)malloc(sizeof(UChar) * 6);
579    test2=(UChar*)malloc(sizeof(UChar) * 6);
580    u_uastrcpy(test1, "Abcda");
581    u_uastrcpy(test2, "abcda");
582
583    log_verbose("Use tertiary comparison level testing ....\n");
584
585    doAssert( (!ucol_equal(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" != \"abcda\" ");
586    doAssert( (ucol_greater(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" >>> \"abcda\" ");
587    doAssert( (ucol_greaterOrEqual(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" >>> \"abcda\"");
588
589    ucol_setStrength(col, UCOL_SECONDARY);
590    log_verbose("Use secondary comparison level testing ....\n");
591
592    doAssert( (ucol_equal(col, test1, u_strlen(test1), test2, u_strlen(test2) )), "Result should be \"Abcda\" == \"abcda\"");
593    doAssert( (!ucol_greater(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" == \"abcda\"");
594    doAssert( (ucol_greaterOrEqual(col, test1, u_strlen(test1), test2, u_strlen(test2) )), "Result should be \"Abcda\" == \"abcda\"");
595
596    ucol_setStrength(col, UCOL_PRIMARY);
597    log_verbose("Use primary comparison level testing ....\n");
598
599    doAssert( (ucol_equal(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" == \"abcda\"");
600    doAssert( (!ucol_greater(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" == \"abcda\"");
601    doAssert( (ucol_greaterOrEqual(col, test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"Abcda\" == \"abcda\"");
602
603
604    log_verbose("The compare tests end.\n");
605    ucol_close(col);
606    free(test1);
607    free(test2);
608
609}
610/*
611---------------------------------------------
612 tests decomposition setting
613*/
614void TestDecomposition() {
615    UErrorCode status = U_ZERO_ERROR;
616    UCollator *en_US, *el_GR, *vi_VN;
617    en_US = ucol_open("en_US", &status);
618    el_GR = ucol_open("el_GR", &status);
619    vi_VN = ucol_open("vi_VN", &status);
620
621    if (U_FAILURE(status)) {
622        log_err_status(status, "ERROR: collation creation failed.: %s\n", myErrorName(status));
623        return;
624    }
625
626    if (ucol_getAttribute(vi_VN, UCOL_NORMALIZATION_MODE, &status) != UCOL_ON ||
627        U_FAILURE(status))
628    {
629        log_err("ERROR: vi_VN collation did not have canonical decomposition for normalization!\n");
630    }
631
632    status = U_ZERO_ERROR;
633    if (ucol_getAttribute(el_GR, UCOL_NORMALIZATION_MODE, &status) != UCOL_ON ||
634        U_FAILURE(status))
635    {
636        log_err("ERROR: el_GR collation did not have canonical decomposition for normalization!\n");
637    }
638
639    status = U_ZERO_ERROR;
640    if (ucol_getAttribute(en_US, UCOL_NORMALIZATION_MODE, &status) != UCOL_OFF ||
641        U_FAILURE(status))
642    {
643        log_err("ERROR: en_US collation had canonical decomposition for normalization!\n");
644    }
645
646    ucol_close(en_US);
647    ucol_close(el_GR);
648    ucol_close(vi_VN);
649}
650
651#define CLONETEST_COLLATOR_COUNT 4
652
653void TestSafeClone() {
654    UChar test1[6];
655    UChar test2[6];
656    static const UChar umlautUStr[] = {0x00DC, 0};
657    static const UChar oeStr[] = {0x0055, 0x0045, 0};
658    UCollator * someCollators [CLONETEST_COLLATOR_COUNT];
659    UCollator * someClonedCollators [CLONETEST_COLLATOR_COUNT];
660    UCollator * col;
661    UErrorCode err = U_ZERO_ERROR;
662    int8_t idx = 6;    /* Leave this here to test buffer alingment in memory*/
663    uint8_t buffer [CLONETEST_COLLATOR_COUNT] [U_COL_SAFECLONE_BUFFERSIZE];
664    int32_t bufferSize = U_COL_SAFECLONE_BUFFERSIZE;
665    const char sampleRuleChars[] = "&Z < CH";
666    UChar sampleRule[sizeof(sampleRuleChars)];
667
668    if (TestBufferSize()) {
669        log_err("U_COL_SAFECLONE_BUFFERSIZE should be larger than sizeof(UCollator)\n");
670        return;
671    }
672
673    u_uastrcpy(test1, "abCda");
674    u_uastrcpy(test2, "abcda");
675    u_uastrcpy(sampleRule, sampleRuleChars);
676
677    /* one default collator & two complex ones */
678    someCollators[0] = ucol_open("en_US", &err);
679    someCollators[1] = ucol_open("ko", &err);
680    someCollators[2] = ucol_open("ja_JP", &err);
681    someCollators[3] = ucol_openRules(sampleRule, -1, UCOL_ON, UCOL_TERTIARY, NULL, &err);
682    if(U_FAILURE(err)) {
683        for (idx = 0; idx < CLONETEST_COLLATOR_COUNT; idx++) {
684            ucol_close(someCollators[idx]);
685        }
686        log_data_err("Couldn't open one or more collators\n");
687        return;
688    }
689
690    /* Check the various error & informational states: */
691
692    /* Null status - just returns NULL */
693    if (0 != ucol_safeClone(someCollators[0], buffer[0], &bufferSize, 0))
694    {
695        log_err("FAIL: Cloned Collator failed to deal correctly with null status\n");
696    }
697    /* error status - should return 0 & keep error the same */
698    err = U_MEMORY_ALLOCATION_ERROR;
699    if (0 != ucol_safeClone(someCollators[0], buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
700    {
701        log_err("FAIL: Cloned Collator failed to deal correctly with incoming error status\n");
702    }
703    err = U_ZERO_ERROR;
704
705    /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/
706    if (0 != ucol_safeClone(someCollators[0], buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
707    {
708        log_err("FAIL: Cloned Collator failed to deal correctly with null bufferSize pointer\n");
709    }
710    err = U_ZERO_ERROR;
711
712    /* buffer size pointer is 0 - fill in pbufferSize with a size */
713    bufferSize = 0;
714    if (0 != ucol_safeClone(someCollators[0], buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
715    {
716        log_err("FAIL: Cloned Collator failed a sizing request ('preflighting')\n");
717    }
718    /* Verify our define is large enough  */
719    if (U_COL_SAFECLONE_BUFFERSIZE < bufferSize)
720    {
721        log_err("FAIL: Pre-calculated buffer size is too small\n");
722    }
723    /* Verify we can use this run-time calculated size */
724    if (0 == (col = ucol_safeClone(someCollators[0], buffer[0], &bufferSize, &err)) || U_FAILURE(err))
725    {
726        log_err("FAIL: Collator can't be cloned with run-time size\n");
727    }
728    if (col) ucol_close(col);
729    /* size one byte too small - should allocate & let us know */
730    --bufferSize;
731    if (0 == (col = ucol_safeClone(someCollators[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
732    {
733        log_err("FAIL: Cloned Collator failed to deal correctly with too-small buffer size\n");
734    }
735    if (col) ucol_close(col);
736    err = U_ZERO_ERROR;
737    bufferSize = U_COL_SAFECLONE_BUFFERSIZE;
738
739
740    /* Null buffer pointer - return Collator & set error to U_SAFECLONE_ALLOCATED_ERROR */
741    if (0 == (col = ucol_safeClone(someCollators[0], 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
742    {
743        log_err("FAIL: Cloned Collator failed to deal correctly with null buffer pointer\n");
744    }
745    if (col) ucol_close(col);
746    err = U_ZERO_ERROR;
747
748    /* Null Collator - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
749    if (0 != ucol_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
750    {
751        log_err("FAIL: Cloned Collator failed to deal correctly with null Collator pointer\n");
752    }
753
754    err = U_ZERO_ERROR;
755
756    /* Test that a cloned collator doesn't accidentally use UCA. */
757    col=ucol_open("de@collation=phonebook", &err);
758    bufferSize = U_COL_SAFECLONE_BUFFERSIZE;
759    someClonedCollators[0] = ucol_safeClone(col, buffer[0], &bufferSize, &err);
760    doAssert( (ucol_greater(col, umlautUStr, u_strlen(umlautUStr), oeStr, u_strlen(oeStr))), "Original German phonebook collation sorts differently than expected");
761    doAssert( (ucol_greater(someClonedCollators[0], umlautUStr, u_strlen(umlautUStr), oeStr, u_strlen(oeStr))), "Cloned German phonebook collation sorts differently than expected");
762    if (!ucol_equals(someClonedCollators[0], col)) {
763        log_err("FAIL: Cloned German phonebook collator is not equal to original.\n");
764    }
765    ucol_close(col);
766    ucol_close(someClonedCollators[0]);
767
768    err = U_ZERO_ERROR;
769
770    /* change orig & clone & make sure they are independent */
771
772    for (idx = 0; idx < CLONETEST_COLLATOR_COUNT; idx++)
773    {
774        ucol_setStrength(someCollators[idx], UCOL_IDENTICAL);
775        bufferSize = 1;
776        err = U_ZERO_ERROR;
777        ucol_close(ucol_safeClone(someCollators[idx], buffer[idx], &bufferSize, &err));
778        if (err != U_SAFECLONE_ALLOCATED_WARNING) {
779            log_err("FAIL: collator number %d was not allocated.\n", idx);
780            log_err("FAIL: status of Collator[%d] is %d  (hex: %x).\n", idx, err, err);
781        }
782
783        bufferSize = U_COL_SAFECLONE_BUFFERSIZE;
784        err = U_ZERO_ERROR;
785        someClonedCollators[idx] = ucol_safeClone(someCollators[idx], buffer[idx], &bufferSize, &err);
786        if (someClonedCollators[idx] == NULL
787            || someClonedCollators[idx] < (UCollator *)buffer[idx]
788            || someClonedCollators[idx] > (UCollator *)(buffer[idx]+(U_COL_SAFECLONE_BUFFERSIZE-1)))
789        {
790            /* TODO: The use of U_COL_SAFECLONE_BUFFERSIZE will be deprecated per #9932.
791               In the meantime, just turn the following former error into a log message. */
792            log_verbose("NOTE: Cloned collator did not use provided buffer, index %d, status %s, clone NULL? %d\n",
793                                                        idx, myErrorName(err), someClonedCollators[idx] == NULL);
794        }
795        if (!ucol_equals(someClonedCollators[idx], someCollators[idx])) {
796            log_err("FAIL: Cloned collator is not equal to original at index = %d.\n", idx);
797        }
798
799        /* Check the usability */
800        ucol_setStrength(someCollators[idx], UCOL_PRIMARY);
801        ucol_setAttribute(someCollators[idx], UCOL_CASE_LEVEL, UCOL_OFF, &err);
802
803        doAssert( (ucol_equal(someCollators[idx], test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"abcda\" == \"abCda\"");
804
805        /* Close the original to make sure that the clone is usable. */
806        ucol_close(someCollators[idx]);
807
808        ucol_setStrength(someClonedCollators[idx], UCOL_TERTIARY);
809        ucol_setAttribute(someClonedCollators[idx], UCOL_CASE_LEVEL, UCOL_OFF, &err);
810        doAssert( (ucol_greater(someClonedCollators[idx], test1, u_strlen(test1), test2, u_strlen(test2))), "Result should be \"abCda\" >>> \"abcda\" ");
811
812        ucol_close(someClonedCollators[idx]);
813    }
814}
815
816void TestCloneBinary(){
817    UErrorCode err = U_ZERO_ERROR;
818    UCollator * col = ucol_open("en_US", &err);
819    UCollator * c;
820    int32_t size;
821    uint8_t * buffer;
822
823    if (U_FAILURE(err)) {
824        log_data_err("Couldn't open collator. Error: %s\n", u_errorName(err));
825        return;
826    }
827
828    size = ucol_cloneBinary(col, NULL, 0, &err);
829    if(size==0 || err!=U_BUFFER_OVERFLOW_ERROR) {
830        log_err("ucol_cloneBinary - couldn't check size. Error: %s\n", u_errorName(err));
831        return;
832    }
833    err = U_ZERO_ERROR;
834
835    buffer = (uint8_t *) malloc(size);
836    ucol_cloneBinary(col, buffer, size, &err);
837    if(U_FAILURE(err)) {
838        log_err("ucol_cloneBinary - couldn't clone.. Error: %s\n", u_errorName(err));
839        free(buffer);
840        return;
841    }
842
843    /* how to check binary result ? */
844
845    c = ucol_openBinary(buffer, size, col, &err);
846    if(U_FAILURE(err)) {
847        log_err("ucol_openBinary failed. Error: %s\n", u_errorName(err));
848    } else {
849        UChar t[] = {0x41, 0x42, 0x43, 0};  /* ABC */
850        uint8_t  *k1, *k2;
851        int l1, l2;
852        l1 = ucol_getSortKey(col, t, -1, NULL,0);
853        l2 = ucol_getSortKey(c, t, -1, NULL,0);
854        k1 = (uint8_t *) malloc(sizeof(uint8_t) * l1);
855        k2 = (uint8_t *) malloc(sizeof(uint8_t) * l2);
856        ucol_getSortKey(col, t, -1, k1, l1);
857        ucol_getSortKey(col, t, -1, k2, l2);
858        if (strcmp((char *)k1,(char *)k2) != 0){
859            log_err("ucol_openBinary - new collator should equal to old one\n");
860        };
861        free(k1);
862        free(k2);
863    }
864    free(buffer);
865    ucol_close(c);
866    ucol_close(col);
867}
868
869
870static void TestBengaliSortKey(void)
871{
872  const char *curLoc = "bn";
873  UChar str1[] = { 0x09BE, 0 };
874  UChar str2[] = { 0x0B70, 0 };
875  UCollator *c2 = NULL;
876  const UChar *rules;
877  int32_t rulesLength=-1;
878  uint8_t *sortKey1;
879  int32_t sortKeyLen1 = 0;
880  uint8_t *sortKey2;
881  int32_t sortKeyLen2 = 0;
882  UErrorCode status = U_ZERO_ERROR;
883  char sortKeyStr1[2048];
884  uint32_t sortKeyStrLen1 = sizeof(sortKeyStr1)/sizeof(sortKeyStr1[0]);
885  char sortKeyStr2[2048];
886  uint32_t sortKeyStrLen2 = sizeof(sortKeyStr2)/sizeof(sortKeyStr2[0]);
887  UCollationResult result;
888
889  static UChar preRules[41] = { 0x26, 0x9fa, 0x3c, 0x98c, 0x3c, 0x9e1, 0x3c, 0x98f, 0x3c, 0x990, 0x3c, 0x993, 0x3c, 0x994, 0x3c, 0x9bc, 0x3c, 0x982, 0x3c, 0x983, 0x3c, 0x981, 0x3c, 0x9b0, 0x3c, 0x9b8, 0x3c, 0x9b9, 0x3c, 0x9bd, 0x3c, 0x9be, 0x3c, 0x9bf, 0x3c, 0x9c8, 0x3c, 0x9cb, 0x3d, 0x9cb , 0};
890
891  rules = preRules;
892
893  log_verbose("Rules: %s\n", aescstrdup(rules, rulesLength));
894
895  c2 = ucol_openRules(rules, rulesLength, UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
896  if (U_FAILURE(status)) {
897    log_data_err("ERROR: Creating collator from rules failed with locale: %s : %s\n", curLoc, myErrorName(status));
898    return;
899  }
900
901  sortKeyLen1 = ucol_getSortKey(c2, str1, -1, NULL, 0);
902  sortKey1 = (uint8_t*)malloc(sortKeyLen1+1);
903  ucol_getSortKey(c2,str1,-1,sortKey1, sortKeyLen1+1);
904  ucol_sortKeyToString(c2, sortKey1, sortKeyStr1, sortKeyStrLen1);
905
906
907  sortKeyLen2 = ucol_getSortKey(c2, str2, -1, NULL, 0);
908  sortKey2 = (uint8_t*)malloc(sortKeyLen2+1);
909  ucol_getSortKey(c2,str2,-1,sortKey2, sortKeyLen2+1);
910
911  ucol_sortKeyToString(c2, sortKey2, sortKeyStr2, sortKeyStrLen2);
912
913
914
915  result=ucol_strcoll(c2, str1, -1, str2, -1);
916  if(result!=UCOL_LESS) {
917    log_err("Error: %s was not less than %s: result=%d.\n", aescstrdup(str1,-1), aescstrdup(str2,-1), result);
918    log_info("[%s] -> %s (%d, from rule)\n", aescstrdup(str1,-1), sortKeyStr1, sortKeyLen1);
919    log_info("[%s] -> %s (%d, from rule)\n", aescstrdup(str2,-1), sortKeyStr2, sortKeyLen2);
920  } else {
921    log_verbose("OK: %s was  less than %s: result=%d.\n", aescstrdup(str1,-1), aescstrdup(str2,-1), result);
922    log_verbose("[%s] -> %s (%d, from rule)\n", aescstrdup(str1,-1), sortKeyStr1, sortKeyLen1);
923    log_verbose("[%s] -> %s (%d, from rule)\n", aescstrdup(str2,-1), sortKeyStr2, sortKeyLen2);
924  }
925
926  free(sortKey1);
927  free(sortKey2);
928  ucol_close(c2);
929
930}
931
932/*
933    TestOpenVsOpenRules ensures that collators from ucol_open and ucol_openRules
934    will generate identical sort keys
935*/
936void TestOpenVsOpenRules(){
937
938    /* create an array of all the locales */
939    int32_t numLocales = uloc_countAvailable();
940    int32_t sizeOfStdSet;
941    uint32_t adder;
942    UChar str[41]; /* create an array of UChar of size maximum strSize + 1 */
943    USet *stdSet;
944    char* curLoc;
945    UCollator * c1;
946    UCollator * c2;
947    const UChar* rules;
948    int32_t rulesLength;
949    int32_t sortKeyLen1, sortKeyLen2;
950    uint8_t *sortKey1 = NULL, *sortKey2 = NULL;
951    char sortKeyStr1[512], sortKeyStr2[512];
952    uint32_t sortKeyStrLen1 = sizeof(sortKeyStr1) / sizeof(sortKeyStr1[0]),
953             sortKeyStrLen2 = sizeof(sortKeyStr2) / sizeof(sortKeyStr2[0]);
954    ULocaleData *uld;
955    int32_t x, y, z;
956    USet *eSet;
957    int32_t eSize;
958    int strSize;
959
960    UErrorCode err = U_ZERO_ERROR;
961
962    /* create a set of standard characters that aren't very interesting...
963    and then we can find some interesting ones later */
964
965    stdSet = uset_open(0x61, 0x7A);
966    uset_addRange(stdSet, 0x41, 0x5A);
967    uset_addRange(stdSet, 0x30, 0x39);
968    sizeOfStdSet = uset_size(stdSet);
969
970    adder = 1;
971    if(getTestOption(QUICK_OPTION))
972    {
973        adder = 10;
974    }
975
976    for(x = 0; x < numLocales; x+=adder){
977        curLoc = (char *)uloc_getAvailable(x);
978        log_verbose("Processing %s\n", curLoc);
979
980        /* create a collator the normal API way */
981        c1 = ucol_open(curLoc, &err);
982        if (U_FAILURE(err)) {
983            log_err("ERROR: Normal collation creation failed with locale: %s : %s\n", curLoc, myErrorName(err));
984            return;
985        }
986
987        /* grab the rules */
988        rules = ucol_getRules(c1, &rulesLength);
989
990        /* use those rules to create a collator from rules */
991        c2 = ucol_openRules(rules, rulesLength, UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &err);
992        if (U_FAILURE(err)) {
993            log_err("ERROR: Creating collator from rules failed with locale: %s : %s\n", curLoc, myErrorName(err));
994            return;
995        }
996
997        uld = ulocdata_open(curLoc, &err);
998
999        /*now that we have some collators, we get several strings */
1000
1001        for(y = 0; y < 5; y++){
1002
1003            /* get a set of ALL the characters in this locale */
1004            eSet =  ulocdata_getExemplarSet(uld, NULL, 0, ULOCDATA_ES_STANDARD, &err);
1005            eSize = uset_size(eSet);
1006
1007            /* make a string with these characters in it */
1008            strSize = (rand()%40) + 1;
1009
1010            for(z = 0; z < strSize; z++){
1011                str[z] = uset_charAt(eSet, rand()%eSize);
1012            }
1013
1014            /* change the set to only include 'abnormal' characters (not A-Z, a-z, 0-9 */
1015            uset_removeAll(eSet, stdSet);
1016            eSize = uset_size(eSet);
1017
1018            /* if there are some non-normal characters left, put a few into the string, just to make sure we have some */
1019            if(eSize > 0){
1020                str[2%strSize] = uset_charAt(eSet, rand()%eSize);
1021                str[3%strSize] = uset_charAt(eSet, rand()%eSize);
1022                str[5%strSize] = uset_charAt(eSet, rand()%eSize);
1023                str[10%strSize] = uset_charAt(eSet, rand()%eSize);
1024                str[13%strSize] = uset_charAt(eSet, rand()%eSize);
1025            }
1026            /* terminate the string */
1027            str[strSize-1] = '\0';
1028            log_verbose("String used: %S\n", str);
1029
1030            /* get sort keys for both of them, and check that the keys are identicle */
1031            sortKeyLen1 = ucol_getSortKey(c1, str, u_strlen(str),  NULL, 0);
1032            sortKey1 = (uint8_t*)malloc(sizeof(uint8_t) * (sortKeyLen1 + 1));
1033            /*memset(sortKey1, 0xFE, sortKeyLen1);*/
1034            ucol_getSortKey(c1, str, u_strlen(str), sortKey1, sortKeyLen1 + 1);
1035            ucol_sortKeyToString(c1, sortKey1, sortKeyStr1, sortKeyStrLen1);
1036
1037            sortKeyLen2 = ucol_getSortKey(c2, str, u_strlen(str),  NULL, 0);
1038            sortKey2 = (uint8_t*)malloc(sizeof(uint8_t) * (sortKeyLen2 + 1));
1039            /*memset(sortKey2, 0xFE, sortKeyLen2);*/
1040            ucol_getSortKey(c2, str, u_strlen(str), sortKey2, sortKeyLen2 + 1);
1041            ucol_sortKeyToString(c2, sortKey2, sortKeyStr2, sortKeyStrLen2);
1042
1043            /* Check that the lengths are the same */
1044            if (sortKeyLen1 != sortKeyLen2) {
1045                log_err("ERROR : Sort key lengths %d and %d for text '%s' in locale '%s' do not match.\n",
1046                    sortKeyLen1, sortKeyLen2, str, curLoc);
1047            }
1048
1049            /* check that the keys are the same */
1050            if (memcmp(sortKey1, sortKey2, sortKeyLen1) != 0) {
1051                log_err("ERROR : Sort keys '%s' and '%s' for text '%s' in locale '%s' are not equivalent.\n",
1052                    sortKeyStr1, sortKeyStr2, str, curLoc);
1053            }
1054
1055            /* clean up after each string */
1056            free(sortKey1);
1057            free(sortKey2);
1058            uset_close(eSet);
1059        }
1060        /* clean up after each locale */
1061        ulocdata_close(uld);
1062        ucol_close(c1);
1063        ucol_close(c2);
1064    }
1065    /* final clean up */
1066    uset_close(stdSet);
1067}
1068/*
1069----------------------------------------------------------------------------
1070 ctor -- Tests the getSortKey
1071*/
1072void TestSortKey()
1073{
1074    uint8_t *sortk1 = NULL, *sortk2 = NULL, *sortk3 = NULL, *sortkEmpty = NULL;
1075    int32_t sortklen, osortklen;
1076    UCollator *col;
1077    UChar *test1, *test2, *test3;
1078    UErrorCode status = U_ZERO_ERROR;
1079    char toStringBuffer[256], *resultP;
1080    uint32_t toStringLen=sizeof(toStringBuffer)/sizeof(toStringBuffer[0]);
1081
1082
1083    uint8_t s1[] = { 0x9f, 0x00 };
1084    uint8_t s2[] = { 0x61, 0x00 };
1085    int  strcmpResult;
1086
1087    strcmpResult = strcmp((const char *)s1, (const char *)s2);
1088    log_verbose("strcmp(0x9f..., 0x61...) = %d\n", strcmpResult);
1089
1090    if(strcmpResult <= 0) {
1091      log_err("ERR: expected strcmp(\"9f 00\", \"61 00\") to be >=0 (GREATER).. got %d. Calling strcmp() for sortkeys may not work! \n",
1092              strcmpResult);
1093    }
1094
1095
1096    log_verbose("testing SortKey begins...\n");
1097    /* this is supposed to open default date format, but later on it treats it like it is "en_US"
1098       - very bad if you try to run the tests on machine where default locale is NOT "en_US" */
1099    /* col = ucol_open(NULL, &status); */
1100    col = ucol_open("en_US", &status);
1101    if (U_FAILURE(status)) {
1102        log_err_status(status, "ERROR: Default collation creation failed.: %s\n", myErrorName(status));
1103        return;
1104    }
1105
1106
1107    if(ucol_getStrength(col) != UCOL_DEFAULT_STRENGTH)
1108    {
1109        log_err("ERROR: default collation did not have UCOL_DEFAULT_STRENGTH !\n");
1110    }
1111    /* Need to use identical strength */
1112    ucol_setAttribute(col, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1113
1114    test1=(UChar*)malloc(sizeof(UChar) * 6);
1115    test2=(UChar*)malloc(sizeof(UChar) * 6);
1116    test3=(UChar*)malloc(sizeof(UChar) * 6);
1117
1118    memset(test1,0xFE, sizeof(UChar)*6);
1119    memset(test2,0xFE, sizeof(UChar)*6);
1120    memset(test3,0xFE, sizeof(UChar)*6);
1121
1122
1123    u_uastrcpy(test1, "Abcda");
1124    u_uastrcpy(test2, "abcda");
1125    u_uastrcpy(test3, "abcda");
1126
1127    log_verbose("Use tertiary comparison level testing ....\n");
1128
1129    sortklen=ucol_getSortKey(col, test1, u_strlen(test1),  NULL, 0);
1130    sortk1=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1));
1131    memset(sortk1,0xFE, sortklen);
1132    ucol_getSortKey(col, test1, u_strlen(test1), sortk1, sortklen+1);
1133
1134    sortklen=ucol_getSortKey(col, test2, u_strlen(test2),  NULL, 0);
1135    sortk2=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1));
1136    memset(sortk2,0xFE, sortklen);
1137    ucol_getSortKey(col, test2, u_strlen(test2), sortk2, sortklen+1);
1138
1139    osortklen = sortklen;
1140    sortklen=ucol_getSortKey(col, test2, u_strlen(test3),  NULL, 0);
1141    sortk3=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1));
1142    memset(sortk3,0xFE, sortklen);
1143    ucol_getSortKey(col, test2, u_strlen(test2), sortk3, sortklen+1);
1144
1145    doAssert( (sortklen == osortklen), "Sortkey length should be the same (abcda, abcda)");
1146
1147    doAssert( (memcmp(sortk1, sortk2, sortklen) > 0), "Result should be \"Abcda\" > \"abcda\"");
1148    doAssert( (memcmp(sortk2, sortk1, sortklen) < 0), "Result should be \"abcda\" < \"Abcda\"");
1149    doAssert( (memcmp(sortk2, sortk3, sortklen) == 0), "Result should be \"abcda\" ==  \"abcda\"");
1150
1151    resultP = ucol_sortKeyToString(col, sortk3, toStringBuffer, toStringLen);
1152    doAssert( (resultP != 0), "sortKeyToString failed!");
1153
1154#if 1 /* verobse log of sortkeys */
1155    {
1156      char junk2[1000];
1157      char junk3[1000];
1158      int i;
1159
1160      strcpy(junk2, "abcda[2] ");
1161      strcpy(junk3, " abcda[3] ");
1162
1163      for(i=0;i<sortklen;i++)
1164        {
1165          sprintf(junk2+strlen(junk2), "%02X ",(int)( 0xFF & sortk2[i]));
1166          sprintf(junk3+strlen(junk3), "%02X ",(int)( 0xFF & sortk3[i]));
1167        }
1168
1169      log_verbose("%s\n", junk2);
1170      log_verbose("%s\n", junk3);
1171    }
1172#endif
1173
1174    free(sortk1);
1175    free(sortk2);
1176    free(sortk3);
1177
1178    log_verbose("Use secondary comparision level testing ...\n");
1179    ucol_setStrength(col, UCOL_SECONDARY);
1180    sortklen=ucol_getSortKey(col, test1, u_strlen(test1),  NULL, 0);
1181    sortk1=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1));
1182    ucol_getSortKey(col, test1, u_strlen(test1), sortk1, sortklen+1);
1183    sortklen=ucol_getSortKey(col, test2, u_strlen(test2),  NULL, 0);
1184    sortk2=(uint8_t*)malloc(sizeof(uint8_t) * (sortklen+1));
1185    ucol_getSortKey(col, test2, u_strlen(test2), sortk2, sortklen+1);
1186
1187    doAssert( !(memcmp(sortk1, sortk2, sortklen) > 0), "Result should be \"Abcda\" == \"abcda\"");
1188    doAssert( !(memcmp(sortk2, sortk1, sortklen) < 0), "Result should be \"abcda\" == \"Abcda\"");
1189    doAssert( (memcmp(sortk1, sortk2, sortklen) == 0), "Result should be \"abcda\" ==  \"abcda\"");
1190
1191    log_verbose("getting sortkey for an empty string\n");
1192    ucol_setAttribute(col, UCOL_STRENGTH, UCOL_TERTIARY, &status);
1193    sortklen = ucol_getSortKey(col, test1, 0, NULL, 0);
1194    sortkEmpty = (uint8_t*)malloc(sizeof(uint8_t) * sortklen+1);
1195    sortklen = ucol_getSortKey(col, test1, 0, sortkEmpty, sortklen+1);
1196    if(sortklen != 3 || sortkEmpty[0] != 1 || sortkEmpty[0] != 1 || sortkEmpty[2] != 0) {
1197      log_err("Empty string generated wrong sortkey!\n");
1198    }
1199    free(sortkEmpty);
1200
1201    log_verbose("testing passing invalid string\n");
1202    sortklen = ucol_getSortKey(col, NULL, 0, NULL, 0);
1203    if(sortklen != 0) {
1204      log_err("Invalid string didn't return sortkey size of 0\n");
1205    }
1206
1207
1208    log_verbose("testing sortkey ends...\n");
1209    ucol_close(col);
1210    free(test1);
1211    free(test2);
1212    free(test3);
1213    free(sortk1);
1214    free(sortk2);
1215
1216}
1217void TestHashCode()
1218{
1219    uint8_t *sortk1, *sortk2, *sortk3;
1220    int32_t sortk1len, sortk2len, sortk3len;
1221    UCollator *col;
1222    UChar *test1, *test2, *test3;
1223    UErrorCode status = U_ZERO_ERROR;
1224    log_verbose("testing getHashCode begins...\n");
1225    col = ucol_open("en_US", &status);
1226    if (U_FAILURE(status)) {
1227        log_err_status(status, "ERROR: Default collation creation failed.: %s\n", myErrorName(status));
1228        return;
1229    }
1230    test1=(UChar*)malloc(sizeof(UChar) * 6);
1231    test2=(UChar*)malloc(sizeof(UChar) * 6);
1232    test3=(UChar*)malloc(sizeof(UChar) * 6);
1233    u_uastrcpy(test1, "Abcda");
1234    u_uastrcpy(test2, "abcda");
1235    u_uastrcpy(test3, "abcda");
1236
1237    log_verbose("Use tertiary comparison level testing ....\n");
1238    sortk1len=ucol_getSortKey(col, test1, u_strlen(test1),  NULL, 0);
1239    sortk1=(uint8_t*)malloc(sizeof(uint8_t) * (sortk1len+1));
1240    ucol_getSortKey(col, test1, u_strlen(test1), sortk1, sortk1len+1);
1241    sortk2len=ucol_getSortKey(col, test2, u_strlen(test2),  NULL, 0);
1242    sortk2=(uint8_t*)malloc(sizeof(uint8_t) * (sortk2len+1));
1243    ucol_getSortKey(col, test2, u_strlen(test2), sortk2, sortk2len+1);
1244    sortk3len=ucol_getSortKey(col, test2, u_strlen(test3),  NULL, 0);
1245    sortk3=(uint8_t*)malloc(sizeof(uint8_t) * (sortk3len+1));
1246    ucol_getSortKey(col, test2, u_strlen(test2), sortk3, sortk3len+1);
1247
1248
1249    log_verbose("ucol_hashCode() testing ...\n");
1250
1251    doAssert( ucol_keyHashCode(sortk1, sortk1len) != ucol_keyHashCode(sortk2, sortk2len), "Hash test1 result incorrect" );
1252    doAssert( !(ucol_keyHashCode(sortk1, sortk1len) == ucol_keyHashCode(sortk2, sortk2len)), "Hash test2 result incorrect" );
1253    doAssert( ucol_keyHashCode(sortk2, sortk2len) == ucol_keyHashCode(sortk3, sortk3len), "Hash result not equal" );
1254
1255    log_verbose("hashCode tests end.\n");
1256    ucol_close(col);
1257    free(sortk1);
1258    free(sortk2);
1259    free(sortk3);
1260    free(test1);
1261    free(test2);
1262    free(test3);
1263
1264
1265}
1266/*
1267 *----------------------------------------------------------------------------
1268 * Tests the UCollatorElements API.
1269 *
1270 */
1271void TestElemIter()
1272{
1273    int32_t offset;
1274    int32_t order1, order2, order3;
1275    UChar *testString1, *testString2;
1276    UCollator *col;
1277    UCollationElements *iterator1, *iterator2, *iterator3;
1278    UErrorCode status = U_ZERO_ERROR;
1279    log_verbose("testing UCollatorElements begins...\n");
1280    col = ucol_open("en_US", &status);
1281    ucol_setAttribute(col, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1282    if (U_FAILURE(status)) {
1283        log_err_status(status, "ERROR: Default collation creation failed.: %s\n", myErrorName(status));
1284        return;
1285    }
1286
1287    testString1=(UChar*)malloc(sizeof(UChar) * 150);
1288    testString2=(UChar*)malloc(sizeof(UChar) * 150);
1289    u_uastrcpy(testString1, "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?");
1290    u_uastrcpy(testString2, "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?");
1291
1292    log_verbose("Constructors and comparison testing....\n");
1293
1294    iterator1 = ucol_openElements(col, testString1, u_strlen(testString1), &status);
1295    if(U_FAILURE(status)) {
1296        log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status));
1297        ucol_close(col);
1298        return;
1299    }
1300    else{ log_verbose("PASS: Default collationElement iterator1 creation passed\n");}
1301
1302    iterator2 = ucol_openElements(col, testString1, u_strlen(testString1), &status);
1303    if(U_FAILURE(status)) {
1304        log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status));
1305        ucol_close(col);
1306        return;
1307    }
1308    else{ log_verbose("PASS: Default collationElement iterator2 creation passed\n");}
1309
1310    iterator3 = ucol_openElements(col, testString2, u_strlen(testString2), &status);
1311    if(U_FAILURE(status)) {
1312        log_err("ERROR: Default collationElement iterator creation failed.: %s\n", myErrorName(status));
1313        ucol_close(col);
1314        return;
1315    }
1316    else{ log_verbose("PASS: Default collationElement iterator3 creation passed\n");}
1317
1318    offset=ucol_getOffset(iterator1);
1319    ucol_setOffset(iterator1, 6, &status);
1320    if (U_FAILURE(status)) {
1321        log_err("Error in setOffset for UCollatorElements iterator.: %s\n", myErrorName(status));
1322        return;
1323    }
1324    if(ucol_getOffset(iterator1)==6)
1325        log_verbose("setOffset and getOffset working fine\n");
1326    else{
1327        log_err("error in set and get Offset got %d instead of 6\n", ucol_getOffset(iterator1));
1328    }
1329
1330    ucol_setOffset(iterator1, 0, &status);
1331    order1 = ucol_next(iterator1, &status);
1332    if (U_FAILURE(status)) {
1333        log_err("Somehow ran out of memory stepping through the iterator1.: %s\n", myErrorName(status));
1334        return;
1335    }
1336    order2=ucol_getOffset(iterator2);
1337    doAssert((order1 != order2), "The first iterator advance failed");
1338    order2 = ucol_next(iterator2, &status);
1339    if (U_FAILURE(status)) {
1340        log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
1341        return;
1342    }
1343    order3 = ucol_next(iterator3, &status);
1344    if (U_FAILURE(status)) {
1345        log_err("Somehow ran out of memory stepping through the iterator3.: %s\n", myErrorName(status));
1346        return;
1347    }
1348
1349    doAssert((order1 == order2), "The second iterator advance failed should be the same as first one");
1350
1351doAssert( (ucol_primaryOrder(order1) == ucol_primaryOrder(order3)), "The primary orders should be identical");
1352doAssert( (ucol_secondaryOrder(order1) == ucol_secondaryOrder(order3)), "The secondary orders should be identical");
1353doAssert( (ucol_tertiaryOrder(order1) == ucol_tertiaryOrder(order3)), "The tertiary orders should be identical");
1354
1355    order1=ucol_next(iterator1, &status);
1356    if (U_FAILURE(status)) {
1357        log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
1358        return;
1359    }
1360    order3=ucol_next(iterator3, &status);
1361    if (U_FAILURE(status)) {
1362        log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
1363        return;
1364    }
1365doAssert( (ucol_primaryOrder(order1) == ucol_primaryOrder(order3)), "The primary orders should be identical");
1366doAssert( (ucol_tertiaryOrder(order1) != ucol_tertiaryOrder(order3)), "The tertiary orders should be different");
1367
1368    order1=ucol_next(iterator1, &status);
1369    if (U_FAILURE(status)) {
1370        log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
1371        return;
1372    }
1373    order3=ucol_next(iterator3, &status);
1374    if (U_FAILURE(status)) {
1375        log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
1376        return;
1377    }
1378    /* this here, my friends, is either pure lunacy or something so obsolete that even it's mother
1379     * doesn't care about it. Essentialy, this test complains if secondary values for 'I' and '_'
1380     * are the same. According to the UCA, this is not true. Therefore, remove the test.
1381     * Besides, if primary strengths for two code points are different, it doesn't matter one bit
1382     * what is the relation between secondary or any other strengths.
1383     * killed by weiv 06/11/2002.
1384     */
1385    /*
1386    doAssert( ((order1 & UCOL_SECONDARYMASK) != (order3 & UCOL_SECONDARYMASK)), "The secondary orders should be different");
1387    */
1388    doAssert( (order1 != UCOL_NULLORDER), "Unexpected end of iterator reached");
1389
1390    free(testString1);
1391    free(testString2);
1392    ucol_closeElements(iterator1);
1393    ucol_closeElements(iterator2);
1394    ucol_closeElements(iterator3);
1395    ucol_close(col);
1396
1397    log_verbose("testing CollationElementIterator ends...\n");
1398}
1399
1400void TestGetLocale() {
1401  UErrorCode status = U_ZERO_ERROR;
1402  const char *rules = "&a<x<y<z";
1403  UChar rlz[256] = {0};
1404  uint32_t rlzLen = u_unescape(rules, rlz, 256);
1405
1406  UCollator *coll = NULL;
1407  const char *locale = NULL;
1408
1409  int32_t i = 0;
1410
1411  /* Now that the collation tree is separate, actual==valid at all times. [alan] */
1412  static const struct {
1413    const char* requestedLocale;
1414    const char* validLocale;
1415    const char* actualLocale;
1416  } testStruct[] = {
1417    { "sr_RS", "sr_Cyrl_RS", "sr" },
1418    { "sh_YU", "sr_Latn_RS", "sr_Latn" }, /* was sh, then aliased to hr, now sr_Latn via import per cldrbug 5647: */
1419    { "en_BE_FOO", "en_BE", "root" },
1420    { "de_DE_NONEXISTANT", "de_DE", "de" }
1421  };
1422
1423  /* test opening collators for different locales */
1424  for(i = 0; i<sizeof(testStruct)/sizeof(testStruct[0]); i++) {
1425    status = U_ZERO_ERROR;
1426    coll = ucol_open(testStruct[i].requestedLocale, &status);
1427    if(U_FAILURE(status)) {
1428      log_err_status(status, "Failed to open collator for %s with %s\n", testStruct[i].requestedLocale, u_errorName(status));
1429      ucol_close(coll);
1430      continue;
1431    }
1432   locale = ucol_getLocaleByType(coll, ULOC_REQUESTED_LOCALE, &status);
1433    if(strcmp(locale, testStruct[i].requestedLocale) != 0) {
1434      log_err("[Coll %s]: Error in requested locale, expected %s, got %s\n", testStruct[i].requestedLocale, testStruct[i].requestedLocale, locale);
1435    }
1436    locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status);
1437    if(strcmp(locale, testStruct[i].validLocale) != 0) {
1438      log_err("[Coll %s]: Error in valid locale, expected %s, got %s\n", testStruct[i].requestedLocale, testStruct[i].validLocale, locale);
1439    }
1440    locale = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, &status);
1441    if(strcmp(locale, testStruct[i].actualLocale) != 0) {
1442      log_err("[Coll %s]: Error in actual locale, expected %s, got %s\n", testStruct[i].requestedLocale, testStruct[i].actualLocale, locale);
1443    }
1444    ucol_close(coll);
1445  }
1446
1447  /* completely non-existant locale for collator should get a default collator */
1448  {
1449    UCollator *defaultColl = ucol_open(NULL, &status);
1450    coll = ucol_open("blahaha", &status);
1451    if(U_SUCCESS(status)) {
1452      if(strcmp(ucol_getLocaleByType(coll, ULOC_REQUESTED_LOCALE, &status), "blahaha")) {
1453        log_err("Nonexisting locale didn't preserve the requested locale\n");
1454      }
1455      if(strcmp(ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status),
1456        ucol_getLocaleByType(defaultColl, ULOC_VALID_LOCALE, &status))) {
1457        log_err("Valid locale for nonexisting locale locale collator differs "
1458          "from valid locale for default collator\n");
1459      }
1460      if(strcmp(ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, &status),
1461        ucol_getLocaleByType(defaultColl, ULOC_ACTUAL_LOCALE, &status))) {
1462        log_err("Actual locale for nonexisting locale locale collator differs "
1463          "from actual locale for default collator\n");
1464      }
1465      ucol_close(coll);
1466      ucol_close(defaultColl);
1467    } else {
1468      log_data_err("Couldn't open collators\n");
1469    }
1470  }
1471
1472
1473
1474  /* collator instantiated from rules should have all three locales NULL */
1475  coll = ucol_openRules(rlz, rlzLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
1476  locale = ucol_getLocaleByType(coll, ULOC_REQUESTED_LOCALE, &status);
1477  if(locale != NULL) {
1478    log_err("For collator instantiated from rules, requested locale returned %s instead of NULL\n", locale);
1479  }
1480  locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &status);
1481  if(locale != NULL) {
1482    log_err("For collator instantiated from rules,  valid locale returned %s instead of NULL\n", locale);
1483  }
1484  locale = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, &status);
1485  if(locale != NULL) {
1486    log_err("For collator instantiated from rules, actual locale returned %s instead of NULL\n", locale);
1487  }
1488  ucol_close(coll);
1489
1490}
1491
1492
/*
 * Checks that ucol_countAvailable() returns a non-negative count and logs
 * every locale name reported by ucol_getAvailable() at verbose level.
 */
void TestGetAll()
{
    int32_t idx;
    int32_t available = ucol_countAvailable();

    /* use something sensible w/o hardcoding the count */
    if(available >= 0){
        log_verbose("PASS: countAvailable() successful, it returned %d\n", available);
    }
    else{
        log_err("Error in countAvailable(), it returned %d\n", available);
    }

    idx = 0;
    while(idx < available){
        log_verbose("%s\n", ucol_getAvailable(idx));
        idx++;
    }
}
1509
1510
/* A test string together with the buffer that holds its sort key. */
struct teststruct {
    const char *original;
    uint8_t key[256];
} ;

/*
 * qsort()-style comparator for struct teststruct entries.
 * Orders two entries by comparing their key bytes with strcmp(), so the
 * keys are expected to be zero-terminated.
 */
static int compare_teststruct(const void *string1, const void *string2) {
    const struct teststruct *lhs = (const struct teststruct *)string1;
    const struct teststruct *rhs = (const struct teststruct *)string2;

    return strcmp((const char *)lhs->key, (const char *)rhs->key);
}
1519
1520void TestBounds() {
1521    UErrorCode status = U_ZERO_ERROR;
1522
1523    UCollator *coll = ucol_open("sh", &status);
1524
1525    uint8_t sortkey[512], lower[512], upper[512];
1526    UChar buffer[512];
1527
1528    static const char * const test[] = {
1529        "John Smith",
1530        "JOHN SMITH",
1531        "john SMITH",
1532        "j\\u00F6hn sm\\u00EFth",
1533        "J\\u00F6hn Sm\\u00EFth",
1534        "J\\u00D6HN SM\\u00CFTH",
1535        "john smithsonian",
1536        "John Smithsonian",
1537    };
1538
1539    struct teststruct tests[] = {
1540        {"\\u010CAKI MIHALJ" } ,
1541        {"\\u010CAKI MIHALJ" } ,
1542        {"\\u010CAKI PIRO\\u0160KA" },
1543        {"\\u010CABAI ANDRIJA" } ,
1544        {"\\u010CABAI LAJO\\u0160" } ,
1545        {"\\u010CABAI MARIJA" } ,
1546        {"\\u010CABAI STEVAN" } ,
1547        {"\\u010CABAI STEVAN" } ,
1548        {"\\u010CABARKAPA BRANKO" } ,
1549        {"\\u010CABARKAPA MILENKO" } ,
1550        {"\\u010CABARKAPA MIROSLAV" } ,
1551        {"\\u010CABARKAPA SIMO" } ,
1552        {"\\u010CABARKAPA STANKO" } ,
1553        {"\\u010CABARKAPA TAMARA" } ,
1554        {"\\u010CABARKAPA TOMA\\u0160" } ,
1555        {"\\u010CABDARI\\u0106 NIKOLA" } ,
1556        {"\\u010CABDARI\\u0106 ZORICA" } ,
1557        {"\\u010CABI NANDOR" } ,
1558        {"\\u010CABOVI\\u0106 MILAN" } ,
1559        {"\\u010CABRADI AGNEZIJA" } ,
1560        {"\\u010CABRADI IVAN" } ,
1561        {"\\u010CABRADI JELENA" } ,
1562        {"\\u010CABRADI LJUBICA" } ,
1563        {"\\u010CABRADI STEVAN" } ,
1564        {"\\u010CABRDA MARTIN" } ,
1565        {"\\u010CABRILO BOGDAN" } ,
1566        {"\\u010CABRILO BRANISLAV" } ,
1567        {"\\u010CABRILO LAZAR" } ,
1568        {"\\u010CABRILO LJUBICA" } ,
1569        {"\\u010CABRILO SPASOJA" } ,
1570        {"\\u010CADE\\u0160 ZDENKA" } ,
1571        {"\\u010CADESKI BLAGOJE" } ,
1572        {"\\u010CADOVSKI VLADIMIR" } ,
1573        {"\\u010CAGLJEVI\\u0106 TOMA" } ,
1574        {"\\u010CAGOROVI\\u0106 VLADIMIR" } ,
1575        {"\\u010CAJA VANKA" } ,
1576        {"\\u010CAJI\\u0106 BOGOLJUB" } ,
1577        {"\\u010CAJI\\u0106 BORISLAV" } ,
1578        {"\\u010CAJI\\u0106 RADOSLAV" } ,
1579        {"\\u010CAK\\u0160IRAN MILADIN" } ,
1580        {"\\u010CAKAN EUGEN" } ,
1581        {"\\u010CAKAN EVGENIJE" } ,
1582        {"\\u010CAKAN IVAN" } ,
1583        {"\\u010CAKAN JULIJAN" } ,
1584        {"\\u010CAKAN MIHAJLO" } ,
1585        {"\\u010CAKAN STEVAN" } ,
1586        {"\\u010CAKAN VLADIMIR" } ,
1587        {"\\u010CAKAN VLADIMIR" } ,
1588        {"\\u010CAKAN VLADIMIR" } ,
1589        {"\\u010CAKARA ANA" } ,
1590        {"\\u010CAKAREVI\\u0106 MOMIR" } ,
1591        {"\\u010CAKAREVI\\u0106 NEDELJKO" } ,
1592        {"\\u010CAKI \\u0160ANDOR" } ,
1593        {"\\u010CAKI AMALIJA" } ,
1594        {"\\u010CAKI ANDRA\\u0160" } ,
1595        {"\\u010CAKI LADISLAV" } ,
1596        {"\\u010CAKI LAJO\\u0160" } ,
1597        {"\\u010CAKI LASLO" } ,
1598    };
1599
1600
1601
1602    int32_t i = 0, j = 0, k = 0, buffSize = 0, skSize = 0, lowerSize = 0, upperSize = 0;
1603    int32_t arraySize = sizeof(tests)/sizeof(tests[0]);
1604
1605    if(U_SUCCESS(status) && coll) {
1606        for(i = 0; i<arraySize; i++) {
1607            buffSize = u_unescape(tests[i].original, buffer, 512);
1608            skSize = ucol_getSortKey(coll, buffer, buffSize, tests[i].key, 512);
1609        }
1610
1611        qsort(tests, arraySize, sizeof(struct teststruct), compare_teststruct);
1612
1613        for(i = 0; i < arraySize-1; i++) {
1614            for(j = i+1; j < arraySize; j++) {
1615                lowerSize = ucol_getBound(tests[i].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1616                upperSize = ucol_getBound(tests[j].key, -1, UCOL_BOUND_UPPER, 1, upper, 512, &status);
1617                for(k = i; k <= j; k++) {
1618                    if(strcmp((const char *)lower, (const char *)tests[k].key) > 0) {
1619                        log_err("Problem with lower! j = %i (%s vs %s)\n", k, tests[k].original, tests[i].original);
1620                    }
1621                    if(strcmp((const char *)upper, (const char *)tests[k].key) <= 0) {
1622                        log_err("Problem with upper! j = %i (%s vs %s)\n", k, tests[k].original, tests[j].original);
1623                    }
1624                }
1625            }
1626        }
1627
1628
1629#if 0
1630        for(i = 0; i < 1000; i++) {
1631            lowerRND = (rand()/(RAND_MAX/arraySize));
1632            upperRND = lowerRND + (rand()/(RAND_MAX/(arraySize-lowerRND)));
1633
1634            lowerSize = ucol_getBound(tests[lowerRND].key, -1, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1635            upperSize = ucol_getBound(tests[upperRND].key, -1, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1636
1637            for(j = lowerRND; j<=upperRND; j++) {
1638                if(strcmp(lower, tests[j].key) > 0) {
1639                    log_err("Problem with lower! j = %i (%s vs %s)\n", j, tests[j].original, tests[lowerRND].original);
1640                }
1641                if(strcmp(upper, tests[j].key) <= 0) {
1642                    log_err("Problem with upper! j = %i (%s vs %s)\n", j, tests[j].original, tests[upperRND].original);
1643                }
1644            }
1645        }
1646#endif
1647
1648
1649
1650
1651
1652        for(i = 0; i<sizeof(test)/sizeof(test[0]); i++) {
1653            buffSize = u_unescape(test[i], buffer, 512);
1654            skSize = ucol_getSortKey(coll, buffer, buffSize, sortkey, 512);
1655            lowerSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_LOWER, 1, lower, 512, &status);
1656            upperSize = ucol_getBound(sortkey, skSize, UCOL_BOUND_UPPER_LONG, 1, upper, 512, &status);
1657            for(j = i+1; j<sizeof(test)/sizeof(test[0]); j++) {
1658                buffSize = u_unescape(test[j], buffer, 512);
1659                skSize = ucol_getSortKey(coll, buffer, buffSize, sortkey, 512);
1660                if(strcmp((const char *)lower, (const char *)sortkey) > 0) {
1661                    log_err("Problem with lower! i = %i, j = %i (%s vs %s)\n", i, j, test[i], test[j]);
1662                }
1663                if(strcmp((const char *)upper, (const char *)sortkey) <= 0) {
1664                    log_err("Problem with upper! i = %i, j = %i (%s vs %s)\n", i, j, test[i], test[j]);
1665                }
1666            }
1667        }
1668        ucol_close(coll);
1669    } else {
1670        log_data_err("Couldn't open collator\n");
1671    }
1672
1673}
1674
1675static void doOverrunTest(UCollator *coll, const UChar *uString, int32_t strLen) {
1676    int32_t skLen = 0, skLen2 = 0;
1677    uint8_t sortKey[256];
1678    int32_t i, j;
1679    uint8_t filler = 0xFF;
1680
1681    skLen = ucol_getSortKey(coll, uString, strLen, NULL, 0);
1682
1683    for(i = 0; i < skLen; i++) {
1684        memset(sortKey, filler, 256);
1685        skLen2 = ucol_getSortKey(coll, uString, strLen, sortKey, i);
1686        if(skLen != skLen2) {
1687            log_err("For buffer size %i, got different sortkey length. Expected %i got %i\n", i, skLen, skLen2);
1688        }
1689        for(j = i; j < 256; j++) {
1690            if(sortKey[j] != filler) {
1691                log_err("Something run over index %i\n", j);
1692                break;
1693            }
1694        }
1695    }
1696}
1697
1698/* j1865 reports that if a shorter buffer is passed to
1699* to get sort key, a buffer overrun happens in some
1700* cases. This test tries to check this.
1701*/
1702void TestSortKeyBufferOverrun(void) {
1703    UErrorCode status = U_ZERO_ERROR;
1704    const char* cString = "A very Merry liTTle-lamB..";
1705    UChar uString[256];
1706    int32_t strLen = 0;
1707    UCollator *coll = ucol_open("root", &status);
1708    strLen = u_unescape(cString, uString, 256);
1709
1710    if(U_SUCCESS(status)) {
1711        log_verbose("testing non ignorable\n");
1712        ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
1713        doOverrunTest(coll, uString, strLen);
1714
1715        log_verbose("testing shifted\n");
1716        ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1717        doOverrunTest(coll, uString, strLen);
1718
1719        log_verbose("testing shifted quaternary\n");
1720        ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1721        doOverrunTest(coll, uString, strLen);
1722
1723        log_verbose("testing with french secondaries\n");
1724        ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
1725        ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
1726        ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
1727        doOverrunTest(coll, uString, strLen);
1728
1729    }
1730    ucol_close(coll);
1731}
1732
1733static void TestAttribute()
1734{
1735    UErrorCode error = U_ZERO_ERROR;
1736    UCollator *coll = ucol_open(NULL, &error);
1737
1738    if (U_FAILURE(error)) {
1739        log_err_status(error, "Creation of default collator failed\n");
1740        return;
1741    }
1742
1743    ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, UCOL_OFF, &error);
1744    if (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, &error) != UCOL_OFF ||
1745        U_FAILURE(error)) {
1746        log_err_status(error, "Setting and retrieving of the french collation failed\n");
1747    }
1748
1749    ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, UCOL_ON, &error);
1750    if (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, &error) != UCOL_ON ||
1751        U_FAILURE(error)) {
1752        log_err_status(error, "Setting and retrieving of the french collation failed\n");
1753    }
1754
1755    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &error);
1756    if (ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &error) != UCOL_SHIFTED ||
1757        U_FAILURE(error)) {
1758        log_err_status(error, "Setting and retrieving of the alternate handling failed\n");
1759    }
1760
1761    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &error);
1762    if (ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &error) != UCOL_NON_IGNORABLE ||
1763        U_FAILURE(error)) {
1764        log_err_status(error, "Setting and retrieving of the alternate handling failed\n");
1765    }
1766
1767    ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &error);
1768    if (ucol_getAttribute(coll, UCOL_CASE_FIRST, &error) != UCOL_LOWER_FIRST ||
1769        U_FAILURE(error)) {
1770        log_err_status(error, "Setting and retrieving of the case first attribute failed\n");
1771    }
1772
1773    ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &error);
1774    if (ucol_getAttribute(coll, UCOL_CASE_FIRST, &error) != UCOL_UPPER_FIRST ||
1775        U_FAILURE(error)) {
1776        log_err_status(error, "Setting and retrieving of the case first attribute failed\n");
1777    }
1778
1779    ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &error);
1780    if (ucol_getAttribute(coll, UCOL_CASE_LEVEL, &error) != UCOL_ON ||
1781        U_FAILURE(error)) {
1782        log_err_status(error, "Setting and retrieving of the case level attribute failed\n");
1783    }
1784
1785    ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &error);
1786    if (ucol_getAttribute(coll, UCOL_CASE_LEVEL, &error) != UCOL_OFF ||
1787        U_FAILURE(error)) {
1788        log_err_status(error, "Setting and retrieving of the case level attribute failed\n");
1789    }
1790
1791    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &error);
1792    if (ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, &error) != UCOL_ON ||
1793        U_FAILURE(error)) {
1794        log_err_status(error, "Setting and retrieving of the normalization on/off attribute failed\n");
1795    }
1796
1797    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &error);
1798    if (ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, &error) != UCOL_OFF ||
1799        U_FAILURE(error)) {
1800        log_err_status(error, "Setting and retrieving of the normalization on/off attribute failed\n");
1801    }
1802
1803    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &error);
1804    if (ucol_getAttribute(coll, UCOL_STRENGTH, &error) != UCOL_PRIMARY ||
1805        U_FAILURE(error)) {
1806        log_err_status(error, "Setting and retrieving of the collation strength failed\n");
1807    }
1808
1809    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &error);
1810    if (ucol_getAttribute(coll, UCOL_STRENGTH, &error) != UCOL_SECONDARY ||
1811        U_FAILURE(error)) {
1812        log_err_status(error, "Setting and retrieving of the collation strength failed\n");
1813    }
1814
1815    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &error);
1816    if (ucol_getAttribute(coll, UCOL_STRENGTH, &error) != UCOL_TERTIARY ||
1817        U_FAILURE(error)) {
1818        log_err_status(error, "Setting and retrieving of the collation strength failed\n");
1819    }
1820
1821    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &error);
1822    if (ucol_getAttribute(coll, UCOL_STRENGTH, &error) != UCOL_QUATERNARY ||
1823        U_FAILURE(error)) {
1824        log_err_status(error, "Setting and retrieving of the collation strength failed\n");
1825    }
1826
1827    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &error);
1828    if (ucol_getAttribute(coll, UCOL_STRENGTH, &error) != UCOL_IDENTICAL ||
1829        U_FAILURE(error)) {
1830        log_err_status(error, "Setting and retrieving of the collation strength failed\n");
1831    }
1832
1833    ucol_close(coll);
1834}
1835
1836void TestGetTailoredSet() {
1837  struct {
1838    const char *rules;
1839    const char *tests[20];
1840    int32_t testsize;
1841  } setTest[] = {
1842    { "&a < \\u212b", { "\\u212b", "A\\u030a", "\\u00c5" }, 3},
1843    { "& S < \\u0161 <<< \\u0160", { "\\u0161", "s\\u030C", "\\u0160", "S\\u030C" }, 4}
1844  };
1845
1846  int32_t i = 0, j = 0;
1847  UErrorCode status = U_ZERO_ERROR;
1848  UParseError pError;
1849
1850  UCollator *coll = NULL;
1851  UChar buff[1024];
1852  int32_t buffLen = 0;
1853  USet *set = NULL;
1854
1855  for(i = 0; i < sizeof(setTest)/sizeof(setTest[0]); i++) {
1856    buffLen = u_unescape(setTest[i].rules, buff, 1024);
1857    coll = ucol_openRules(buff, buffLen, UCOL_DEFAULT, UCOL_DEFAULT, &pError, &status);
1858    if(U_SUCCESS(status)) {
1859      set = ucol_getTailoredSet(coll, &status);
1860      if(uset_size(set) != setTest[i].testsize) {
1861        log_err("Tailored set size different (%d) than expected (%d)\n", uset_size(set), setTest[i].testsize);
1862      }
1863      for(j = 0; j < setTest[i].testsize; j++) {
1864        buffLen = u_unescape(setTest[i].tests[j], buff, 1024);
1865        if(!uset_containsString(set, buff, buffLen)) {
1866          log_err("Tailored set doesn't contain %s... It should\n", setTest[i].tests[j]);
1867        }
1868      }
1869      uset_close(set);
1870    } else {
1871      log_err_status(status, "Couldn't open collator with rules %s\n", setTest[i].rules);
1872    }
1873    ucol_close(coll);
1874  }
1875}
1876
/*
 * Compares two zero-terminated byte strings with memcmp(), looking only at
 * as many bytes as the shorter of the two contains. If one string is a
 * prefix of the other, the result is therefore 0.
 */
static int tMemCmp(const uint8_t *first, const uint8_t *second) {
   const int32_t lenOfFirst = (int32_t)strlen((const char *)first);
   const int32_t lenOfSecond = (int32_t)strlen((const char *)second);
   const int32_t commonLen = uprv_min(lenOfFirst, lenOfSecond);

   return memcmp(first, second, commonLen);
}
/*
 * Printable names of the collation strengths, used in log messages.
 * TestMergeSortKeys indexes this table with the strength value for
 * strengths up to UCOL_QUATERNARY and uses slot 4 for anything above.
 */
static const char * strengthsC[] = {
     "UCOL_PRIMARY",     /* 0 */
     "UCOL_SECONDARY",   /* 1 */
     "UCOL_TERTIARY",    /* 2 */
     "UCOL_QUATERNARY",  /* 3 */
     "UCOL_IDENTICAL"    /* 4 */
};
1889
1890void TestMergeSortKeys(void) {
1891   UErrorCode status = U_ZERO_ERROR;
1892   UCollator *coll = ucol_open("en", &status);
1893   if(U_SUCCESS(status)) {
1894
1895     const char* cases[] = {
1896       "abc",
1897         "abcd",
1898         "abcde"
1899     };
1900     uint32_t casesSize = sizeof(cases)/sizeof(cases[0]);
1901     const char* prefix = "foo";
1902     const char* suffix = "egg";
1903     char outBuff1[256], outBuff2[256];
1904
1905     uint8_t **sortkeys = (uint8_t **)malloc(casesSize*sizeof(uint8_t *));
1906     uint8_t **mergedPrefixkeys = (uint8_t **)malloc(casesSize*sizeof(uint8_t *));
1907     uint8_t **mergedSuffixkeys = (uint8_t **)malloc(casesSize*sizeof(uint8_t *));
1908     uint32_t *sortKeysLen = (uint32_t *)malloc(casesSize*sizeof(uint32_t));
1909     uint8_t prefixKey[256], suffixKey[256];
1910     uint32_t prefixKeyLen = 0, suffixKeyLen = 0, i = 0;
1911     UChar buffer[256];
1912     uint32_t unescapedLen = 0, l1 = 0, l2 = 0;
1913     UColAttributeValue strength;
1914
1915     log_verbose("ucol_mergeSortkeys test\n");
1916     log_verbose("Testing order of the test cases\n");
1917     genericLocaleStarter("en", cases, casesSize);
1918
1919     for(i = 0; i<casesSize; i++) {
1920       sortkeys[i] = (uint8_t *)malloc(256*sizeof(uint8_t));
1921       mergedPrefixkeys[i] = (uint8_t *)malloc(256*sizeof(uint8_t));
1922       mergedSuffixkeys[i] = (uint8_t *)malloc(256*sizeof(uint8_t));
1923     }
1924
1925     unescapedLen = u_unescape(prefix, buffer, 256);
1926     prefixKeyLen = ucol_getSortKey(coll, buffer, unescapedLen, prefixKey, 256);
1927
1928     unescapedLen = u_unescape(suffix, buffer, 256);
1929     suffixKeyLen = ucol_getSortKey(coll, buffer, unescapedLen, suffixKey, 256);
1930
1931     log_verbose("Massaging data with prefixes and different strengths\n");
1932     strength = UCOL_PRIMARY;
1933     while(strength <= UCOL_IDENTICAL) {
1934       log_verbose("Strength %s\n", strengthsC[strength<=UCOL_QUATERNARY?strength:4]);
1935       ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
1936       for(i = 0; i<casesSize; i++) {
1937         unescapedLen = u_unescape(cases[i], buffer, 256);
1938         sortKeysLen[i] = ucol_getSortKey(coll, buffer, unescapedLen, sortkeys[i], 256);
1939         ucol_mergeSortkeys(prefixKey, prefixKeyLen, sortkeys[i], sortKeysLen[i], mergedPrefixkeys[i], 256);
1940         ucol_mergeSortkeys(sortkeys[i], sortKeysLen[i], suffixKey, suffixKeyLen, mergedSuffixkeys[i], 256);
1941         if(i>0) {
1942           if(tMemCmp(mergedPrefixkeys[i-1], mergedPrefixkeys[i]) >= 0) {
1943             log_err("Error while comparing prefixed keys @ strength %s:\n", strengthsC[strength<=UCOL_QUATERNARY?strength:4]);
1944             log_err("%s\n%s\n",
1945                         ucol_sortKeyToString(coll, mergedPrefixkeys[i-1], outBuff1, l1),
1946                         ucol_sortKeyToString(coll, mergedPrefixkeys[i], outBuff2, l2));
1947           }
1948           if(tMemCmp(mergedSuffixkeys[i-1], mergedSuffixkeys[i]) >= 0) {
1949             log_err("Error while comparing suffixed keys @ strength %s:\n", strengthsC[strength<=UCOL_QUATERNARY?strength:4]);
1950             log_err("%s\n%s\n",
1951                         ucol_sortKeyToString(coll, mergedSuffixkeys[i-1], outBuff1, l1),
1952                         ucol_sortKeyToString(coll, mergedSuffixkeys[i], outBuff2, l2));
1953           }
1954         }
1955       }
1956       if(strength == UCOL_QUATERNARY) {
1957         strength = UCOL_IDENTICAL;
1958       } else {
1959         strength++;
1960       }
1961     }
1962
1963     {
1964       uint8_t smallBuf[3];
1965       uint32_t reqLen = 0;
1966       log_verbose("testing buffer overflow\n");
1967       reqLen = ucol_mergeSortkeys(prefixKey, prefixKeyLen, suffixKey, suffixKeyLen, smallBuf, 3);
1968       if(reqLen != (prefixKeyLen+suffixKeyLen)) {
1969         log_err("Wrong preflight size for merged sortkey\n");
1970       }
1971     }
1972
1973     {
1974       UChar empty = 0;
1975       uint8_t emptyKey[20], abcKey[50], mergedKey[100];
1976       int32_t emptyKeyLen = 0, abcKeyLen = 0, mergedKeyLen = 0;
1977
1978       log_verbose("testing merging with sortkeys generated for empty strings\n");
1979       emptyKeyLen = ucol_getSortKey(coll, &empty, 0, emptyKey, 20);
1980       unescapedLen = u_unescape(cases[0], buffer, 256);
1981       abcKeyLen = ucol_getSortKey(coll, buffer, unescapedLen, abcKey, 50);
1982       mergedKeyLen = ucol_mergeSortkeys(emptyKey, emptyKeyLen, abcKey, abcKeyLen, mergedKey, 100);
1983       if(mergedKey[0] != 2) {
1984         log_err("Empty sortkey didn't produce a level separator\n");
1985       }
1986       /* try with zeros */
1987       mergedKeyLen = ucol_mergeSortkeys(emptyKey, 0, abcKey, abcKeyLen, mergedKey, 100);
1988       if(mergedKeyLen != 0 || mergedKey[0] != 0) {
1989         log_err("Empty key didn't produce null mergedKey\n");
1990       }
1991       mergedKeyLen = ucol_mergeSortkeys(abcKey, abcKeyLen, emptyKey, 0, mergedKey, 100);
1992       if(mergedKeyLen != 0 || mergedKey[0] != 0) {
1993         log_err("Empty key didn't produce null mergedKey\n");
1994       }
1995
1996     }
1997
1998     for(i = 0; i<casesSize; i++) {
1999       free(sortkeys[i]);
2000       free(mergedPrefixkeys[i]);
2001       free(mergedSuffixkeys[i]);
2002     }
2003     free(sortkeys);
2004     free(mergedPrefixkeys);
2005     free(mergedSuffixkeys);
2006     free(sortKeysLen);
2007     ucol_close(coll);
2008     /* need to finish this up */
2009   } else {
2010     log_data_err("Couldn't open collator");
2011   }
2012}
2013static void TestShortString(void)
2014{
2015    struct {
2016        const char *input;
2017        const char *expectedOutput;
2018        const char *locale;
2019        UErrorCode expectedStatus;
2020        int32_t    expectedOffset;
2021        uint32_t   expectedIdentifier;
2022    } testCases[] = {
2023        /*
2024         * The following expectedOutput contains a collation weight (2700 from UCA 6.0)
2025         * which is the primary weight for the T character (U+0041) in the input.
2026         * When that character gets a different weight in FractionalUCA.txt,
2027         * the expectedOutput needs to be adjusted.
2028         * That is, when we upgrade to a new UCA version or change collation
2029         * in such a way that the absolute weight for 'A' changes,
2030         * we will get a test failure here and need to adjust the test case.
2031         */
2032        {"LDE_RDE_KPHONEBOOK_T0041_ZLATN","B2700_KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING, 0, 0 },
2033
2034        {"LEN_RUS_NO_AS_S4","AS_LROOT_NO_S4", NULL, U_USING_DEFAULT_WARNING, 0, 0 },
2035        {"LDE_VPHONEBOOK_EO_SI","EO_KPHONEBOOK_LDE_SI", "de@collation=phonebook", U_ZERO_ERROR, 0, 0 },
2036        {"LDE_Kphonebook","KPHONEBOOK_LDE", "de@collation=phonebook", U_ZERO_ERROR, 0, 0 },
2037        {"Xqde_DE@collation=phonebookq_S3_EX","KPHONEBOOK_LDE", "de@collation=phonebook", U_USING_FALLBACK_WARNING, 0, 0 },
2038        {"LFR_FO", "FO_LROOT", NULL, U_USING_DEFAULT_WARNING, 0, 0 },
2039        {"SO_LX_AS", "", NULL, U_ILLEGAL_ARGUMENT_ERROR, 8, 0 },
2040        {"S3_ASS_MMM", "", NULL, U_ILLEGAL_ARGUMENT_ERROR, 5, 0 }
2041    };
2042
2043    int32_t i = 0;
2044    UCollator *coll = NULL, *fromNormalized = NULL;
2045    UParseError parseError;
2046    UErrorCode status = U_ZERO_ERROR;
2047    char fromShortBuffer[256], normalizedBuffer[256], fromNormalizedBuffer[256];
2048    const char* locale = NULL;
2049
2050
2051    for(i = 0; i < sizeof(testCases)/sizeof(testCases[0]); i++) {
2052        status = U_ZERO_ERROR;
2053        if(testCases[i].locale) {
2054            locale = testCases[i].locale;
2055        } else {
2056            locale = NULL;
2057        }
2058
2059        coll = ucol_openFromShortString(testCases[i].input, FALSE, &parseError, &status);
2060        if(status != testCases[i].expectedStatus) {
2061            log_err_status(status, "Got status '%s' that is different from expected '%s' for '%s'\n",
2062                u_errorName(status), u_errorName(testCases[i].expectedStatus), testCases[i].input);
2063            continue;
2064        }
2065
2066        if(U_SUCCESS(status)) {
2067            ucol_getShortDefinitionString(coll, locale, fromShortBuffer, 256, &status);
2068
2069            if(strcmp(fromShortBuffer, testCases[i].expectedOutput)) {
2070                log_err("Got short string '%s' from the collator. Expected '%s' for input '%s'\n",
2071                    fromShortBuffer, testCases[i].expectedOutput, testCases[i].input);
2072            }
2073
2074            ucol_normalizeShortDefinitionString(testCases[i].input, normalizedBuffer, 256, &parseError, &status);
2075            fromNormalized = ucol_openFromShortString(normalizedBuffer, FALSE, &parseError, &status);
2076            ucol_getShortDefinitionString(fromNormalized, locale, fromNormalizedBuffer, 256, &status);
2077
2078            if(strcmp(fromShortBuffer, fromNormalizedBuffer)) {
2079                log_err("Strings obtained from collators instantiated by short string ('%s') and from normalized string ('%s') differ\n",
2080                    fromShortBuffer, fromNormalizedBuffer);
2081            }
2082
2083
2084            if(!ucol_equals(coll, fromNormalized)) {
2085                log_err("Collator from short string ('%s') differs from one obtained through a normalized version ('%s')\n",
2086                    testCases[i].input, normalizedBuffer);
2087            }
2088
2089            ucol_close(fromNormalized);
2090            ucol_close(coll);
2091
2092        } else {
2093            if(parseError.offset != testCases[i].expectedOffset) {
2094                log_err("Got parse error offset %i, but expected %i instead for '%s'\n",
2095                    parseError.offset, testCases[i].expectedOffset, testCases[i].input);
2096            }
2097        }
2098    }
2099
2100}
2101
2102static void
2103doSetsTest(const char *locale, const USet *ref, USet *set, const char* inSet, const char* outSet, UErrorCode *status) {
2104    UChar buffer[512];
2105    int32_t bufLen;
2106
2107    uset_clear(set);
2108    bufLen = u_unescape(inSet, buffer, 512);
2109    uset_applyPattern(set, buffer, bufLen, 0, status);
2110    if(U_FAILURE(*status)) {
2111        log_err("%s: Failure setting pattern %s\n", locale, u_errorName(*status));
2112    }
2113
2114    if(!uset_containsAll(ref, set)) {
2115        log_err("%s: Some stuff from %s is not present in the set\n", locale, inSet);
2116    }
2117
2118    uset_clear(set);
2119    bufLen = u_unescape(outSet, buffer, 512);
2120    uset_applyPattern(set, buffer, bufLen, 0, status);
2121    if(U_FAILURE(*status)) {
2122        log_err("%s: Failure setting pattern %s\n", locale, u_errorName(*status));
2123    }
2124
2125    if(!uset_containsNone(ref, set)) {
2126        log_err("%s: Some stuff from %s is present in the set\n", locale, outSet);
2127    }
2128}
2129
2130
2131
2132
2133static void
2134TestGetContractionsAndUnsafes(void)
2135{
2136    static struct {
2137        const char* locale;
2138        const char* inConts;
2139        const char* outConts;
2140        const char* inExp;
2141        const char* outExp;
2142        const char* unsafeCodeUnits;
2143        const char* safeCodeUnits;
2144    } tests[] = {
2145        { "ru",
2146            "[{\\u0418\\u0306}{\\u0438\\u0306}]",
2147            "[\\u0439\\u0457]",
2148            "[\\u00e6]",
2149            "[ae]",
2150            "[\\u0418\\u0438]",
2151            "[aAbB\\u0430\\u0410\\u0433\\u0413]"
2152        },
2153        { "uk",
2154            "[{\\u0406\\u0308}{\\u0456\\u0308}{\\u0418\\u0306}{\\u0438\\u0306}]",
2155            "[\\u0407\\u0419\\u0439\\u0457]",
2156            "[\\u00e6]",
2157            "[ae]",
2158            "[\\u0406\\u0456\\u0418\\u0438]",
2159            "[aAbBxv]",
2160        },
2161        { "sh",
2162            "[{C\\u0301}{C\\u030C}{C\\u0341}{DZ\\u030C}{Dz\\u030C}{D\\u017D}{D\\u017E}{lj}{nj}]",
2163            "[{\\u309d\\u3099}{\\u30fd\\u3099}]",
2164            "[\\u00e6]",
2165            "[a]",
2166            "[nlcdzNLCDZ]",
2167            "[jabv]"
2168        },
2169        { "ja",
2170          "[{\\u3053\\u3099\\u309D}{\\u3053\\u3099\\u309D\\u3099}{\\u3053\\u3099\\u309E}{\\u3053\\u3099\\u30FC}{\\u3053\\u309D}{\\u3053\\u309D\\u3099}{\\u3053\\u309E}{\\u3053\\u30FC}{\\u30B3\\u3099\\u30FC}{\\u30B3\\u3099\\u30FD}{\\u30B3\\u3099\\u30FD\\u3099}{\\u30B3\\u3099\\u30FE}{\\u30B3\\u30FC}{\\u30B3\\u30FD}{\\u30B3\\u30FD\\u3099}{\\u30B3\\u30FE}]",
2171          "[{\\u30FD\\u3099}{\\u309D\\u3099}{\\u3053\\u3099}{\\u30B3\\u3099}{lj}{nj}]",
2172            "[\\u30FE\\u00e6]",
2173            "[a]",
2174            "[\\u3099]",
2175            "[]"
2176        }
2177    };
2178
2179
2180
2181
2182    UErrorCode status = U_ZERO_ERROR;
2183    UCollator *coll = NULL;
2184    int32_t i = 0;
2185    int32_t noConts = 0;
2186    USet *conts = uset_open(0,0);
2187    USet *exp = uset_open(0, 0);
2188    USet *set  = uset_open(0,0);
2189    int32_t setBufferLen = 65536;
2190    UChar buffer[65536];
2191    int32_t setLen = 0;
2192
2193    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
2194        log_verbose("Testing locale: %s\n", tests[i].locale);
2195        coll = ucol_open(tests[i].locale, &status);
2196        if (coll == NULL || U_FAILURE(status)) {
2197            log_err_status(status, "Unable to open collator for locale %s ==> %s\n", tests[i].locale, u_errorName(status));
2198            continue;
2199        }
2200        ucol_getContractionsAndExpansions(coll, conts, exp, TRUE, &status);
2201        doSetsTest(tests[i].locale, conts, set, tests[i].inConts, tests[i].outConts, &status);
2202        setLen = uset_toPattern(conts, buffer, setBufferLen, TRUE, &status);
2203        if(U_SUCCESS(status)) {
2204            /*log_verbose("Contractions %i: %s\n", uset_getItemCount(conts), aescstrdup(buffer, setLen));*/
2205        } else {
2206            log_err("error %s. %i\n", u_errorName(status), setLen);
2207            status = U_ZERO_ERROR;
2208        }
2209        doSetsTest(tests[i].locale, exp, set, tests[i].inExp, tests[i].outExp, &status);
2210        setLen = uset_toPattern(exp, buffer, setBufferLen, TRUE, &status);
2211        if(U_SUCCESS(status)) {
2212            /*log_verbose("Expansions %i: %s\n", uset_getItemCount(exp), aescstrdup(buffer, setLen));*/
2213        } else {
2214            log_err("error %s. %i\n", u_errorName(status), setLen);
2215            status = U_ZERO_ERROR;
2216        }
2217
2218        noConts = ucol_getUnsafeSet(coll, conts, &status);
2219        doSetsTest(tests[i].locale, conts, set, tests[i].unsafeCodeUnits, tests[i].safeCodeUnits, &status);
2220        setLen = uset_toPattern(conts, buffer, setBufferLen, TRUE, &status);
2221        if(U_SUCCESS(status)) {
2222            log_verbose("Unsafe %i: %s\n", uset_getItemCount(exp), aescstrdup(buffer, setLen));
2223        } else {
2224            log_err("error %s. %i\n", u_errorName(status), setLen);
2225            status = U_ZERO_ERROR;
2226        }
2227
2228        ucol_close(coll);
2229    }
2230
2231
2232    uset_close(conts);
2233    uset_close(exp);
2234    uset_close(set);
2235}
2236
2237static void
2238TestOpenBinary(void)
2239{
2240    UErrorCode status = U_ZERO_ERROR;
2241    /*
2242    char rule[] = "&h < d < c < b";
2243    char *wUCA[] = { "a", "h", "d", "c", "b", "i" };
2244    char *noUCA[] = {"d", "c", "b", "a", "h", "i" };
2245    */
2246    /* we have to use Cyrillic letters because latin-1 always gets copied */
2247    const char rule[] = "&\\u0452 < \\u0434 < \\u0433 < \\u0432"; /* &dje < d < g < v */
2248    const char *wUCA[] = { "\\u0430", "\\u0452", "\\u0434", "\\u0433", "\\u0432", "\\u0435" }; /* a, dje, d, g, v, e */
2249    const char *noUCA[] = {"\\u0434", "\\u0433", "\\u0432", "\\u0430", "\\u0435", "\\u0452" }; /* d, g, v, a, e, dje */
2250
2251    UChar uRules[256];
2252    int32_t uRulesLen = u_unescape(rule, uRules, 256);
2253
2254    UCollator *coll = ucol_openRules(uRules, uRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2255    UCollator *UCA = NULL;
2256    UCollator *cloneNOUCA = NULL, *cloneWUCA = NULL;
2257
2258    uint8_t imageBuffer[32768];
2259    uint8_t *image = imageBuffer;
2260    int32_t imageBufferCapacity = 32768;
2261
2262    int32_t imageSize;
2263
2264    if((coll==NULL)||(U_FAILURE(status))) {
2265        log_data_err("could not load collators or error occured: %s\n",
2266            u_errorName(status));
2267        return;
2268    }
2269    UCA = ucol_open("root", &status);
2270    if((UCA==NULL)||(U_FAILURE(status))) {
2271        log_data_err("could not load UCA collator or error occured: %s\n",
2272            u_errorName(status));
2273        return;
2274    }
2275    imageSize = ucol_cloneBinary(coll, image, imageBufferCapacity, &status);
2276    if(U_FAILURE(status)) {
2277        image = (uint8_t *)malloc(imageSize*sizeof(uint8_t));
2278        status = U_ZERO_ERROR;
2279        imageSize = ucol_cloneBinary(coll, imageBuffer, imageSize, &status);
2280    }
2281
2282
2283    cloneWUCA = ucol_openBinary(image, imageSize, UCA, &status);
2284    cloneNOUCA = ucol_openBinary(image, imageSize, NULL, &status);
2285
2286    genericOrderingTest(coll, wUCA, sizeof(wUCA)/sizeof(wUCA[0]));
2287
2288    genericOrderingTest(cloneWUCA, wUCA, sizeof(wUCA)/sizeof(wUCA[0]));
2289    genericOrderingTest(cloneNOUCA, noUCA, sizeof(noUCA)/sizeof(noUCA[0]));
2290
2291    if(image != imageBuffer) {
2292        free(image);
2293    }
2294    ucol_close(coll);
2295    ucol_close(cloneNOUCA);
2296    ucol_close(cloneWUCA);
2297    ucol_close(UCA);
2298}
2299
2300static void TestDefault(void) {
2301    /* Tests for code coverage. */
2302    UErrorCode status = U_ZERO_ERROR;
2303    UCollator *coll = ucol_open("es@collation=pinyin", &status);
2304    if (coll == NULL || status == U_FILE_ACCESS_ERROR) {
2305        log_data_err("Unable to open collator es@collation=pinyin\n");
2306        return;
2307    }
2308    if (status != U_USING_DEFAULT_WARNING) {
2309        /* What do you mean that you know about using pinyin collation in Spanish!? This should be in the zh locale. */
2310        log_err("es@collation=pinyin should return U_USING_DEFAULT_WARNING, but returned %s\n", u_errorName(status));
2311    }
2312    ucol_close(coll);
2313    if (ucol_getKeywordValues("funky", &status) != NULL) {
2314        log_err("Collators should not know about the funky keyword.\n");
2315    }
2316    if (status != U_ILLEGAL_ARGUMENT_ERROR) {
2317        log_err("funky keyword didn't fail as expected %s\n", u_errorName(status));
2318    }
2319    if (ucol_getKeywordValues("collation", &status) != NULL) {
2320        log_err("ucol_getKeywordValues should not work when given a bad status.\n");
2321    }
2322}
2323
2324static void TestDefaultKeyword(void) {
2325    /* Tests for code coverage. */
2326    UErrorCode status = U_ZERO_ERROR;
2327    const char *loc = "zh_TW@collation=default";
2328    UCollator *coll = ucol_open(loc, &status);
2329    if(U_FAILURE(status)) {
2330        log_info("Warning: ucol_open(%s, ...) returned %s, at least it didn't crash.\n", loc, u_errorName(status));
2331    } else if (status != U_USING_FALLBACK_WARNING) {
2332        /* Hmm, skip the following test for CLDR 1.9 data and/or ICU 4.6, no longer seems to apply */
2333        #if 0
2334        log_err("ucol_open(%s, ...) should return an error or some sort of U_USING_FALLBACK_WARNING, but returned %s\n", loc, u_errorName(status));
2335        #endif
2336    }
2337    ucol_close(coll);
2338}
2339
2340static void TestGetKeywordValuesForLocale(void) {
2341#define INCLUDE_UNIHAN_COLLATION 0
2342#define PREFERRED_SIZE 16
2343#define MAX_NUMBER_OF_KEYWORDS 8
2344    const char *PREFERRED[PREFERRED_SIZE][MAX_NUMBER_OF_KEYWORDS+1] = {
2345            { "und",            "standard", "search", NULL, NULL, NULL, NULL, NULL, NULL },
2346            { "en_US",          "standard", "search", NULL, NULL, NULL, NULL, NULL, NULL },
2347            { "en_029",         "standard", "search", NULL, NULL, NULL, NULL, NULL, NULL },
2348            { "de_DE",          "standard", "phonebook", "search", NULL, NULL, NULL, NULL, NULL },
2349            { "de_Latn_DE",     "standard", "phonebook", "search", NULL, NULL, NULL, NULL, NULL },
2350#if INCLUDE_UNIHAN_COLLATION
2351            { "zh",             "pinyin", "big5han", "gb2312han", "stroke", "zhuyin", "unihan", "search", "standard" },
2352            { "zh_Hans",        "pinyin", "big5han", "gb2312han", "stroke", "zhuyin", "unihan", "search", "standard" },
2353            { "zh_CN",          "pinyin", "big5han", "gb2312han", "stroke", "zhuyin", "unihan", "search", "standard" },
2354            { "zh_Hant",        "stroke", "big5han", "gb2312han", "pinyin", "zhuyin", "unihan", "search", "standard" },
2355            { "zh_TW",          "stroke", "big5han", "gb2312han", "pinyin", "zhuyin", "unihan", "search", "standard" },
2356            { "zh__PINYIN",     "pinyin", "big5han", "gb2312han", "stroke", "zhuyin", "unihan", "search", "standard" },
2357#else
2358            // BEGIN android-changed.  No big5han, gb2312han, or zhuyin in Android
2359            { "zh",             "pinyin", "stroke", "search", "standard", NULL, NULL, NULL, NULL },
2360            { "zh_Hans",        "pinyin", "stroke", "search", "standard", NULL, NULL, NULL, NULL },
2361            { "zh_CN",          "pinyin", "stroke", "search", "standard", NULL, NULL, NULL, NULL },
2362            { "zh_Hant",        "stroke", "pinyin", "search", "standard", NULL, NULL, NULL, NULL },
2363            { "zh_TW",          "stroke", "pinyin", "search", "standard", NULL, NULL, NULL, NULL },
2364            { "zh__PINYIN",     "pinyin", "stroke", "search", "standard", NULL, NULL, NULL, NULL },
2365            // END android-changed
2366#endif
2367            { "es_ES",          "standard", "search", "traditional", NULL, NULL, NULL, NULL, NULL },
2368            { "es__TRADITIONAL","traditional", "search", "standard", NULL, NULL, NULL, NULL, NULL },
2369            { "und@collation=phonebook",    "standard", "search", NULL, NULL, NULL, NULL, NULL, NULL },
2370            { "de_DE@collation=big5han",    "standard", "phonebook", "search", NULL, NULL, NULL, NULL, NULL},
2371
2372            { "zzz@collation=xxx",          "standard", "search", NULL, NULL, NULL, NULL, NULL, NULL }
2373    };
2374#if INCLUDE_UNIHAN_COLLATION
2375    const int32_t expectedLength[PREFERRED_SIZE] = { 2, 2, 2, 3, 3, 8, 8, 8, 8, 8, 8, 3, 3, 2, 3, 2 };
2376#else
2377    // BEGIN android-change
2378    // Fewer cases are expected for "zh*" since Android removes collation sequences to save space.
2379    const int32_t expectedLength[PREFERRED_SIZE] = { 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 2, 3, 2 };
2380    // END android-change
2381#endif
2382
2383    UErrorCode status = U_ZERO_ERROR;
2384    UEnumeration *keywordValues = NULL;
2385    int32_t i, n, size, valueLength;
2386    const char *locale = NULL, *value = NULL;
2387    UBool errorOccurred = FALSE;
2388
2389    for (i = 0; i < PREFERRED_SIZE; i++) {
2390        locale = PREFERRED[i][0];
2391        value = NULL;
2392        valueLength = 0;
2393        size = 0;
2394
2395        keywordValues = ucol_getKeywordValuesForLocale("collation", locale, TRUE, &status);
2396        if (keywordValues == NULL || U_FAILURE(status)) {
2397            log_err_status(status, "Error getting keyword values: %s\n", u_errorName(status));
2398            break;
2399        }
2400        size = uenum_count(keywordValues, &status);
2401
2402        if (size == expectedLength[i]) {
2403            for (n = 0; n < expectedLength[i]; n++) {
2404                if ((value = uenum_next(keywordValues, &valueLength, &status)) != NULL && U_SUCCESS(status)) {
2405                    if (uprv_strcmp(value, PREFERRED[i][n+1]) != 0) {
2406                        log_err("Keyword values differ: Got [%s] Expected [%s] for locale: %s\n", value, PREFERRED[i][n+1], locale);
2407                        errorOccurred = TRUE;
2408                        break;
2409                    }
2410
2411                } else {
2412                    log_err("While getting keyword value from locale: %s got this error: %s\n", locale, u_errorName(status));
2413                    errorOccurred = TRUE;
2414                    break;
2415                }
2416            }
2417            if (errorOccurred) {
2418                break;
2419            }
2420        } else {
2421            log_err("Number of keywords (%d) does not match expected size (%d) for locale: %s\n", size, expectedLength[i], locale);
2422            break;
2423        }
2424        uenum_close(keywordValues);
2425        keywordValues = NULL;
2426    }
2427    if (keywordValues != NULL) {
2428        uenum_close(keywordValues);
2429    }
2430}
2431
2432
2433#endif /* #if !UCONFIG_NO_COLLATION */
2434