1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2015, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "cmemory.h"
33#include "cstring.h"
34#include "uassert.h"
35#include "unicode/parseerr.h"
36#include "unicode/ucnv.h"
37#include "unicode/ures.h"
38#include "unicode/uscript.h"
39#include "unicode/utf16.h"
40#include "uparse.h"
41#include "putilimp.h"
42
43
/*
 * Number of elements in a true array (never pass a pointer).
 * The argument is parenthesized in both positions so that any array-valued
 * expression can be used without precedence surprises.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row width, in UChars, of the fixed-size UChar test tables declared in this file. */
#define MAX_TOKEN_LEN 16
47
/*
 * Function type for a pluggable string-comparison routine: it receives an
 * opaque collator pointer, an integer "object" selector, and two UChar strings
 * with their lengths, and returns a UCollationResult.
 * NOTE(review): no user of this typedef is visible in this part of the file;
 * the meaning of `object` should be confirmed against its callers.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
51
52
53
/*
 * Input for the first half of IncompleteCntTest (rule " & Z < ABC < Q < B").
 * The rows are listed in expected collation order: IncompleteCntTest requires
 * every row to compare UCOL_LESS against every later row.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ",
  "AB", "ABZ", "ABQ",
  "Z", "ABC", "Q", "B"
};

/*
 * Input for the second half of IncompleteCntTest
 * (rule " & Z < DAVIS < MARK <DAV"), again in expected collation order.
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ",
  "MAR", "Z",
  "DAVIS", "MARK", "DAV", "DAVI"
};
80
81static void IncompleteCntTest(void)
82{
83  UErrorCode status = U_ZERO_ERROR;
84  UChar temp[90];
85  UChar t1[90];
86  UChar t2[90];
87
88  UCollator *coll =  NULL;
89  uint32_t i = 0, j = 0;
90  uint32_t size = 0;
91
92  u_uastrcpy(temp, " & Z < ABC < Q < B");
93
94  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95
96  if(U_SUCCESS(status)) {
97    size = sizeof(cnt1)/sizeof(cnt1[0]);
98    for(i = 0; i < size-1; i++) {
99      for(j = i+1; j < size; j++) {
100        UCollationElements *iter;
101        u_uastrcpy(t1, cnt1[i]);
102        u_uastrcpy(t2, cnt1[j]);
103        doTest(coll, t1, t2, UCOL_LESS);
104        /* synwee : added collation element iterator test */
105        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106        if (U_FAILURE(status)) {
107          log_err("Creation of iterator failed\n");
108          break;
109        }
110        backAndForth(iter);
111        ucol_closeElements(iter);
112      }
113    }
114  }
115
116  ucol_close(coll);
117
118
119  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121
122  if(U_SUCCESS(status)) {
123    size = sizeof(cnt2)/sizeof(cnt2[0]);
124    for(i = 0; i < size-1; i++) {
125      for(j = i+1; j < size; j++) {
126        UCollationElements *iter;
127        u_uastrcpy(t1, cnt2[i]);
128        u_uastrcpy(t2, cnt2[j]);
129        doTest(coll, t1, t2, UCOL_LESS);
130
131        /* synwee : added collation element iterator test */
132        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133        if (U_FAILURE(status)) {
134          log_err("Creation of iterator failed\n");
135          break;
136        }
137        backAndForth(iter);
138        ucol_closeElements(iter);
139      }
140    }
141  }
142
143  ucol_close(coll);
144
145
146}
147
/*
 * Strings for the UCOL_SHIFTED part of BlackBirdTest, in the order expected at
 * quaternary strength (every row must compare UCOL_LESS to every later row).
 */
const static char shifted[][20] = {
  "black bird",
  "black-bird",
  "blackbird",
  "black Bird",
  "black-Bird",
  "blackBird",
  "black birds",
  "black-birds",
  "blackbirds"
};

/*
 * Expected results at tertiary strength with shifted alternates:
 * shiftedTert[i] is the outcome of comparing shifted[i-1] with shifted[i].
 * BlackBirdTest starts at i = 1, so element 0 is never read.
 */
const static UCollationResult shiftedTert[] = {
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_LESS,
  UCOL_EQUAL,
  UCOL_EQUAL,
  UCOL_LESS,
  UCOL_EQUAL,
  UCOL_EQUAL
};

/*
 * The same phrases in the order expected by BlackBirdTest with
 * UCOL_NON_IGNORABLE alternate handling (every row less than every later row).
 */
const static char nonignorable[][20] = {
  "black bird",
  "black Bird",
  "black birds",
  "black-bird",
  "black-Bird",
  "black-birds",
  "blackbird",
  "blackBird",
  "blackbirds"
};
183
184static void BlackBirdTest(void) {
185  UErrorCode status = U_ZERO_ERROR;
186  UChar t1[90];
187  UChar t2[90];
188
189  uint32_t i = 0, j = 0;
190  uint32_t size = 0;
191  UCollator *coll = ucol_open("en_US", &status);
192
193  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195
196  if(U_SUCCESS(status)) {
197    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198    for(i = 0; i < size-1; i++) {
199      for(j = i+1; j < size; j++) {
200        u_uastrcpy(t1, nonignorable[i]);
201        u_uastrcpy(t2, nonignorable[j]);
202        doTest(coll, t1, t2, UCOL_LESS);
203      }
204    }
205  }
206
207  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209
210  if(U_SUCCESS(status)) {
211    size = sizeof(shifted)/sizeof(shifted[0]);
212    for(i = 0; i < size-1; i++) {
213      for(j = i+1; j < size; j++) {
214        u_uastrcpy(t1, shifted[i]);
215        u_uastrcpy(t2, shifted[j]);
216        doTest(coll, t1, t2, UCOL_LESS);
217      }
218    }
219  }
220
221  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222  if(U_SUCCESS(status)) {
223    size = sizeof(shifted)/sizeof(shifted[0]);
224    for(i = 1; i < size; i++) {
225      u_uastrcpy(t1, shifted[i-1]);
226      u_uastrcpy(t2, shifted[i]);
227      doTest(coll, t1, t2, shiftedTert[i]);
228    }
229  }
230
231  ucol_close(coll);
232}
233
/*
 * Source strings for FunkyATest: 'A' (or precomposed U+00C0) followed by
 * combining marks, each row NUL-terminated.
 */
const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
    {0x0041/*'A'*/, 0x0300, 0x0000},
    {0x00C0, 0x0301, 0x0000},
    /* this would work with forced normalization */
    {0x00C0, 0x0316, 0x0000}
};

/* Target strings for FunkyATest; row i is compared against testSourceCases[i]. */
const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
    {0x00C0, 0},
    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
    /* this would work with forced normalization */
    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
};

/*
 * results[i] is the expected outcome of comparing testSourceCases[i] with
 * testTargetCases[i]. FunkyATest only iterates over the first four rows, so
 * the fifth entry is currently not exercised.
 */
const static UCollationResult results[] = {
    UCOL_GREATER,
    UCOL_EQUAL,
    UCOL_EQUAL,
    UCOL_GREATER,
    UCOL_EQUAL
};
259
260static void FunkyATest(void)
261{
262
263    int32_t i;
264    UErrorCode status = U_ZERO_ERROR;
265    UCollator  *myCollation;
266    myCollation = ucol_open("en_US", &status);
267    if(U_FAILURE(status)){
268        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269        return;
270    }
271    log_verbose("Testing some A letters, for some reason\n");
272    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273    ucol_setStrength(myCollation, UCOL_TERTIARY);
274    for (i = 0; i < 4 ; i++)
275    {
276        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277    }
278    ucol_close(myCollation);
279}
280
/*
 * Attribute value tables. Within the visible part of this file they are only
 * iterated by the disabled (#if 0) PrintMarkDavis helper below.
 */

/* Values for UCOL_CASE_FIRST. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values for UCOL_ALTERNATE_HANDLING. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values for UCOL_CASE_LEVEL. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values for UCOL_STRENGTH, weakest comparison level first. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
305
#if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, index-aligned with the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "M\u00e4rk Davis" for every combination of
 * case-first, alternate-handling, case-level and strength settings.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k, sortkeysize;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    fprintf(stderr, "Unable to open collator: %s\n", u_errorName(status));
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  /* Replace the 'a' of "Mark" with U+00E4. */
  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    /* Index with h, the case-first loop variable. The previous code used i,
     * which still held sizem from the dump loop above (out of bounds). */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* Release the collator; it used to be leaked. */
  ucol_close(coll);
}
#endif
383
384static void BillFairmanTest(void) {
385/*
386** check for actual locale via ICU resource bundles
387**
388** lp points to the original locale ("fr_FR_....")
389*/
390
391    UResourceBundle *lr,*cr;
392    UErrorCode              lec = U_ZERO_ERROR;
393    const char *lp = "fr_FR_you_ll_never_find_this_locale";
394
395    log_verbose("BillFairmanTest\n");
396
397    lr = ures_open(NULL,lp,&lec);
398    if (lr) {
399        cr = ures_getByKey(lr,"collations",0,&lec);
400        if (cr) {
401            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402            if (lp) {
403                if (U_SUCCESS(lec)) {
404                    if(strcmp(lp, "fr") != 0) {
405                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406                    }
407                }
408            }
409            ures_close(cr);
410        }
411        ures_close(lr);
412    }
413}
414
/*
 * Data for TestChMove, in the order expected from the "cs" collator: every row
 * must sort before every later row. Rows hold u_unescape() escape sequences,
 * so "\\u030C" is unescaped to U+030C before comparison.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
434
435static void TestChMove(void) {
436    UChar t1[256] = {0};
437    UChar t2[256] = {0};
438
439    uint32_t i = 0, j = 0;
440    uint32_t size = 0;
441    UErrorCode status = U_ZERO_ERROR;
442
443    UCollator *coll = ucol_open("cs", &status);
444
445    if(U_SUCCESS(status)) {
446        size = sizeof(chTest)/sizeof(chTest[0]);
447        for(i = 0; i < size-1; i++) {
448            for(j = i+1; j < size; j++) {
449                u_unescape(chTest[i], t1, 256);
450                u_unescape(chTest[j], t2, 256);
451                doTest(coll, t1, t2, UCOL_LESS);
452            }
453        }
454    }
455    else {
456        log_data_err("Can't open collator");
457    }
458    ucol_close(coll);
459}
460
461
462
463
464/*
465const static char impTest[][20] = {
466  "\\u4e00",
467    "a",
468    "A",
469    "b",
470    "B",
471    "\\u4e01"
472};
473*/
474
475
/*
 * Table-driven test of tailorings anchored at CJK ideographs
 * (U+4E00..U+4E03). Each entry supplies a rule string and the strings expected
 * in ascending order under it; genericRulesStarter performs the checking.
 *
 * An older hand-written variant of this test (driven by an impTest table) has
 * been replaced by the table below.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      {
        /* Tailor b and c before U+4E00. */
        "&[before 1]\\u4e00 < b < c "
        /* Now, before U+4E00 is c; put d and e after that. */
        "&[before 1]\\u4e00 < d < e",
        { "b", "c", "d", "e", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };
  const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t testNo = 0;

  while(testNo < testCount) {
      genericRulesStarter(tests[testNo].rules, tests[testNo].data, tests[testNo].len);
      ++testNo;
  }
}
530
531static void TestFCDProblem(void) {
532  UChar t1[256] = {0};
533  UChar t2[256] = {0};
534
535  const char *s1 = "\\u0430\\u0306\\u0325";
536  const char *s2 = "\\u04D1\\u0325";
537
538  UErrorCode status = U_ZERO_ERROR;
539  UCollator *coll = ucol_open("", &status);
540  u_unescape(s1, t1, 256);
541  u_unescape(s2, t2, 256);
542
543  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
544  doTest(coll, t1, t2, UCOL_EQUAL);
545
546  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
547  doTest(coll, t1, t2, UCOL_EQUAL);
548
549  ucol_close(coll);
550}
551
/*
The longest normalized form of a single code point is 18 code units for
NFKC/NFKD, 4 for NFD and 3 for NFC (figures as stated by the original author).
We're only using NFC/NFD in this test, so 18 leaves ample room.
*/
#define NORM_BUFFER_TEST_LEN 18
/* One TestComposeDecompose case: a code point plus the NFC and NFD strings compared for it. */
typedef struct {
  UChar32 u;                         /* code point under test */
  UChar NFC[NORM_BUFFER_TEST_LEN];   /* composed form, NUL-terminated */
  UChar NFD[NORM_BUFFER_TEST_LEN];   /* decomposed form, NUL-terminated */
} tester;
562
563static void TestComposeDecompose(void) {
564    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
565    static const UChar UNICODESET_STR[] = {
566        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
567        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
568        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
569    };
570    int32_t noOfLoc;
571    int32_t i = 0, j = 0;
572
573    UErrorCode status = U_ZERO_ERROR;
574    const char *locName = NULL;
575    uint32_t nfcSize;
576    uint32_t nfdSize;
577    tester **t;
578    uint32_t noCases = 0;
579    UCollator *coll = NULL;
580    UChar32 u = 0;
581    UChar comp[NORM_BUFFER_TEST_LEN];
582    uint32_t len = 0;
583    UCollationElements *iter;
584    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
585    int32_t charsToTestSize;
586
587    noOfLoc = uloc_countAvailable();
588
589    coll = ucol_open("", &status);
590    if (U_FAILURE(status)) {
591        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
592        return;
593    }
594    charsToTestSize = uset_size(charsToTest);
595    if (charsToTestSize <= 0) {
596        log_err("Set was zero. Missing data?\n");
597        return;
598    }
599    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
600    t[0] = (tester *)malloc(sizeof(tester));
601    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
602
603    for(u = 0; u < charsToTestSize; u++) {
604        UChar32 ch = uset_charAt(charsToTest, u);
605        len = 0;
606        U16_APPEND_UNSAFE(comp, len, ch);
607        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
608        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
609
610        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
611          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
612            t[noCases]->u = ch;
613            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
614                u_strncpy(t[noCases]->NFC, comp, len);
615                t[noCases]->NFC[len] = 0;
616            }
617            noCases++;
618            t[noCases] = (tester *)malloc(sizeof(tester));
619            uprv_memset(t[noCases], 0, sizeof(tester));
620        }
621    }
622    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
623    uset_close(charsToTest);
624    charsToTest = NULL;
625
626    for(u=0; u<(UChar32)noCases; u++) {
627        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
628            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
629            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
630        }
631    }
632    /*
633    for(u = 0; u < charsToTestSize; u++) {
634      if(!(u&0xFFFF)) {
635        log_verbose("%08X ", u);
636      }
637      uprv_memset(t[noCases], 0, sizeof(tester));
638      t[noCases]->u = u;
639      len = 0;
640      U16_APPEND_UNSAFE(comp, len, u);
641      comp[len] = 0;
642      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
643      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
644      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
645      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
646    }
647    */
648
649    ucol_close(coll);
650
651    log_verbose("Testing locales, number of cases = %i\n", noCases);
652    for(i = 0; i<noOfLoc; i++) {
653        status = U_ZERO_ERROR;
654        locName = uloc_getAvailable(i);
655        if(hasCollationElements(locName)) {
656            char cName[256];
657            UChar name[256];
658            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
659
660            for(j = 0; j<nameSize; j++) {
661                cName[j] = (char)name[j];
662            }
663            cName[nameSize] = 0;
664            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
665
666            coll = ucol_open(locName, &status);
667            ucol_setStrength(coll, UCOL_IDENTICAL);
668            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
669
670            for(u=0; u<(UChar32)noCases; u++) {
671                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
672                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
673                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
674                    log_verbose("Testing NFC\n");
675                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
676                    backAndForth(iter);
677                    log_verbose("Testing NFD\n");
678                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
679                    backAndForth(iter);
680                }
681            }
682            ucol_closeElements(iter);
683            ucol_close(coll);
684        }
685    }
686    for(u = 0; u <= (UChar32)noCases; u++) {
687        free(t[u]);
688    }
689    free(t);
690}
691
692static void TestEmptyRule(void) {
693  UErrorCode status = U_ZERO_ERROR;
694  UChar rulez[] = { 0 };
695  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
696
697  ucol_close(coll);
698}
699
700static void TestUCARules(void) {
701  UErrorCode status = U_ZERO_ERROR;
702  UChar b[256];
703  UChar *rules = b;
704  uint32_t ruleLen = 0;
705  UCollator *UCAfromRules = NULL;
706  UCollator *coll = ucol_open("", &status);
707  if(status == U_FILE_ACCESS_ERROR) {
708    log_data_err("Is your data around?\n");
709    return;
710  } else if(U_FAILURE(status)) {
711    log_err("Error opening collator\n");
712    return;
713  }
714  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
715
716  log_verbose("TestUCARules\n");
717  if(ruleLen > 256) {
718    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
719    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
720  }
721  log_verbose("Rules length is %d\n", ruleLen);
722  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
723  if(U_SUCCESS(status)) {
724    ucol_close(UCAfromRules);
725  } else {
726    log_verbose("Unable to create a collator from UCARules!\n");
727  }
728/*
729  u_unescape(blah, b, 256);
730  ucol_getSortKey(coll, b, 1, res, 256);
731*/
732  ucol_close(coll);
733  if(rules != b) {
734    free(rules);
735  }
736}
737
738
739/* Pinyin tonal order */
740/*
741    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
742          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
743    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
744    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
745    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
746    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
747      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
748.. (\u00fc)
749
750However, in testing we got the following order:
751    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
752          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
753    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
754.. (\u0113)
755    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
756    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
757    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
758.. (\u01d8)
759      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
760*/
761
/*
 * Pinyin tonal ordering through [before 1] tailorings: for each vowel the four
 * toned forms are tailored to sort before the plain letter, and the toned
 * forms of u-diaeresis are tailored after 'u'. `expected` lists the strings in
 * the ascending order that genericRulesStarter must observe.
 */
static void TestBefore(void) {
  static const char *expected[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char tonalRules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";
  const uint32_t expectedCount = sizeof(expected)/sizeof(expected[0]);

  genericRulesStarter(tonalRules, expected, expectedCount);
}
780
#if 0
/* superseded by TestBeforePinyin */
/* Disabled: checked the order of the toned vowels with the "zh" locale collator. */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
796
#if 0
/* superseded by the changes to the lv locale */
/* Disabled: checked the relative order of I, i, Y and y with the "lv" locale collator. */
static void TestJ831(void) {
  const static char *data[] = {
    "I",
      "i",
      "Y",
      "y"
  };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
809
/*
 * Checks the placement of the ae ligature (U+00E6 / U+00C6) between "Ae" and
 * "af": once with the "fr" locale collator and once with an explicit
 * backwards-secondary rule set that tailors the ligature as an expansion.
 */
static void TestJ815(void) {
  static const char *ordered[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };
  const uint32_t orderedCount = sizeof(ordered)/sizeof(ordered[0]);

  genericLocaleStarter("fr", ordered, orderedCount);
  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", ordered, orderedCount);
}
830
831
832static void TestCase(void)
833{
834    const static UChar gRules[MAX_TOKEN_LEN] =
835    /*" & 0 < 1,\u2461<a,A"*/
836    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
837
838    const static UChar testCase[][MAX_TOKEN_LEN] =
839    {
840        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
841        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
842        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
843        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
844    };
845
846    const static UCollationResult caseTestResults[][9] =
847    {
848        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
850        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
851        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
852    };
853
854    const static UColAttributeValue caseTestAttributes[][2] =
855    {
856        { UCOL_LOWER_FIRST, UCOL_OFF},
857        { UCOL_UPPER_FIRST, UCOL_OFF},
858        { UCOL_LOWER_FIRST, UCOL_ON},
859        { UCOL_UPPER_FIRST, UCOL_ON}
860    };
861    int32_t i,j,k;
862    UErrorCode status = U_ZERO_ERROR;
863    UCollationElements *iter;
864    UCollator  *myCollation;
865    myCollation = ucol_open("en_US", &status);
866
867    if(U_FAILURE(status)){
868        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
869        return;
870    }
871    log_verbose("Testing different case settings\n");
872    ucol_setStrength(myCollation, UCOL_TERTIARY);
873
874    for(k = 0; k<4; k++) {
875      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
876      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
877      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
878      for (i = 0; i < 3 ; i++) {
879        for(j = i+1; j<4; j++) {
880          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
881        }
882      }
883    }
884    ucol_close(myCollation);
885
886    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
887    if(U_FAILURE(status)){
888        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
889        return;
890    }
891    log_verbose("Testing different case settings with custom rules\n");
892    ucol_setStrength(myCollation, UCOL_TERTIARY);
893
894    for(k = 0; k<4; k++) {
895      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
896      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
897      for (i = 0; i < 3 ; i++) {
898        for(j = i+1; j<4; j++) {
899          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
900          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
901          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
902          backAndForth(iter);
903          ucol_closeElements(iter);
904          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
905          backAndForth(iter);
906          ucol_closeElements(iter);
907        }
908      }
909    }
910    ucol_close(myCollation);
911    {
912      const static char *lowerFirst[] = {
913        "h",
914        "H",
915        "ch",
916        "Ch",
917        "CH",
918        "cha",
919        "chA",
920        "Cha",
921        "ChA",
922        "CHa",
923        "CHA",
924        "i",
925        "I"
926      };
927
928      const static char *upperFirst[] = {
929        "H",
930        "h",
931        "CH",
932        "Ch",
933        "ch",
934        "CHA",
935        "CHa",
936        "ChA",
937        "Cha",
938        "chA",
939        "cha",
940        "I",
941        "i"
942      };
943      log_verbose("mixed case test\n");
944      log_verbose("lower first, case level off\n");
945      genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
946      log_verbose("upper first, case level off\n");
947      genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
948      log_verbose("lower first, case level on\n");
949      genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
950      log_verbose("upper first, case level on\n");
951      genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
952    }
953
954}
955
956static void TestIncrementalNormalize(void) {
957
958    /*UChar baseA     =0x61;*/
959    UChar baseA     =0x41;
960/*    UChar baseB     = 0x42;*/
961    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
962    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
963    /*
964        0x316 is combining grave accent below, cc=220
965        0x321 is combining palatalized hook below, cc=202
966        0x300 is combining grave accent, cc=230
967    */
968
969#define MAXSLEN 2000
970    /*int          maxSLen   = 64000;*/
971    int          sLen;
972    int          i;
973
974    UCollator        *coll;
975    UErrorCode       status = U_ZERO_ERROR;
976    UCollationResult result;
977
978    int32_t myQ = getTestOption(QUICK_OPTION);
979
980    if(getTestOption(QUICK_OPTION) < 0) {
981        setTestOption(QUICK_OPTION, 1);
982    }
983
984    {
985        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
986        /*          most buffers along the way.*/
987        UChar            strA[MAXSLEN+1];
988        UChar            strB[MAXSLEN+1];
989
990        coll = ucol_open("en_US", &status);
991        if(status == U_FILE_ACCESS_ERROR) {
992          log_data_err("Is your data around?\n");
993          return;
994        } else if(U_FAILURE(status)) {
995          log_err("Error opening collator\n");
996          return;
997        }
998        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
999
1000        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1001        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1002        /*for (sLen = 1000; sLen<1001; sLen++) {*/
1003        for (sLen = 500; sLen<501; sLen++) {
1004        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1005            strA[0] = baseA;
1006            strB[0] = baseA;
1007            for (i=1; i<=sLen-1; i++) {
1008                strA[i] = ccMix[i % 3];
1009                strB[sLen-i] = ccMix[i % 3];
1010            }
1011            strA[sLen]   = 0;
1012            strB[sLen]   = 0;
1013
1014            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1015            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1016            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1017            doTest(coll, strA, strB, UCOL_EQUAL);
1018        }
1019    }
1020
1021    setTestOption(QUICK_OPTION, myQ);
1022
1023
1024    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1025    /*         of the string.  Checks a couple of edge cases.*/
1026
1027    {
1028        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1029        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1030        ucol_setStrength(coll, UCOL_TERTIARY);
1031        doTest(coll, strA, strB, UCOL_EQUAL);
1032    }
1033
1034    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1035
1036    {
1037      /* New UCA  3.1.1.
1038       * test below used a code point from Desseret, which sorts differently
1039       * than d800 dc00
1040       */
1041        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1042        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1043        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1044        ucol_setStrength(coll, UCOL_TERTIARY);
1045        doTest(coll, strA, strB, UCOL_GREATER);
1046    }
1047
1048    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1049
1050    {
1051        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1052        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1053        char  sortKeyA[50];
1054        char  sortKeyAz[50];
1055        char  sortKeyB[50];
1056        char  sortKeyBz[50];
1057        int   r;
1058
1059        /* there used to be -3 here. Hmmmm.... */
1060        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1061        result = ucol_strcoll(coll, strA, 3, strB, 3);
1062        if (result != UCOL_GREATER) {
1063            log_err("ERROR 1 in test 4\n");
1064        }
1065        result = ucol_strcoll(coll, strA, -1, strB, -1);
1066        if (result != UCOL_EQUAL) {
1067            log_err("ERROR 2 in test 4\n");
1068        }
1069
1070        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1071        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1072        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1073        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1074
1075        r = strcmp(sortKeyA, sortKeyAz);
1076        if (r <= 0) {
1077            log_err("Error 3 in test 4\n");
1078        }
1079        r = strcmp(sortKeyA, sortKeyB);
1080        if (r <= 0) {
1081            log_err("Error 4 in test 4\n");
1082        }
1083        r = strcmp(sortKeyAz, sortKeyBz);
1084        if (r != 0) {
1085            log_err("Error 5 in test 4\n");
1086        }
1087
1088        ucol_setStrength(coll, UCOL_IDENTICAL);
1089        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1090        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1091        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1092        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1093
1094        r = strcmp(sortKeyA, sortKeyAz);
1095        if (r <= 0) {
1096            log_err("Error 6 in test 4\n");
1097        }
1098        r = strcmp(sortKeyA, sortKeyB);
1099        if (r <= 0) {
1100            log_err("Error 7 in test 4\n");
1101        }
1102        r = strcmp(sortKeyAz, sortKeyBz);
1103        if (r != 0) {
1104            log_err("Error 8 in test 4\n");
1105        }
1106        ucol_setStrength(coll, UCOL_TERTIARY);
1107    }
1108
1109
1110    /*  Test 5:  Null characters in non-normal source strings.*/
1111
1112    {
1113        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1114        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1115        char  sortKeyA[50];
1116        char  sortKeyAz[50];
1117        char  sortKeyB[50];
1118        char  sortKeyBz[50];
1119        int   r;
1120
1121        result = ucol_strcoll(coll, strA, 6, strB, 6);
1122        if (result != UCOL_GREATER) {
1123            log_err("ERROR 1 in test 5\n");
1124        }
1125        result = ucol_strcoll(coll, strA, -1, strB, -1);
1126        if (result != UCOL_EQUAL) {
1127            log_err("ERROR 2 in test 5\n");
1128        }
1129
1130        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1131        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1132        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1133        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1134
1135        r = strcmp(sortKeyA, sortKeyAz);
1136        if (r <= 0) {
1137            log_err("Error 3 in test 5\n");
1138        }
1139        r = strcmp(sortKeyA, sortKeyB);
1140        if (r <= 0) {
1141            log_err("Error 4 in test 5\n");
1142        }
1143        r = strcmp(sortKeyAz, sortKeyBz);
1144        if (r != 0) {
1145            log_err("Error 5 in test 5\n");
1146        }
1147
1148        ucol_setStrength(coll, UCOL_IDENTICAL);
1149        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1150        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1151        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1152        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1153
1154        r = strcmp(sortKeyA, sortKeyAz);
1155        if (r <= 0) {
1156            log_err("Error 6 in test 5\n");
1157        }
1158        r = strcmp(sortKeyA, sortKeyB);
1159        if (r <= 0) {
1160            log_err("Error 7 in test 5\n");
1161        }
1162        r = strcmp(sortKeyAz, sortKeyBz);
1163        if (r != 0) {
1164            log_err("Error 8 in test 5\n");
1165        }
1166        ucol_setStrength(coll, UCOL_TERTIARY);
1167    }
1168
1169
1170    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1171
1172    {
1173        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1174        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1175
1176        result = ucol_strcoll(coll, strA, 5, strB, 5);
1177        if (result != UCOL_LESS) {
1178            log_err("Error 1 in test 6\n");
1179        }
1180        result = ucol_strcoll(coll, strA, -1, strB, -1);
1181        if (result != UCOL_EQUAL) {
1182            log_err("Error 2 in test 6\n");
1183        }
1184    }
1185
1186    ucol_close(coll);
1187}
1188
1189
1190
#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 *
 * For every entry of caseBitData, the escaped string is converted with
 * u_unescape() and handed to ucol_uprv_getCaseBits() together with a
 * collator opened for the empty locale "". The returned value is compared
 * with the entry at the same index in results[]; a mismatch is logged.
 *
 * NOTE(review): the collator is never passed to ucol_close() and status is
 * never inspected after ucol_open() - address both before re-enabling.
 */
static void TestGetCaseBit(void) {
  /* Input strings, in u_unescape() syntax; parallel to results[]. */
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case value for the caseBitData entry with the same index. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* The loop bound comes from results[]; both tables have the same number of entries. */
  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      /* Only the first code unit of the input is reported. */
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
}
#endif
1218
1219static void TestHangulTailoring(void) {
1220    static const char *koreanData[] = {
1221        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1222            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1223            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1224            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1225            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1226            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1227    };
1228
1229    const char *rules =
1230        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1231        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1232        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1233        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1234        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1235        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1236
1237
1238  UErrorCode status = U_ZERO_ERROR;
1239  UChar rlz[2048] = { 0 };
1240  uint32_t rlen = u_unescape(rules, rlz, 2048);
1241
1242  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1243  if(status == U_FILE_ACCESS_ERROR) {
1244    log_data_err("Is your data around?\n");
1245    return;
1246  } else if(U_FAILURE(status)) {
1247    log_err("Error opening collator\n");
1248    return;
1249  }
1250
1251  log_verbose("Using start of korean rules\n");
1252
1253  if(U_SUCCESS(status)) {
1254    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1255  } else {
1256    log_err("Unable to open collator with rules %s\n", rules);
1257  }
1258
1259  ucol_close(coll);
1260
1261  log_verbose("Using ko__LOTUS locale\n");
1262  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1263}
1264
/*
 * The secondary/tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * TestCompressOverlap compares individual sort key bytes against these
 * values: "top down" bytes are expected not to fall below the middle and
 * "bottom up" bytes are expected not to rise above it.
 */
enum {
    SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
    TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
};
1275
1276static void TestCompressOverlap(void) {
1277    UChar       secstr[150];
1278    UChar       tertstr[150];
1279    UErrorCode  status = U_ZERO_ERROR;
1280    UCollator  *coll;
1281    uint8_t     result[500];
1282    uint32_t    resultlen;
1283    int         count = 0;
1284    uint8_t    *tempptr;
1285
1286    coll = ucol_open("", &status);
1287
1288    if (U_FAILURE(status)) {
1289        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1290        return;
1291    }
1292    while (count < 149) {
1293        secstr[count] = 0x0020; /* [06, 05, 05] */
1294        tertstr[count] = 0x0020;
1295        count ++;
1296    }
1297
1298    /* top down compression ----------------------------------- */
1299    secstr[count] = 0x0332; /* [, 87, 05] */
1300    tertstr[count] = 0x3000; /* [06, 05, 07] */
1301
1302    /* no compression secstr should have 150 secondary bytes, tertstr should
1303    have 150 tertiary bytes.
1304    with correct compression, secstr should have 6 secondary
1305    bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1306    resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1307    (void)resultlen;    /* Suppress set but not used warning. */
1308    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1309    while (*(tempptr + 1) != 1) {
1310        /* the last secondary collation element is not checked since it is not
1311        part of the compression */
1312        if (*tempptr < SEC_COMMON_MIDDLE) {
1313            log_err("Secondary top down compression overlapped\n");
1314        }
1315        tempptr ++;
1316    }
1317
1318    /* tertiary top/bottom/common for en_US is similar to the secondary
1319    top/bottom/common */
1320    resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1321    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1322    while (*(tempptr + 1) != 0) {
1323        /* the last secondary collation element is not checked since it is not
1324        part of the compression */
1325        if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1326            log_err("Tertiary top down compression overlapped\n");
1327        }
1328        tempptr ++;
1329    }
1330
1331    /* bottom up compression ------------------------------------- */
1332    secstr[count] = 0;
1333    tertstr[count] = 0;
1334    resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1335    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1336    while (*(tempptr + 1) != 1) {
1337        /* the last secondary collation element is not checked since it is not
1338        part of the compression */
1339        if (*tempptr > SEC_COMMON_MIDDLE) {
1340            log_err("Secondary bottom up compression overlapped\n");
1341        }
1342        tempptr ++;
1343    }
1344
1345    /* tertiary top/bottom/common for en_US is similar to the secondary
1346    top/bottom/common */
1347    resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1348    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1349    while (*(tempptr + 1) != 0) {
1350        /* the last secondary collation element is not checked since it is not
1351        part of the compression */
1352        if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1353            log_err("Tertiary bottom up compression overlapped\n");
1354        }
1355        tempptr ++;
1356    }
1357
1358    ucol_close(coll);
1359}
1360
/*
 * Feeds three Cyrillic test strings to genericRulesStarter() once for each
 * of the rule strings that are still meaningful.
 *
 * History kept from the original test:
 *  - genericLocaleStarter("ru", test, 3) is no longer run, because Russian
 *    overrides contractions and the test is not valid there any more.
 *  - UCA 8.0 drops most of the Cyrillic contractions from the default order
 *    (CLDR ticket #7246 "root collation: remove Cyrillic contractions"),
 *    so these invocations are disabled as well:
 *        genericLocaleStarter("root", test, 3);
 *        genericRulesStarter("&\\u0410 = \\u0410", test, 3);
 *        genericRulesStarter("&Z < \\u0410", test, 3);
 *        genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
 *        genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
 */
static void TestCyrillicTailoring(void) {
  static const char *test[] = {
    "\\u0410b",
      "\\u0410\\u0306a",
      "\\u04d0A"
  };
  /* Rule strings that are still exercised, in execution order. */
  static const char *const activeRules[] = {
    "&\\u0410 = \\u0410 < \\u04d0",
    "&Z < \\u0410 < \\u04d0"
  };
  const uint32_t testCount = (uint32_t)(sizeof(test) / sizeof(test[0]));
  const int ruleCount = (int)(sizeof(activeRules) / sizeof(activeRules[0]));
  int ruleIndex;

  for (ruleIndex = 0; ruleIndex < ruleCount; ++ruleIndex) {
    genericRulesStarter(activeRules[ruleIndex], test, testCount);
  }
}
1383
/*
 * Runs genericRulesStarter() with a [suppressContractions ...] rule covering
 * U+0400..U+047F against two three-string lists: first testNoCont, then
 * testNoCont2.
 */
static void TestSuppressContractions(void) {
  static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  const uint32_t noContCount  = (uint32_t)(sizeof(testNoCont) / sizeof(testNoCont[0]));
  const uint32_t noCont2Count = (uint32_t)(sizeof(testNoCont2) / sizeof(testNoCont2[0]));

  genericRulesStarter(suppressRule, testNoCont, noContCount);
  genericRulesStarter(suppressRule, testNoCont2, noCont2Count);
}
1400
1401static void TestContraction(void) {
1402    const static char *testrules[] = {
1403        "&A = AB / B",
1404        "&A = A\\u0306/\\u0306",
1405        "&c = ch / h"
1406    };
1407    const static UChar testdata[][2] = {
1408        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1409        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1410        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1411    };
1412    const static UChar testdata2[][2] = {
1413        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1414        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1415        {0x0063 /* 'c' */, 0x006C /* 'l' */}
1416    };
1417#if 0
1418    /*
1419     * These pairs of rule strings are not guaranteed to yield the very same mappings.
1420     * In fact, LDML 24 recommends an improved way of creating mappings
1421     * which always yields different mappings for such pairs. See
1422     * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1423     */
1424    const static char *testrules3[] = {
1425        "&z < xyz &xyzw << B",
1426        "&z < xyz &xyz << B / w",
1427        "&z < ch &achm << B",
1428        "&z < ch &a << B / chm",
1429        "&\\ud800\\udc00w << B",
1430        "&\\ud800\\udc00 << B / w",
1431        "&a\\ud800\\udc00m << B",
1432        "&a << B / \\ud800\\udc00m",
1433    };
1434#endif
1435
1436    UErrorCode  status   = U_ZERO_ERROR;
1437    UCollator  *coll;
1438    UChar       rule[256] = {0};
1439    uint32_t    rlen     = 0;
1440    int         i;
1441
1442    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1443        UCollationElements *iter1;
1444        int j = 0;
1445        log_verbose("Rule %s for testing\n", testrules[i]);
1446        rlen = u_unescape(testrules[i], rule, 32);
1447        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1448        if (U_FAILURE(status)) {
1449            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1450            return;
1451        }
1452        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1453        if (U_FAILURE(status)) {
1454            log_err("Collation iterator creation failed\n");
1455            return;
1456        }
1457        while (j < 2) {
1458            UCollationElements *iter2 = ucol_openElements(coll,
1459                                                         &(testdata[i][j]),
1460                                                         1, &status);
1461            uint32_t ce;
1462            if (U_FAILURE(status)) {
1463                log_err("Collation iterator creation failed\n");
1464                return;
1465            }
1466            ce = ucol_next(iter2, &status);
1467            while (ce != UCOL_NULLORDER) {
1468                if ((uint32_t)ucol_next(iter1, &status) != ce) {
1469                    log_err("Collation elements in contraction split does not match\n");
1470                    return;
1471                }
1472                ce = ucol_next(iter2, &status);
1473            }
1474            j ++;
1475            ucol_closeElements(iter2);
1476        }
1477        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1478            log_err("Collation elements not exhausted\n");
1479            return;
1480        }
1481        ucol_closeElements(iter1);
1482        ucol_close(coll);
1483    }
1484
1485    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1486    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1487    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1488        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1489                testdata2[0][0], testdata2[0][1], testdata2[1][0],
1490                testdata2[1][1]);
1491        return;
1492    }
1493    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1494        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1495                testdata2[1][0], testdata2[1][1], testdata2[2][0],
1496                testdata2[2][1]);
1497        return;
1498    }
1499    ucol_close(coll);
1500#if 0  /* see above */
1501    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
1502        log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1503        UCollator          *coll1,
1504                           *coll2;
1505        UCollationElements *iter1,
1506                           *iter2;
1507        UChar               ch = 0x0042 /* 'B' */;
1508        uint32_t            ce;
1509        rlen = u_unescape(testrules3[i], rule, 32);
1510        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1511        rlen = u_unescape(testrules3[i + 1], rule, 32);
1512        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1513        if (U_FAILURE(status)) {
1514            log_err("Collator creation failed %s\n", testrules[i]);
1515            return;
1516        }
1517        iter1 = ucol_openElements(coll1, &ch, 1, &status);
1518        iter2 = ucol_openElements(coll2, &ch, 1, &status);
1519        if (U_FAILURE(status)) {
1520            log_err("Collation iterator creation failed\n");
1521            return;
1522        }
1523        ce = ucol_next(iter1, &status);
1524        if (U_FAILURE(status)) {
1525            log_err("Retrieving ces failed\n");
1526            return;
1527        }
1528        while (ce != UCOL_NULLORDER) {
1529            uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1530            if (ce == ce2) {
1531                log_verbose("CEs match: %08x\n", ce);
1532            } else {
1533                log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1534                return;
1535            }
1536            ce = ucol_next(iter1, &status);
1537            if (U_FAILURE(status)) {
1538                log_err("Retrieving ces failed\n");
1539                return;
1540            }
1541        }
1542        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1543            log_err("CEs not exhausted\n");
1544            return;
1545        }
1546        ucol_closeElements(iter1);
1547        ucol_closeElements(iter2);
1548        ucol_close(coll1);
1549        ucol_close(coll2);
1550    }
1551#endif
1552}
1553
1554static void TestExpansion(void) {
1555    const static char *testrules[] = {
1556#if 0
1557        /*
1558         * This seems to have tested that M was not mapped to an expansion.
1559         * I believe the old builder just did that because it computed the extension CEs
1560         * at the very end, which was a bug.
1561         * Among other problems, it violated the core tailoring principle
1562         * by making an earlier rule depend on a later one.
1563         * And, of course, if M did not get an expansion, then it was primary different from K,
1564         * unlike what the rule &K<<M says.
1565         */
1566        "&J << K / B & K << M",
1567#endif
1568        "&J << K / B << M"
1569    };
1570    const static UChar testdata[][3] = {
1571        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1572        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1573        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1574        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1575        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1576        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1577    };
1578
1579    UErrorCode  status   = U_ZERO_ERROR;
1580    UCollator  *coll;
1581    UChar       rule[256] = {0};
1582    uint32_t    rlen     = 0;
1583    int         i;
1584
1585    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1586        int j = 0;
1587        log_verbose("Rule %s for testing\n", testrules[i]);
1588        rlen = u_unescape(testrules[i], rule, 32);
1589        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1590        if (U_FAILURE(status)) {
1591            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1592            return;
1593        }
1594
1595        for (j = 0; j < 5; j ++) {
1596            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1597        }
1598        ucol_close(coll);
1599    }
1600}
1601
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/*
 * Structure: six independent scopes, each defining a tailoring rule string
 * plus one or more string lists, and passing them to genericRulesStarter()
 * or genericRulesStarterWithOptions(). Where attributes are used, att[]
 * names the attributes and valOn[]/valOff[] hold the values for them, index
 * by index.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    /* tlimit02 is tlimit01 with the two strings swapped. */
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both orders are run with decomposition mode on, then with it off. */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  {
    /* NOTE(review): rule and data are identical to the previous scope; only the log message differs. */
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* Two attributes are set per run: alternate handling and strength. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    /* tlimit03 is only run with valOn; tlimit01 and tlimit02 are run with both value sets. */
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    /* Only the upper-first configuration is run; the UCOL_OFF runs below are commented out. */
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
1693
1694static void TestBocsuCoverage(void) {
1695  UErrorCode status = U_ZERO_ERROR;
1696  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1697  UChar       test[256] = {0};
1698  uint32_t    tlen     = u_unescape(testString, test, 32);
1699  uint8_t key[256]     = {0};
1700  uint32_t klen         = 0;
1701
1702  UCollator *coll = ucol_open("", &status);
1703  if(U_SUCCESS(status)) {
1704  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1705
1706  klen = ucol_getSortKey(coll, test, tlen, key, 256);
1707  (void)klen;    /* Suppress set but not used warning. */
1708
1709  ucol_close(coll);
1710  } else {
1711    log_data_err("Couldn't open UCA\n");
1712  }
1713}
1714
1715static void TestVariableTopSetting(void) {
1716  UErrorCode status = U_ZERO_ERROR;
1717  uint32_t varTopOriginal = 0, varTop1, varTop2;
1718  UCollator *coll = ucol_open("", &status);
1719  if(U_SUCCESS(status)) {
1720
1721  static const UChar nul = 0;
1722  static const UChar space = 0x20;
1723  static const UChar dot = 0x2e;  /* punctuation */
1724  static const UChar degree = 0xb0;  /* symbol */
1725  static const UChar dollar = 0x24;  /* currency symbol */
1726  static const UChar zero = 0x30;  /* digit */
1727
1728  varTopOriginal = ucol_getVariableTop(coll, &status);
1729  log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1730  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1731
1732  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1733  varTop2 = ucol_getVariableTop(coll, &status);
1734  log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1735  if(U_FAILURE(status) || varTop1 != varTop2 ||
1736      !ucol_equal(coll, &nul, 0, &space, 1) ||
1737      ucol_equal(coll, &nul, 0, &dot, 1) ||
1738      ucol_equal(coll, &nul, 0, &degree, 1) ||
1739      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1740      ucol_equal(coll, &nul, 0, &zero, 1) ||
1741      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1742    log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1743  }
1744
1745  varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1746  varTop2 = ucol_getVariableTop(coll, &status);
1747  log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1748  if(U_FAILURE(status) || varTop1 != varTop2 ||
1749      !ucol_equal(coll, &nul, 0, &space, 1) ||
1750      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1751      ucol_equal(coll, &nul, 0, &degree, 1) ||
1752      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1753      ucol_equal(coll, &nul, 0, &zero, 1) ||
1754      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1755    log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1756  }
1757
1758  varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1759  varTop2 = ucol_getVariableTop(coll, &status);
1760  log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1761  if(U_FAILURE(status) || varTop1 != varTop2 ||
1762      !ucol_equal(coll, &nul, 0, &space, 1) ||
1763      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1764      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1765      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1766      ucol_equal(coll, &nul, 0, &zero, 1) ||
1767      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1768    log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1769  }
1770
1771  varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1772  varTop2 = ucol_getVariableTop(coll, &status);
1773  log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1774  if(U_FAILURE(status) || varTop1 != varTop2 ||
1775      !ucol_equal(coll, &nul, 0, &space, 1) ||
1776      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1777      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1778      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1779      ucol_equal(coll, &nul, 0, &zero, 1) ||
1780      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1781    log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1782  }
1783
1784  log_verbose("Testing setting variable top to contractions\n");
1785  {
1786    UChar first[4] = { 0 };
1787    first[0] = 0x0040;
1788    first[1] = 0x0050;
1789    first[2] = 0x0000;
1790
1791    status = U_ZERO_ERROR;
1792    ucol_setVariableTop(coll, first, -1, &status);
1793
1794    if(U_SUCCESS(status)) {
1795      log_err("Invalid contraction succeded in setting variable top!\n");
1796    }
1797
1798  }
1799
1800  log_verbose("Test restoring variable top\n");
1801
1802  status = U_ZERO_ERROR;
1803  ucol_restoreVariableTop(coll, varTopOriginal, &status);
1804  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1805    log_err("Couldn't restore old variable top\n");
1806  }
1807
1808  log_verbose("Testing calling with error set\n");
1809
1810  status = U_INTERNAL_PROGRAM_ERROR;
1811  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1812  varTop2 = ucol_getVariableTop(coll, &status);
1813  ucol_restoreVariableTop(coll, varTop2, &status);
1814  varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1815  varTop2 = ucol_getVariableTop(NULL, &status);
1816  ucol_restoreVariableTop(NULL, varTop2, &status);
1817  if(status != U_INTERNAL_PROGRAM_ERROR) {
1818    log_err("Bad reaction to passed error!\n");
1819  }
1820  ucol_close(coll);
1821  } else {
1822    log_data_err("Couldn't open UCA collator\n");
1823  }
1824}
1825
1826static void TestMaxVariable() {
1827  UErrorCode status = U_ZERO_ERROR;
1828  UColReorderCode oldMax, max;
1829  UCollator *coll;
1830
1831  static const UChar nul = 0;
1832  static const UChar space = 0x20;
1833  static const UChar dot = 0x2e;  /* punctuation */
1834  static const UChar degree = 0xb0;  /* symbol */
1835  static const UChar dollar = 0x24;  /* currency symbol */
1836  static const UChar zero = 0x30;  /* digit */
1837
1838  coll = ucol_open("", &status);
1839  if(U_FAILURE(status)) {
1840    log_data_err("Couldn't open root collator\n");
1841    return;
1842  }
1843
1844  oldMax = ucol_getMaxVariable(coll);
1845  log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1846  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1847
1848  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1849  max = ucol_getMaxVariable(coll);
1850  log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1851  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1852      !ucol_equal(coll, &nul, 0, &space, 1) ||
1853      ucol_equal(coll, &nul, 0, &dot, 1) ||
1854      ucol_equal(coll, &nul, 0, &degree, 1) ||
1855      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1856      ucol_equal(coll, &nul, 0, &zero, 1) ||
1857      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1858    log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1859  }
1860
1861  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1862  max = ucol_getMaxVariable(coll);
1863  log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1864  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1865      !ucol_equal(coll, &nul, 0, &space, 1) ||
1866      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1867      ucol_equal(coll, &nul, 0, &degree, 1) ||
1868      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1869      ucol_equal(coll, &nul, 0, &zero, 1) ||
1870      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1871    log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1872  }
1873
1874  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1875  max = ucol_getMaxVariable(coll);
1876  log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1877  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1878      !ucol_equal(coll, &nul, 0, &space, 1) ||
1879      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1880      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1881      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1882      ucol_equal(coll, &nul, 0, &zero, 1) ||
1883      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1884    log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1885  }
1886
1887  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1888  max = ucol_getMaxVariable(coll);
1889  log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1890  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1891      !ucol_equal(coll, &nul, 0, &space, 1) ||
1892      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1893      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1894      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1895      ucol_equal(coll, &nul, 0, &zero, 1) ||
1896      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1897    log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1898  }
1899
1900  log_verbose("Test restoring maxVariable\n");
1901  status = U_ZERO_ERROR;
1902  ucol_setMaxVariable(coll, oldMax, &status);
1903  if(oldMax != ucol_getMaxVariable(coll)) {
1904    log_err("Couldn't restore old maxVariable\n");
1905  }
1906
1907  log_verbose("Testing calling with error set\n");
1908  status = U_INTERNAL_PROGRAM_ERROR;
1909  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1910  max = ucol_getMaxVariable(coll);
1911  if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1912    log_err("Bad reaction to passed error!\n");
1913  }
1914  ucol_close(coll);
1915}
1916
1917static void TestNonChars(void) {
1918  static const char *test[] = {
1919      "\\u0000",  /* ignorable */
1920      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1921      "\\uFDD0", "\\uFDEF",
1922      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1923      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1924      "\\U0003FFFE", "\\U0003FFFF",
1925      "\\U0004FFFE", "\\U0004FFFF",
1926      "\\U0005FFFE", "\\U0005FFFF",
1927      "\\U0006FFFE", "\\U0006FFFF",
1928      "\\U0007FFFE", "\\U0007FFFF",
1929      "\\U0008FFFE", "\\U0008FFFF",
1930      "\\U0009FFFE", "\\U0009FFFF",
1931      "\\U000AFFFE", "\\U000AFFFF",
1932      "\\U000BFFFE", "\\U000BFFFF",
1933      "\\U000CFFFE", "\\U000CFFFF",
1934      "\\U000DFFFE", "\\U000DFFFF",
1935      "\\U000EFFFE", "\\U000EFFFF",
1936      "\\U000FFFFE", "\\U000FFFFF",
1937      "\\U0010FFFE", "\\U0010FFFF",
1938      "\\uFFFF"  /* special character with maximum primary weight */
1939  };
1940  UErrorCode status = U_ZERO_ERROR;
1941  UCollator *coll = ucol_open("en_US", &status);
1942
1943  log_verbose("Test non characters\n");
1944
1945  if(U_SUCCESS(status)) {
1946    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1947  } else {
1948    log_err_status(status, "Unable to open collator\n");
1949  }
1950
1951  ucol_close(coll);
1952}
1953
/**
 * Builds, for every total length from 20 to 499, four strings that consist of
 * a run of 'a' followed by one final character 'a', 'b', 'c' or 'd', and hands
 * each set of four to genericLocaleStarter() for locale "en_US".
 *
 * The buffers are heap-allocated; if an allocation fails the test logs an
 * error and releases whatever was already allocated instead of writing
 * through a NULL pointer.
 */
static void TestExtremeCompression(void) {
  enum {
    EC_STRING_COUNT = 4,
    EC_BUFFER_SIZE = 2048,  /* comfortably larger than the longest string built below */
    EC_MIN_LENGTH = 20,
    EC_LENGTH_LIMIT = 500
  };
  char *test[EC_STRING_COUNT] = { NULL, NULL, NULL, NULL };
  int32_t j = 0, i = 0;

  for(i = 0; i<EC_STRING_COUNT; i++) {
    test[i] = (char *)malloc(EC_BUFFER_SIZE*sizeof(char));
    if(test[i] == NULL) {
      log_err("TestExtremeCompression: could not allocate test buffer %d\n", (int)i);
      goto cleanup;
    }
  }

  for(j = EC_MIN_LENGTH; j < EC_LENGTH_LIMIT; j++) {
    for(i = 0; i<EC_STRING_COUNT; i++) {
      /* j-1 copies of 'a', then the distinguishing last character, then NUL */
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, EC_STRING_COUNT);
  }

cleanup:
  for(i = 0; i<EC_STRING_COUNT; i++) {
    free(test[i]);  /* free(NULL) is a no-op for slots that were never filled */
  }
}
1976
/*
 * Disabled (#if 0) variant of TestExtremeCompression; nothing in this region
 * is compiled.
 *
 * NOTE(review): as written it could not simply be re-enabled:
 *   - it has the same name as the live TestExtremeCompression above;
 *   - ucol_open() is given `status` rather than `&status`, unlike the other
 *     ucol_open() calls in this file;
 *   - `coll` is opened but never used or closed;
 *   - the first fill loop sets j-2 bytes and then writes index j-1, so the
 *     byte at index j-2 is not written in that pass.
 * Confirm whether this should be repaired or removed.
 */
#if 0
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Rebuilds all four strings for each length; only the last length survives the loop. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Only test[0] is touched here (inner loop runs for i == 0), and the result is not used. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2006
/*
 * Tailors several supplementary code points (written as escaped surrogate
 * pairs) and strings starting with U+10000, then hands the rule string and
 * the string list to genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *samples[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  /* 14 entries; derived from the array so table and count cannot drift apart */
  const uint32_t sampleCount = (uint32_t)(sizeof(samples)/sizeof(samples[0]));

  genericRulesStarter(tailoring, samples, sampleCount);
}
2027
/*
 * Exercises prefix ("x|y") tailorings, the mechanism used by the
 * JIS X 4061 collation rules. Each table entry pairs a rule string with
 * a list of strings and the number of list entries passed on to
 * genericRulesStarter().
 */
static void TestPrefix(void) {
  static const struct {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } tests[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },
    /* NOTE(review): five strings are listed but len is 4, so "zz" is not passed on - confirm intent */
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
  uint32_t caseIndex = 0;

  while(caseIndex < caseCount) {
    genericRulesStarter(tests[caseIndex].rules, tests[caseIndex].data, tests[caseIndex].len);
    ++caseIndex;
  }
}
2053
2054/* This test uses data suplied by Masashiko Maedera to test the implementation */
2055/* JIS X 4061 collation order implementation                                   */
2056static void TestNewJapanese(void) {
2057
2058  static const char * const test1[] = {
2059      "\\u30b7\\u30e3\\u30fc\\u30ec",
2060      "\\u30b7\\u30e3\\u30a4",
2061      "\\u30b7\\u30e4\\u30a3",
2062      "\\u30b7\\u30e3\\u30ec",
2063      "\\u3061\\u3087\\u3053",
2064      "\\u3061\\u3088\\u3053",
2065      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2066      "\\u3066\\u30fc\\u305f",
2067      "\\u30c6\\u30fc\\u30bf",
2068      "\\u30c6\\u30a7\\u30bf",
2069      "\\u3066\\u3048\\u305f",
2070      "\\u3067\\u30fc\\u305f",
2071      "\\u30c7\\u30fc\\u30bf",
2072      "\\u30c7\\u30a7\\u30bf",
2073      "\\u3067\\u3048\\u305f",
2074      "\\u3066\\u30fc\\u305f\\u30fc",
2075      "\\u30c6\\u30fc\\u30bf\\u30a1",
2076      "\\u30c6\\u30a7\\u30bf\\u30fc",
2077      "\\u3066\\u3047\\u305f\\u3041",
2078      "\\u3066\\u3048\\u305f\\u30fc",
2079      "\\u3067\\u30fc\\u305f\\u30fc",
2080      "\\u30c7\\u30fc\\u30bf\\u30a1",
2081      "\\u3067\\u30a7\\u305f\\u30a1",
2082      "\\u30c7\\u3047\\u30bf\\u3041",
2083      "\\u30c7\\u30a8\\u30bf\\u30a2",
2084      "\\u3072\\u3086",
2085      "\\u3073\\u3085\\u3042",
2086      "\\u3074\\u3085\\u3042",
2087      "\\u3073\\u3085\\u3042\\u30fc",
2088      "\\u30d3\\u30e5\\u30a2\\u30fc",
2089      "\\u3074\\u3085\\u3042\\u30fc",
2090      "\\u30d4\\u30e5\\u30a2\\u30fc",
2091      "\\u30d2\\u30e5\\u30a6",
2092      "\\u30d2\\u30e6\\u30a6",
2093      "\\u30d4\\u30e5\\u30a6\\u30a2",
2094      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2095      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2096      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2097      "\\u3072\\u3085\\u3093",
2098      "\\u3074\\u3085\\u3093",
2099      "\\u3075\\u30fc\\u308a",
2100      "\\u30d5\\u30fc\\u30ea",
2101      "\\u3075\\u3045\\u308a",
2102      "\\u3075\\u30a5\\u308a",
2103      "\\u3075\\u30a5\\u30ea",
2104      "\\u30d5\\u30a6\\u30ea",
2105      "\\u3076\\u30fc\\u308a",
2106      "\\u30d6\\u30fc\\u30ea",
2107      "\\u3076\\u3045\\u308a",
2108      "\\u30d6\\u30a5\\u308a",
2109      "\\u3077\\u3046\\u308a",
2110      "\\u30d7\\u30a6\\u30ea",
2111      "\\u3075\\u30fc\\u308a\\u30fc",
2112      "\\u30d5\\u30a5\\u30ea\\u30fc",
2113      "\\u3075\\u30a5\\u308a\\u30a3",
2114      "\\u30d5\\u3045\\u308a\\u3043",
2115      "\\u30d5\\u30a6\\u30ea\\u30fc",
2116      "\\u3075\\u3046\\u308a\\u3043",
2117      "\\u30d6\\u30a6\\u30ea\\u30a4",
2118      "\\u3077\\u30fc\\u308a\\u30fc",
2119      "\\u3077\\u30a5\\u308a\\u30a4",
2120      "\\u3077\\u3046\\u308a\\u30fc",
2121      "\\u30d7\\u30a6\\u30ea\\u30a4",
2122      "\\u30d5\\u30fd",
2123      "\\u3075\\u309e",
2124      "\\u3076\\u309d",
2125      "\\u3076\\u3075",
2126      "\\u3076\\u30d5",
2127      "\\u30d6\\u3075",
2128      "\\u30d6\\u30d5",
2129      "\\u3076\\u309e",
2130      "\\u3076\\u3077",
2131      "\\u30d6\\u3077",
2132      "\\u3077\\u309d",
2133      "\\u30d7\\u30fd",
2134      "\\u3077\\u3075",
2135};
2136
2137  static const char *test2[] = {
2138    "\\u306f\\u309d", /* H\\u309d */
2139    "\\u30cf\\u30fd", /* K\\u30fd */
2140    "\\u306f\\u306f", /* HH */
2141    "\\u306f\\u30cf", /* HK */
2142    "\\u30cf\\u30cf", /* KK */
2143    "\\u306f\\u309e", /* H\\u309e */
2144    "\\u30cf\\u30fe", /* K\\u30fe */
2145    "\\u306f\\u3070", /* HH\\u309b */
2146    "\\u30cf\\u30d0", /* KK\\u309b */
2147    "\\u306f\\u3071", /* HH\\u309c */
2148    "\\u30cf\\u3071", /* KH\\u309c */
2149    "\\u30cf\\u30d1", /* KK\\u309c */
2150    "\\u3070\\u309d", /* H\\u309b\\u309d */
2151    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2152    "\\u3070\\u306f", /* H\\u309bH */
2153    "\\u30d0\\u30cf", /* K\\u309bK */
2154    "\\u3070\\u309e", /* H\\u309b\\u309e */
2155    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2156    "\\u3070\\u3070", /* H\\u309bH\\u309b */
2157    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2158    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2159    "\\u3070\\u3071", /* H\\u309bH\\u309c */
2160    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2161    "\\u3071\\u309d", /* H\\u309c\\u309d */
2162    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2163    "\\u3071\\u306f", /* H\\u309cH */
2164    "\\u30d1\\u30cf", /* K\\u309cK */
2165    "\\u3071\\u3070", /* H\\u309cH\\u309b */
2166    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2167    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2168    "\\u3071\\u3071", /* H\\u309cH\\u309c */
2169    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2170  };
2171  /*
2172  static const char *test3[] = {
2173    "\\u221er\\u221e",
2174    "\\u221eR#",
2175    "\\u221et\\u221e",
2176    "#r\\u221e",
2177    "#R#",
2178    "#t%",
2179    "#T%",
2180    "8t\\u221e",
2181    "8T\\u221e",
2182    "8t#",
2183    "8T#",
2184    "8t%",
2185    "8T%",
2186    "8t8",
2187    "8T8",
2188    "\\u03c9r\\u221e",
2189    "\\u03a9R%",
2190    "rr\\u221e",
2191    "rR\\u221e",
2192    "Rr\\u221e",
2193    "RR\\u221e",
2194    "RT%",
2195    "rt8",
2196    "tr\\u221e",
2197    "tr8",
2198    "TR8",
2199    "tt8",
2200    "\\u30b7\\u30e3\\u30fc\\u30ec",
2201  };
2202  */
2203  static const UColAttribute att[] = { UCOL_STRENGTH };
2204  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2205
2206  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2207  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2208
2209  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
2210  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
2211  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
2212  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
2213  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
2214}
2215
2216static void TestStrCollIdenticalPrefix(void) {
2217  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2218  const char* test[] = {
2219    "ab\\ud9b0\\udc70",
2220    "ab\\ud9b0\\udc71"
2221  };
2222  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
2223}
2224/* Contractions should have all their canonically equivalent */
2225/* strings included */
2226static void TestContractionClosure(void) {
2227  static const struct {
2228    const char *rules;
2229    const char *data[10];
2230    const uint32_t len;
2231  } tests[] = {
2232    {   "&b=\\u00e4\\u00e4",
2233      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2234    {   "&b=\\u00C5",
2235      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2236  };
2237  uint32_t i;
2238
2239
2240  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2241    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2242  }
2243}
2244
/*
 * Cases that combine a [before 3] reset with other tailorings of the same
 * characters; each rule/list pair is passed to genericRulesStarter().
 * (The previous note on this function said that this test also fails -
 * TODO confirm whether that is still the case.)
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    /* the same tailoring twice, with the [before 3] reset last and first */
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
  uint32_t caseIndex = 0;

  while(caseIndex < caseCount) {
    genericRulesStarter(tests[caseIndex].rules, tests[caseIndex].data, tests[caseIndex].len);
    ++caseIndex;
  }

  /* Disabled: prefix variants of the rules above plus a collation-element dump. */
#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
/* this piece of code should be in some sort of verbose mode     */
/* it gets the collation elements for elements and prints them   */
/* This is useful when trying to see whether the problem is      */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
2322
2323static void TestPrefixCompose(void) {
2324  const char* rule1 =
2325        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2326  /*
2327  const char* test[] = {
2328      "\\u30c6\\u30fc\\u30bf",
2329      "\\u30c6\\u30a7\\u30bf",
2330  };
2331  */
2332  {
2333    UErrorCode status = U_ZERO_ERROR;
2334    /*uint32_t i = 0;*/
2335    /*UCollationElements *it = NULL;*/
2336/*    uint32_t CE;*/
2337    UChar string[256];
2338    uint32_t uStringLen;
2339    UCollator *coll = NULL;
2340
2341    uStringLen = u_unescape(rule1, string, 256);
2342
2343    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2344    ucol_close(coll);
2345  }
2346
2347
2348}
2349
2350/*
2351[last variable] last variable value
2352[last primary ignorable] largest CE for primary ignorable
2353[last secondary ignorable] largest CE for secondary ignorable
2354[last tertiary ignorable] largest CE for tertiary ignorable
2355[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2356*/
2357
/*
 * Exercises the special reset positions ([first/last ... ignorable],
 * [first/last variable], [first/last regular], [first implicit], [top])
 * with and without [before n]. Every rule/list pair in the table is
 * passed to genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /* The values below are hardcoded for the current UCA; when the UCA
   * changes, they may have to be changed as well.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
#if 0
    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */
#endif
    /*
     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
     * and it *is* possible to "go before" that.
     */
    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* 'normal' befores */

    /*
     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
     * because there is no tailoring space before that boundary.
     * Made the tests work by tailoring to a space instead.
     */
    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },
#endif
    /* NOTE(review): eight strings are listed but len is 7, so "u" is not passed on - confirm intent */
    { "&[last variable]<z"
      "&' '<x"  /* was &[last primary ignorable]<x, see above */
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
  uint32_t caseIndex = 0;

  while(caseIndex < caseCount) {
    genericRulesStarter(tests[caseIndex].rules, tests[caseIndex].data, tests[caseIndex].len);
    ++caseIndex;
  }
}
2484
2485
/*
 * Not a real functional test: it only tries out whether copying of UCA
 * contents, requested through an [optimize ...] rule option, fails.
 * The resulting behaviour is expected to stay the same, so there is
 * nothing more specific to check.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    /* [optimize] over U+AC00..U+D7FF, with two plain letters as data */
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  const uint32_t caseCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
  uint32_t caseIndex = 0;

  while(caseIndex < caseCount) {
    genericRulesStarter(tests[caseIndex].rules, tests[caseIndex].data, tests[caseIndex].len);
    ++caseIndex;
  }
}
2507
2508/*
2509cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2510weiv    ucol_strcollIter?
2511cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2512weiv    these are the input strings?
2513cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2514weiv    will check - could be a problem with utf-8 iterator
2515cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2516weiv    hmmm
2517cycheng@ca.ibm.c... note that we have a standalone high surrogate
2518weiv    that doesn't sound right
2519cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2520weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2521cycheng@ca.ibm.c... yes
2522weiv    and then do the comparison
2523cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2524weiv    utf-16 strings look like a little endian ones in the example you sent me
2525weiv    It could be a bug - let me try to test it out
2526cycheng@ca.ibm.c... ok
2527cycheng@ca.ibm.c... we can wait till the conf. call
cycheng@ca.ibm.c... next week
2529weiv    that would be great
2530weiv    hmmm
2531weiv    I might be wrong
2532weiv    let me play with it some more
2533cycheng@ca.ibm.c... ok
2534cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2535cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2536cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2537weiv    ok
2538cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2539weiv    thanks
2540cycheng@ca.ibm.c... the 4 strings we sent are just samples
2541*/
/*
 * Disabled (#if 0) reproduction of the report quoted in the chat log above;
 * nothing in this region is compiled.
 *
 * It compares the same two strings (U+D800 U+0021 and U+FFFC U+0062) once
 * through UTF-16BE iterators and once through UTF-8 iterators, using the
 * root collator with normalization switched on, and logs an error when the
 * two ucol_strcollIter() results differ.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* The two strings as big-endian UTF-16 bytes. */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* The same two strings as UTF-8 bytes (the lone surrogate is encoded as ED A0 80). */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  /* Lengths are given in bytes: 4 bytes per string in both encodings. */
  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2582
2583#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2584static void Alexis2(void) {
2585  UErrorCode status = U_ZERO_ERROR;
2586  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2588  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2589  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2590
2591  UConverter *conv = NULL;
2592
2593  UCharIterator U16BEItS, U16BEItT;
2594  UCharIterator U8ItS, U8ItT;
2595
2596  UCollationResult resU16, resU16BE, resU8;
2597
2598  static const char* const pairs[][2] = {
2599    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2600    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2601    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2602    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2603    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2604    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2605    { "\\u0020", "\\u0020\\u0000"}
2606/*
26075F20 (my result here)
26085F204E008E3F
26095F20 (your result here)
2610*/
2611  };
2612
2613  int32_t i = 0;
2614
2615  UCollator *coll = ucol_open("", &status);
2616  if(status == U_FILE_ACCESS_ERROR) {
2617    log_data_err("Is your data around?\n");
2618    return;
2619  } else if(U_FAILURE(status)) {
2620    log_err("Error opening collator\n");
2621    return;
2622  }
2623  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2624  conv = ucnv_open("UTF16BE", &status);
2625  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
2626    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2627    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2628
2629    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2630
2631    log_verbose("Result of strcoll is %i\n", resU16);
2632
2633    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2634    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2635    (void)U16BELenS;    /* Suppress set but not used warnings. */
2636    (void)U16BELenT;
2637
2638    /* use the original sizes, as the result from converter is in bytes */
2639    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2640    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2641
2642    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2643
2644    log_verbose("Result of U16BE is %i\n", resU16BE);
2645
2646    if(resU16 != resU16BE) {
2647      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2648    }
2649
2650    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2651    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2652
2653    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2654    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2655
2656    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2657
2658    if(resU16 != resU8) {
2659      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2660    }
2661
2662  }
2663
2664  ucol_close(coll);
2665  ucnv_close(conv);
2666}
2667
2668static void TestHebrewUCA(void) {
2669  UErrorCode status = U_ZERO_ERROR;
2670  static const char *first[] = {
2671    "d790d6b8d79cd795d6bcd7a9",
2672    "d790d79cd79ed7a7d799d799d7a1",
2673    "d790d6b4d79ed795d6bcd7a9",
2674  };
2675
2676  char utf8String[3][256];
2677  UChar utf16String[3][256];
2678
2679  int32_t i = 0, j = 0;
2680  int32_t sizeUTF8[3];
2681  int32_t sizeUTF16[3];
2682
2683  UCollator *coll = ucol_open("", &status);
2684  if (U_FAILURE(status)) {
2685      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2686      return;
2687  }
2688  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2689
2690  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
2691    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2692    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2693    log_verbose("%i: ");
2694    for(j = 0; j < sizeUTF16[i]; j++) {
2695      /*log_verbose("\\u%04X", utf16String[i][j]);*/
2696      log_verbose("%04X", utf16String[i][j]);
2697    }
2698    log_verbose("\n");
2699  }
2700  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
2701    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
2702      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2703    }
2704  }
2705
2706  ucol_close(coll);
2707
2708}
2709
2710static void TestPartialSortKeyTermination(void) {
2711  static const char* cases[] = {
2712    "\\u1234\\u1234\\udc00",
2713    "\\udc00\\ud800\\ud800"
2714  };
2715
2716  int32_t i;
2717
2718  UErrorCode status = U_ZERO_ERROR;
2719
2720  UCollator *coll = ucol_open("", &status);
2721
2722  UCharIterator iter;
2723
2724  UChar currCase[256];
2725  int32_t length = 0;
2726  int32_t pKeyLen = 0;
2727
2728  uint8_t key[256];
2729
2730  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
2731    uint32_t state[2] = {0, 0};
2732    length = u_unescape(cases[i], currCase, 256);
2733    uiter_setString(&iter, currCase, length);
2734    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2735    (void)pKeyLen;   /* Suppress set but not used warning. */
2736
2737    log_verbose("Done\n");
2738
2739  }
2740  ucol_close(coll);
2741}
2742
2743static void TestSettings(void) {
2744  static const char* cases[] = {
2745    "apple",
2746      "Apple"
2747  };
2748
2749  static const char* locales[] = {
2750    "",
2751      "en"
2752  };
2753
2754  UErrorCode status = U_ZERO_ERROR;
2755
2756  int32_t i = 0, j = 0;
2757
2758  UChar source[256], target[256];
2759  int32_t sLen = 0, tLen = 0;
2760
2761  UCollator *collateObject = NULL;
2762  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
2763    collateObject = ucol_open(locales[i], &status);
2764    ucol_setStrength(collateObject, UCOL_PRIMARY);
2765    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2766    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
2767      sLen = u_unescape(cases[j-1], source, 256);
2768      source[sLen] = 0;
2769      tLen = u_unescape(cases[j], target, 256);
2770      source[tLen] = 0;
2771      doTest(collateObject, source, target, UCOL_EQUAL);
2772    }
2773    ucol_close(collateObject);
2774  }
2775}
2776
2777static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2778    UErrorCode status = U_ZERO_ERROR;
2779    int32_t errorNo = 0;
2780    const UChar *sourceRules = NULL;
2781    int32_t sourceRulesLen = 0;
2782    UParseError parseError;
2783    UColAttributeValue french = UCOL_OFF;
2784
2785    if(!ucol_equals(source, target)) {
2786        log_err("Same collators, different address not equal\n");
2787        errorNo++;
2788    }
2789    ucol_close(target);
2790    if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2791        target = ucol_safeClone(source, NULL, NULL, &status);
2792        if(U_FAILURE(status)) {
2793            log_err("Error creating clone\n");
2794            errorNo++;
2795            return errorNo;
2796        }
2797        if(!ucol_equals(source, target)) {
2798            log_err("Collator different from it's clone\n");
2799            errorNo++;
2800        }
2801        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2802        if(french == UCOL_ON) {
2803            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2804        } else {
2805            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2806        }
2807        if(U_FAILURE(status)) {
2808            log_err("Error setting attributes\n");
2809            errorNo++;
2810            return errorNo;
2811        }
2812        if(ucol_equals(source, target)) {
2813            log_err("Collators same even when options changed\n");
2814            errorNo++;
2815        }
2816        ucol_close(target);
2817
2818        sourceRules = ucol_getRules(source, &sourceRulesLen);
2819        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2820        if(U_FAILURE(status)) {
2821            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2822            errorNo++;
2823            return errorNo;
2824        }
2825        /* Note: The tailoring rule string is an optional data item. */
2826        if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2827            log_err("Collator different from collator that was created from the same rules\n");
2828            errorNo++;
2829        }
2830        ucol_close(target);
2831    }
2832    return errorNo;
2833}
2834
2835
2836static void TestEquals(void) {
2837    /* ucol_equals is not currently a public API. There is a chance that it will become
2838    * something like this.
2839    */
2840    /* test whether the two collators instantiated from the same locale are equal */
2841    UErrorCode status = U_ZERO_ERROR;
2842    UParseError parseError;
2843    int32_t noOfLoc = uloc_countAvailable();
2844    const char *locName = NULL;
2845    UCollator *source = NULL, *target = NULL;
2846    int32_t i = 0;
2847
2848    const char* rules[] = {
2849        "&l < lj <<< Lj <<< LJ",
2850        "&n < nj <<< Nj <<< NJ",
2851        "&ae <<< \\u00e4",
2852        "&AE <<< \\u00c4"
2853    };
2854    /*
2855    const char* badRules[] = {
2856    "&l <<< Lj",
2857    "&n < nj <<< nJ <<< NJ",
2858    "&a <<< \\u00e4",
2859    "&AE <<< \\u00c4 <<< x"
2860    };
2861    */
2862
2863    UChar sourceRules[1024], targetRules[1024];
2864    int32_t sourceRulesSize = 0, targetRulesSize = 0;
2865    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
2866
2867    for(i = 0; i < rulesSize; i++) {
2868        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2869        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2870    }
2871
2872    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2873    if(status == U_FILE_ACCESS_ERROR) {
2874        log_data_err("Is your data around?\n");
2875        return;
2876    } else if(U_FAILURE(status)) {
2877        log_err("Error opening collator\n");
2878        return;
2879    }
2880    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2881    if(!ucol_equals(source, target)) {
2882        log_err("Equivalent collators not equal!\n");
2883    }
2884    ucol_close(source);
2885    ucol_close(target);
2886
2887    source = ucol_open("root", &status);
2888    target = ucol_open("root", &status);
2889    log_verbose("Testing root\n");
2890    if(!ucol_equals(source, source)) {
2891        log_err("Same collator not equal\n");
2892    }
2893    if(TestEqualsForCollator("root", source, target)) {
2894        log_err("Errors for root\n");
2895    }
2896    ucol_close(source);
2897
2898    for(i = 0; i<noOfLoc; i++) {
2899        status = U_ZERO_ERROR;
2900        locName = uloc_getAvailable(i);
2901        /*if(hasCollationElements(locName)) {*/
2902        log_verbose("Testing equality for locale %s\n", locName);
2903        source = ucol_open(locName, &status);
2904        target = ucol_open(locName, &status);
2905        if (U_FAILURE(status)) {
2906            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2907            continue;
2908        }
2909        if(TestEqualsForCollator(locName, source, target)) {
2910            log_err("Errors for locale %s\n", locName);
2911        }
2912        ucol_close(source);
2913        /*}*/
2914    }
2915}
2916
2917static void TestJ2726(void) {
2918    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2919    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2920    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2921    UErrorCode status = U_ZERO_ERROR;
2922    UCollator *coll = ucol_open("en", &status);
2923    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2924    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2925    doTest(coll, a, aSpace, UCOL_EQUAL);
2926    doTest(coll, aSpace, a, UCOL_EQUAL);
2927    doTest(coll, a, spaceA, UCOL_EQUAL);
2928    doTest(coll, spaceA, a, UCOL_EQUAL);
2929    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2930    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2931    ucol_close(coll);
2932}
2933
2934static void NullRule(void) {
2935    UChar r[3] = {0};
2936    UErrorCode status = U_ZERO_ERROR;
2937    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2938    if(U_SUCCESS(status)) {
2939        log_err("This should have been an error!\n");
2940        ucol_close(coll);
2941    } else {
2942        status = U_ZERO_ERROR;
2943    }
2944    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2945    if(U_FAILURE(status)) {
2946        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2947    } else {
2948        ucol_close(coll);
2949    }
2950}
2951
2952/**
2953 * Test for CollationElementIterator previous and next for the whole set of
2954 * unicode characters with normalization on.
2955 */
2956static void TestNumericCollation(void)
2957{
2958    UErrorCode status = U_ZERO_ERROR;
2959
2960    const static char *basicTestStrings[]={
2961    "hello1",
2962    "hello2",
2963    "hello2002",
2964    "hello2003",
2965    "hello123456",
2966    "hello1234567",
2967    "hello10000000",
2968    "hello100000000",
2969    "hello1000000000",
2970    "hello10000000000",
2971    };
2972
2973    const static char *preZeroTestStrings[]={
2974    "avery10000",
2975    "avery010000",
2976    "avery0010000",
2977    "avery00010000",
2978    "avery000010000",
2979    "avery0000010000",
2980    "avery00000010000",
2981    "avery000000010000",
2982    };
2983
2984    const static char *thirtyTwoBitNumericStrings[]={
2985    "avery42949672960",
2986    "avery42949672961",
2987    "avery42949672962",
2988    "avery429496729610"
2989    };
2990
2991     const static char *longNumericStrings[]={
2992     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2993        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2994        are treated as multiple collation elements. */
2995    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2996    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2997    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2998    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2999    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
3000    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
3001    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3002    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3003    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3004    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3005    };
3006
3007    const static char *supplementaryDigits[] = {
3008      "\\uD835\\uDFCE", /* 0 */
3009      "\\uD835\\uDFCF", /* 1 */
3010      "\\uD835\\uDFD0", /* 2 */
3011      "\\uD835\\uDFD1", /* 3 */
3012      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3013      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3014      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3015      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3016      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3017      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3018    };
3019
3020    const static char *foreignDigits[] = {
3021      "\\u0661",
3022        "\\u0662",
3023        "\\u0663",
3024      "\\u0661\\u0660",
3025      "\\u0661\\u0662",
3026      "\\u0661\\u0663",
3027      "\\u0662\\u0660",
3028      "\\u0662\\u0662",
3029      "\\u0662\\u0663",
3030      "\\u0663\\u0660",
3031      "\\u0663\\u0662",
3032      "\\u0663\\u0663"
3033    };
3034
3035    const static char *evenZeroes[] = {
3036      "2000",
3037      "2001",
3038        "2002",
3039        "2003"
3040    };
3041
3042    UColAttribute att = UCOL_NUMERIC_COLLATION;
3043    UColAttributeValue val = UCOL_ON;
3044
3045    /* Open our collator. */
3046    UCollator* coll = ucol_open("root", &status);
3047    if (U_FAILURE(status)){
3048        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3049              myErrorName(status));
3050        return;
3051    }
3052    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
3053    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
3054    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
3055    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
3056    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
3057    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
3058
3059    /* Setting up our collator to do digits. */
3060    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3061    if (U_FAILURE(status)){
3062        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3063              myErrorName(status));
3064        return;
3065    }
3066
3067    /*
3068       Testing that prepended zeroes still yield the correct collation behavior.
3069       We expect that every element in our strings array will be equal.
3070    */
3071    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
3072
3073    ucol_close(coll);
3074}
3075
3076static void TestTibetanConformance(void)
3077{
3078    const char* test[] = {
3079        "\\u0FB2\\u0591\\u0F71\\u0061",
3080        "\\u0FB2\\u0F71\\u0061"
3081    };
3082
3083    UErrorCode status = U_ZERO_ERROR;
3084    UCollator *coll = ucol_open("", &status);
3085    UChar source[100];
3086    UChar target[100];
3087    int result;
3088    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3089    if (U_SUCCESS(status)) {
3090        u_unescape(test[0], source, 100);
3091        u_unescape(test[1], target, 100);
3092        doTest(coll, source, target, UCOL_EQUAL);
3093        result = ucol_strcoll(coll, source, -1,   target, -1);
3094        log_verbose("result %d\n", result);
3095        if (UCOL_EQUAL != result) {
3096            log_err("Tibetan comparison error\n");
3097        }
3098    }
3099    ucol_close(coll);
3100
3101    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3102}
3103
/** Passes two Han strings to genericLocaleStarter() for the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
3108
3109/**
3110 * Iterate through the given iterator, checking to see that all the strings
3111 * in the expected array are present.
3112 * @param expected array of strings we expect to see, or NULL
3113 * @param expectedCount number of elements of expected, or 0
3114 */
3115static int32_t checkUEnumeration(const char* msg,
3116                                 UEnumeration* iter,
3117                                 const char** expected,
3118                                 int32_t expectedCount) {
3119    UErrorCode ec = U_ZERO_ERROR;
3120    int32_t i = 0, n, j, bit;
3121    int32_t seenMask = 0;
3122
3123    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3124    n = uenum_count(iter, &ec);
3125    if (!assertSuccess("count", &ec)) return -1;
3126    log_verbose("%s = [", msg);
3127    for (;; ++i) {
3128        const char* s = uenum_next(iter, NULL, &ec);
3129        if (!assertSuccess("snext", &ec) || s == NULL) break;
3130        if (i != 0) log_verbose(",");
3131        log_verbose("%s", s);
3132        /* check expected list */
3133        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3134            if ((seenMask&bit) == 0 &&
3135                uprv_strcmp(s, expected[j]) == 0) {
3136                seenMask |= bit;
3137                break;
3138            }
3139        }
3140    }
3141    log_verbose("] (%d)\n", i);
3142    assertTrue("count verified", i==n);
3143    /* did we see all expected strings? */
3144    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3145        if ((seenMask&bit)!=0) {
3146            log_verbose("Ok: \"%s\" seen\n", expected[j]);
3147        } else {
3148            log_err("FAIL: \"%s\" not seen\n", expected[j]);
3149        }
3150    }
3151    return n;
3152}
3153
3154/**
3155 * Test new API added for separate collation tree.
3156 */
3157static void TestSeparateTrees(void) {
3158    UErrorCode ec = U_ZERO_ERROR;
3159    UEnumeration *e = NULL;
3160    int32_t n = -1;
3161    UBool isAvailable;
3162    char loc[256];
3163
3164    static const char* AVAIL[] = { "en", "de" };
3165
3166    static const char* KW[] = { "collation" };
3167
3168    static const char* KWVAL[] = { "phonebook", "stroke" };
3169
3170#if !UCONFIG_NO_SERVICE
3171    e = ucol_openAvailableLocales(&ec);
3172    if (e != NULL) {
3173        assertSuccess("ucol_openAvailableLocales", &ec);
3174        assertTrue("ucol_openAvailableLocales!=0", e!=0);
3175        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
3176        (void)n;    /* Suppress set but not used warnings. */
3177        /* Don't need to check n because we check list */
3178        uenum_close(e);
3179    } else {
3180        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3181    }
3182#endif
3183
3184    e = ucol_getKeywords(&ec);
3185    if (e != NULL) {
3186        assertSuccess("ucol_getKeywords", &ec);
3187        assertTrue("ucol_getKeywords!=0", e!=0);
3188        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
3189        /* Don't need to check n because we check list */
3190        uenum_close(e);
3191    } else {
3192        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3193    }
3194
3195    e = ucol_getKeywordValues(KW[0], &ec);
3196    if (e != NULL) {
3197        assertSuccess("ucol_getKeywordValues", &ec);
3198        assertTrue("ucol_getKeywordValues!=0", e!=0);
3199        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
3200        /* Don't need to check n because we check list */
3201        uenum_close(e);
3202    } else {
3203        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3204    }
3205
3206    /* Try setting a warning before calling ucol_getKeywordValues */
3207    ec = U_USING_FALLBACK_WARNING;
3208    e = ucol_getKeywordValues(KW[0], &ec);
3209    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3210        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3211        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
3212        /* Don't need to check n because we check list */
3213        uenum_close(e);
3214    }
3215
3216    /*
3217U_DRAFT int32_t U_EXPORT2
3218ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3219                             const char* locale, UBool* isAvailable,
3220                             UErrorCode* status);
3221}
3222*/
3223    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3224                                     &isAvailable, &ec);
3225    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3226        assertEquals("getFunctionalEquivalent(de)", "root", loc);
3227        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3228                   isAvailable == TRUE);
3229    }
3230
3231    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3232                                     &isAvailable, &ec);
3233    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3234        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3235        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3236                   isAvailable == FALSE);
3237    }
3238}
3239
/*
 * Supersedes TestJ784.
 * Runs two string lists built from Latin vowels with pinyin tone marks through
 * genericRulesStarter() with a [before 2] tailoring, and through
 * genericLocaleStarter() with the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables with and without a macron on the vowel. */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Accent and case variations of "a" behind a leading x/X. */
    static const char *accentCaseMix[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(pinyinRules, syllables, LEN(syllables));
    genericLocaleStarter("zh", syllables, LEN(syllables));
    genericRulesStarter(pinyinRules, accentCaseMix, LEN(accentCaseMix));
    genericLocaleStarter("zh", accentCaseMix, LEN(accentCaseMix));
}
3300
3301static void TestBeforeTightening(void) {
3302    static const struct {
3303        const char *rules;
3304        UErrorCode expectedStatus;
3305    } tests[] = {
3306        { "&[before 1]a<x", U_ZERO_ERROR },
3307        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3308        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3309        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3310        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3311        { "&[before 2]a<<x",U_ZERO_ERROR },
3312        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3313        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3314        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3315        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3316        { "&[before 3]a<<<x",U_ZERO_ERROR },
3317        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3318        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3319    };
3320
3321    int32_t i = 0;
3322
3323    UErrorCode status = U_ZERO_ERROR;
3324    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3325    uint32_t rlen = 0;
3326
3327    UCollator *coll = NULL;
3328
3329
3330    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
3331        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3332        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3333        if(status != tests[i].expectedStatus) {
3334            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3335                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3336        }
3337        ucol_close(coll);
3338        status = U_ZERO_ERROR;
3339    }
3340
3341}
3342
3343/*
3344&m < a
3345&[before 1] a < x <<< X << q <<< Q < z
3346assert: m <<< M < x <<< X << q <<< Q < z < a < n
3347
3348&m < a
3349&[before 2] a << x <<< X << q <<< Q < z
3350assert: m <<< M < x <<< X << q <<< Q << a < z < n
3351
3352&m < a
3353&[before 3] a <<< x <<< X << q <<< Q < z
3354assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3355
3356
3357&m << a
3358&[before 1] a < x <<< X << q <<< Q < z
3359assert: x <<< X << q <<< Q < z < m <<< M << a < n
3360
3361&m << a
3362&[before 2] a << x <<< X << q <<< Q < z
3363assert: m <<< M << x <<< X << q <<< Q << a < z < n
3364
3365&m << a
3366&[before 3] a <<< x <<< X << q <<< Q < z
3367assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3368
3369
3370&m <<< a
3371&[before 1] a < x <<< X << q <<< Q < z
3372assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3373
3374&m <<< a
3375&[before 2] a << x <<< X << q <<< Q < z
3376assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3377
3378&m <<< a
3379&[before 3] a <<< x <<< X << q <<< Q < z
3380assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3381
3382
3383&[before 1] s < x <<< X << q <<< Q < z
3384assert: r <<< R < x <<< X << q <<< Q < z < s < n
3385
3386&[before 2] s << x <<< X << q <<< Q < z
3387assert: r <<< R < x <<< X << q <<< Q << s < z < n
3388
3389&[before 3] s <<< x <<< X << q <<< Q < z
3390assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3391
3392
3393&[before 1] \u24DC < x <<< X << q <<< Q < z
3394assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3395
3396&[before 2] \u24DC << x <<< X << q <<< Q < z
3397assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3398
3399&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3400assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3401*/
3402
3403
3404#if 0
3405/* requires features not yet supported */
/*
 * Disabled by the surrounding "#if 0". Each table entry pairs a rule string
 * with a list of strings that is handed to genericRulesStarter() together
 * with that rule.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } scenarios[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t idx = 0;

    for(idx = 0; idx < sizeof(scenarios)/sizeof(scenarios[0]); idx++) {
        genericRulesStarter(scenarios[idx].rules, scenarios[idx].order, scenarios[idx].size);
    }
}
3450#endif
3451
3452static void TestTailorNULL( void ) {
3453    const static char* rule = "&a <<< '\\u0000'";
3454    UErrorCode status = U_ZERO_ERROR;
3455    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3456    uint32_t rlen = 0;
3457    UChar a = 1, null = 0;
3458    UCollationResult res = UCOL_EQUAL;
3459
3460    UCollator *coll = NULL;
3461
3462
3463    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3464    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3465
3466    if(U_FAILURE(status)) {
3467        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3468    } else {
3469        res = ucol_strcoll(coll, &a, 1, &null, 1);
3470
3471        if(res != UCOL_LESS) {
3472            log_err("NULL was not tailored properly!\n");
3473        }
3474    }
3475
3476    ucol_close(coll);
3477}
3478
3479static void
3480TestUpperFirstQuaternary(void)
3481{
3482  const char* tests[] = { "B", "b", "Bb", "bB" };
3483  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3484  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3485  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3486}
3487
3488static void
3489TestJ4960(void)
3490{
3491  const char* tests[] = { "\\u00e2T", "aT" };
3492  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3493  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3494  const char* tests2[] = { "a", "A" };
3495  const char* rule = "&[first tertiary ignorable]=A=a";
3496  UColAttribute att2[] = { UCOL_CASE_LEVEL };
3497  UColAttributeValue attVals2[] = { UCOL_ON };
3498  /* Test whether we correctly ignore primary ignorables on case level when */
3499  /* we have only primary & case level */
3500  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
3501  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3502  /* and case level */
3503  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3504  /* Test whether completely ignorable letters have case level info (they shouldn't) */
3505  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
3506}
3507
3508static void
3509TestJ5223(void)
3510{
3511  static const char *test = "this is a test string";
3512  UChar ustr[256];
3513  int32_t ustr_length = u_unescape(test, ustr, 256);
3514  unsigned char sortkey[256];
3515  int32_t sortkey_length;
3516  UErrorCode status = U_ZERO_ERROR;
3517  static UCollator *coll = NULL;
3518  coll = ucol_open("root", &status);
3519  if(U_FAILURE(status)) {
3520    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3521    return;
3522  }
3523  ucol_setStrength(coll, UCOL_PRIMARY);
3524  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3525  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3526  if (U_FAILURE(status)) {
3527    log_err("Failed setting atributes\n");
3528    return;
3529  }
3530  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3531  if (sortkey_length > 256) return;
3532
3533  /* we mark the position where the null byte should be written in advance */
3534  sortkey[sortkey_length-1] = 0xAA;
3535
3536  /* we set the buffer size one byte higher than needed */
3537  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3538    sortkey_length+1);
3539
3540  /* no error occurs (for me) */
3541  if (sortkey[sortkey_length-1] == 0xAA) {
3542    log_err("Hit bug at first try\n");
3543  }
3544
3545  /* we mark the position where the null byte should be written again */
3546  sortkey[sortkey_length-1] = 0xAA;
3547
3548  /* this time we set the buffer size to the exact amount needed */
3549  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3550    sortkey_length);
3551
3552  /* now the trailing null byte is not written */
3553  if (sortkey[sortkey_length-1] == 0xAA) {
3554    log_err("Hit bug at second try\n");
3555  }
3556
3557  ucol_close(coll);
3558}
3559
/*
 * Regression test for Thai partial sort key problem.
 * Hands two Thai strings that differ only in their second-to-last code unit
 * (U+0E47 vs. U+0E48) to the generic ordering check for the "th" locale.
 */
static void
TestJ5232(void)
{
    const static char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}
3571
/*
 * Runs the generic rule-based ordering check on "a" and "y" with a tailoring
 * that resets at "Ny" and at [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *ordered[] = { "a", "y" };

    genericRulesStarter(tailoring, ordered, LEN(ordered));
}
3579
3580static void
3581TestVI5913(void)
3582{
3583    UErrorCode status = U_ZERO_ERROR;
3584    int32_t i, j;
3585    UCollator *coll =NULL;
3586    uint8_t  resColl[100], expColl[100];
3587    int32_t  rLen, tLen, ruleLen, sLen, kLen;
3588    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3589    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3590    /*
3591     * Note: Just tailoring &z<ae^ does not work as expected:
3592     * The UCA spec requires for discontiguous contractions that they
3593     * extend an *existing match* by one combining mark at a time.
3594     * Therefore, ae must be a contraction so that the builder finds
3595     * discontiguous contractions for ae^, for example with an intervening underdot.
3596     * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3597     */
3598    UChar rule3[256]={
3599        0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3600        0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3601        0};
3602    static const UChar tData[][20]={
3603        {0x1EAC, 0},
3604        {0x0041, 0x0323, 0x0302, 0},
3605        {0x1EA0, 0x0302, 0},
3606        {0x00C2, 0x0323, 0},
3607        {0x1ED8, 0},  /* O with dot and circumflex */
3608        {0x1ECC, 0x0302, 0},
3609        {0x1EB7, 0},
3610        {0x1EA1, 0x0306, 0},
3611    };
3612    static const UChar tailorData[][20]={
3613        {0x1FA2, 0},  /* Omega with 3 combining marks */
3614        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3615        {0x1FF3, 0x0313, 0x0300, 0},
3616        {0x1F60, 0x0300, 0x0345, 0},
3617        {0x1F62, 0x0345, 0},
3618        {0x1FA0, 0x0300, 0},
3619    };
3620    static const UChar tailorData2[][20]={
3621        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3622        {0x0073, 0x0323, 0x030C, 0},
3623        {0x0073, 0x030C, 0x0323, 0},
3624    };
3625    static const UChar tailorData3[][20]={
3626        {0x007a, 0},  /*  z */
3627        {0x0061, 0x0065, 0},  /*  a + e */
3628        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3629        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3630        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3631        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3632        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3633        {0x00EA, 0},  /* e with circumflex  */
3634    };
3635
3636    /* Test Vietnamese sort. */
3637    coll = ucol_open("vi", &status);
3638    if(U_FAILURE(status)) {
3639        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3640        return;
3641    }
3642    log_verbose("\n\nVI collation:");
3643    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3644        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3645    }
3646    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3647        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3648    }
3649    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3650        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3651    }
3652    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3653        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3654    }
3655
3656    for (j=0; j<8; j++) {
3657        tLen = u_strlen(tData[j]);
3658        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3659        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3660        for(i = 0; i<rLen; i++) {
3661            log_verbose(" %02X", resColl[i]);
3662        }
3663    }
3664
3665    ucol_close(coll);
3666
3667    /* Test Romanian sort. */
3668    coll = ucol_open("ro", &status);
3669    log_verbose("\n\nRO collation:");
3670    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3671        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3672    }
3673    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3674        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3675    }
3676    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3677        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3678    }
3679
3680    for (j=4; j<8; j++) {
3681        tLen = u_strlen(tData[j]);
3682        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3683        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3684        for(i = 0; i<rLen; i++) {
3685            log_verbose(" %02X", resColl[i]);
3686        }
3687    }
3688    ucol_close(coll);
3689
3690    /* Test the precomposed Greek character with 3 combining marks. */
3691    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3692    ruleLen = u_strlen(rule);
3693    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3694    if (U_FAILURE(status)) {
3695        log_err("ucol_openRules failed with %s\n", u_errorName(status));
3696        return;
3697    }
3698    sLen = u_strlen(tailorData[0]);
3699    for (j=1; j<6; j++) {
3700        tLen = u_strlen(tailorData[j]);
3701        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3702            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3703        }
3704    }
3705    /* Test getSortKey. */
3706    tLen = u_strlen(tailorData[0]);
3707    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3708    for (j=0; j<6; j++) {
3709        tLen = u_strlen(tailorData[j]);
3710        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3711        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3712            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3713            for(i = 0; i<rLen; i++) {
3714                log_err(" %02X", resColl[i]);
3715            }
3716        }
3717    }
3718    ucol_close(coll);
3719
3720    log_verbose("\n\nTailoring test for s with caron:");
3721    ruleLen = u_strlen(rule2);
3722    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3723    tLen = u_strlen(tailorData2[0]);
3724    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3725    for (j=1; j<3; j++) {
3726        tLen = u_strlen(tailorData2[j]);
3727        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3728        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3729            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3730            for(i = 0; i<rLen; i++) {
3731                log_err(" %02X", resColl[i]);
3732            }
3733        }
3734    }
3735    ucol_close(coll);
3736
3737    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3738    ruleLen = u_strlen(rule3);
3739    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3740    tLen = u_strlen(tailorData3[3]);
3741    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3742    log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3743    for(i = 0; i<kLen; i++) {
3744        log_verbose(" %02X", expColl[i]);
3745    }
3746    for (j=4; j<6; j++) {
3747        tLen = u_strlen(tailorData3[j]);
3748        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3749
3750        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3751            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3752            for(i = 0; i<rLen; i++) {
3753                log_err(" %02X", resColl[i]);
3754            }
3755        }
3756
3757        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3758         for(i = 0; i<rLen; i++) {
3759             log_verbose(" %02X", resColl[i]);
3760         }
3761    }
3762    ucol_close(coll);
3763}
3764
3765static void
3766TestTailor6179(void)
3767{
3768    UErrorCode status = U_ZERO_ERROR;
3769    int32_t i;
3770    UCollator *coll =NULL;
3771    uint8_t  resColl[100];
3772    int32_t  rLen, tLen, ruleLen;
3773    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3774    static const UChar rule1[]={
3775            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3776            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3777            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3778            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3779    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3780    static const UChar rule2[]={
3781            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3782            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3783            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3784            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3785            0x3C,0x3C,0x20,0x62,0};
3786
3787    static const UChar tData1[][4]={
3788        {0x61, 0},
3789        {0x62, 0},
3790        { 0xFDD0,0x009E, 0}
3791    };
3792    static const UChar tData2[][4]={
3793        {0x61, 0},
3794        {0x62, 0},
3795        { 0xFDD0,0x009E, 0}
3796     };
3797
3798    /*
3799     * These values from FractionalUCA.txt will change,
3800     * and need to be updated here.
3801     * TODO: Make this not check for particular sort keys.
3802     * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3803     */
3804    static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3805    static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3806    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3807    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3808
3809    UParseError parseError;
3810
3811    /* Test [Last Primary ignorable] */
3812
3813    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3814    ruleLen = u_strlen(rule1);
3815    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3816    if (U_FAILURE(status)) {
3817        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3818        return;
3819    }
3820    tLen = u_strlen(tData1[0]);
3821    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3822    if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3823        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3824        for(i = 0; i<rLen; i++) {
3825            log_err(" %02X", resColl[i]);
3826        }
3827        log_err("\n");
3828    }
3829    tLen = u_strlen(tData1[1]);
3830    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3831    if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3832        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3833        for(i = 0; i<rLen; i++) {
3834            log_err(" %02X", resColl[i]);
3835        }
3836        log_err("\n");
3837    }
3838    ucol_close(coll);
3839
3840
3841    /* Test [Last Secondary ignorable] */
3842    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3843    ruleLen = u_strlen(rule2);
3844    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3845    if (U_FAILURE(status)) {
3846        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3847        log_info("  offset=%d  \"%s\" | \"%s\"\n",
3848                 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3849        return;
3850    }
3851    tLen = u_strlen(tData2[0]);
3852    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3853    if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3854        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3855        for(i = 0; i<rLen; i++) {
3856            log_err(" %02X", resColl[i]);
3857        }
3858        log_err("\n");
3859    }
3860    tLen = u_strlen(tData2[1]);
3861    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3862    if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3863      log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3864      for(i = 0; i<rLen; i++) {
3865        log_err(" %02X", resColl[i]);
3866      }
3867      log_err("\n");
3868    }
3869    ucol_close(coll);
3870}
3871
3872static void
3873TestUCAPrecontext(void)
3874{
3875    UErrorCode status = U_ZERO_ERROR;
3876    int32_t i, j;
3877    UCollator *coll =NULL;
3878    uint8_t  resColl[100], prevColl[100];
3879    int32_t  rLen, tLen, ruleLen;
3880    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3881    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3882    /* & l middle-dot << a  a is an expansion. */
3883
3884    UChar tData1[][20]={
3885            { 0xb7, 0},  /* standalone middle dot(0xb7) */
3886            { 0x387, 0}, /* standalone middle dot(0x387) */
3887            { 0x61, 0},  /* a */
3888            { 0x6C, 0},  /* l */
3889            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3890            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3891            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3892            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3893            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3894            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3895            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3896     };
3897
3898    log_verbose("\n\nEN collation:");
3899    coll = ucol_open("en", &status);
3900    if (U_FAILURE(status)) {
3901        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3902        return;
3903    }
3904    for (j=0; j<11; j++) {
3905        tLen = u_strlen(tData1[j]);
3906        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3907        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3908            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3909                    j, tData1[j]);
3910        }
3911        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3912        for(i = 0; i<rLen; i++) {
3913            log_verbose(" %02X", resColl[i]);
3914        }
3915        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3916     }
3917     ucol_close(coll);
3918
3919
3920     log_verbose("\n\nJA collation:");
3921     coll = ucol_open("ja", &status);
3922     if (U_FAILURE(status)) {
3923         log_err("Tailoring test: &z <<a|- failed!");
3924         return;
3925     }
3926     for (j=0; j<11; j++) {
3927         tLen = u_strlen(tData1[j]);
3928         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3929         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3930             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3931                     j, tData1[j]);
3932         }
3933         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3934         for(i = 0; i<rLen; i++) {
3935             log_verbose(" %02X", resColl[i]);
3936         }
3937         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3938      }
3939      ucol_close(coll);
3940
3941
3942      log_verbose("\n\nTailoring test: & middle dot < a ");
3943      ruleLen = u_strlen(rule1);
3944      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3945      if (U_FAILURE(status)) {
3946          log_err("Tailoring test: & middle dot < a failed!");
3947          return;
3948      }
3949      for (j=0; j<11; j++) {
3950          tLen = u_strlen(tData1[j]);
3951          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3952          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3953              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3954                      j, tData1[j]);
3955          }
3956          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3957          for(i = 0; i<rLen; i++) {
3958              log_verbose(" %02X", resColl[i]);
3959          }
3960          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3961       }
3962       ucol_close(coll);
3963
3964
3965       log_verbose("\n\nTailoring test: & l middle-dot << a ");
3966       ruleLen = u_strlen(rule2);
3967       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3968       if (U_FAILURE(status)) {
3969           log_err("Tailoring test: & l middle-dot << a failed!");
3970           return;
3971       }
3972       for (j=0; j<11; j++) {
3973           tLen = u_strlen(tData1[j]);
3974           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3975           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3976               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3977                       j, tData1[j]);
3978           }
3979           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3980               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3981                       j, tData1[j]);
3982           }
3983           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3984           for(i = 0; i<rLen; i++) {
3985               log_verbose(" %02X", resColl[i]);
3986           }
3987           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3988        }
3989        ucol_close(coll);
3990}
3991
3992static void
3993TestOutOfBuffer5468(void)
3994{
3995    static const char *test = "\\u4e00";
3996    UChar ustr[256];
3997    int32_t ustr_length = u_unescape(test, ustr, 256);
3998    unsigned char shortKeyBuf[1];
3999    int32_t sortkey_length;
4000    UErrorCode status = U_ZERO_ERROR;
4001    static UCollator *coll = NULL;
4002
4003    coll = ucol_open("root", &status);
4004    if(U_FAILURE(status)) {
4005      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4006      return;
4007    }
4008    ucol_setStrength(coll, UCOL_PRIMARY);
4009    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4010    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4011    if (U_FAILURE(status)) {
4012      log_err("Failed setting atributes\n");
4013      return;
4014    }
4015
4016    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4017    if (sortkey_length != 4) {
4018        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4019    }
4020    log_verbose("length of sortKey is %d", sortkey_length);
4021    ucol_close(coll);
4022}
4023
4024#define TSKC_DATA_SIZE 5
4025#define TSKC_BUF_SIZE  50
4026static void
4027TestSortKeyConsistency(void)
4028{
4029    UErrorCode icuRC = U_ZERO_ERROR;
4030    UCollator* ucol;
4031    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4032
4033    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4034    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4035    int32_t i, j, i2;
4036
4037    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4038    if (U_FAILURE(icuRC))
4039    {
4040        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4041        return;
4042    }
4043
4044    for (i = 0; i < TSKC_DATA_SIZE; i++)
4045    {
4046        UCharIterator uiter;
4047        uint32_t state[2] = { 0, 0 };
4048        int32_t dataLen = i+1;
4049        for (j=0; j<TSKC_BUF_SIZE; j++)
4050            bufFull[i][j] = bufPart[i][j] = 0;
4051
4052        /* Full sort key */
4053        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4054
4055        /* Partial sort key */
4056        uiter_setString(&uiter, data, dataLen);
4057        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4058        if (U_FAILURE(icuRC))
4059        {
4060            log_err("ucol_nextSortKeyPart failed\n");
4061            ucol_close(ucol);
4062            return;
4063        }
4064
4065        for (i2=0; i2<i; i2++)
4066        {
4067            UBool fullMatch = TRUE;
4068            UBool partMatch = TRUE;
4069            for (j=0; j<TSKC_BUF_SIZE; j++)
4070            {
4071                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4072                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4073            }
4074            if (fullMatch != partMatch) {
4075                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4076                                  : "partial key was consistent, but full key changed\n");
4077                ucol_close(ucol);
4078                return;
4079            }
4080        }
4081    }
4082
4083    /*=============================================*/
4084   ucol_close(ucol);
4085}
4086
4087/* ticket: 6101 */
4088static void TestCroatianSortKey(void) {
4089    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4090    UErrorCode status = U_ZERO_ERROR;
4091    UCollator *ucol;
4092    UCharIterator iter;
4093
4094    static const UChar text[] = { 0x0044, 0xD81A };
4095
4096    size_t length = sizeof(text)/sizeof(*text);
4097
4098    uint8_t textSortKey[32];
4099    size_t lenSortKey = 32;
4100    size_t actualSortKeyLen;
4101    uint32_t uStateInfo[2] = { 0, 0 };
4102
4103    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4104    if (U_FAILURE(status)) {
4105        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4106        return;
4107    }
4108
4109    uiter_setString(&iter, text, length);
4110
4111    actualSortKeyLen = ucol_nextSortKeyPart(
4112        ucol, &iter, (uint32_t*)uStateInfo,
4113        textSortKey, lenSortKey, &status
4114        );
4115
4116    if (actualSortKeyLen == lenSortKey) {
4117        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4118    }
4119
4120    ucol_close(ucol);
4121}
4122
4123/* ticket: 6140 */
4124/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4125 * they are both Hiragana and Katakana
4126 */
4127#define SORTKEYLEN 50
4128static void TestHiragana(void) {
4129    UErrorCode status = U_ZERO_ERROR;
4130    UCollator* ucol;
4131    UCollationResult strcollresult;
4132    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4133    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4134    int32_t data1Len = sizeof(data1)/sizeof(*data1);
4135    int32_t data2Len = sizeof(data2)/sizeof(*data2);
4136    int32_t i, j;
4137    uint8_t sortKey1[SORTKEYLEN];
4138    uint8_t sortKey2[SORTKEYLEN];
4139
4140    UCharIterator uiter1;
4141    UCharIterator uiter2;
4142    uint32_t state1[2] = { 0, 0 };
4143    uint32_t state2[2] = { 0, 0 };
4144    int32_t keySize1;
4145    int32_t keySize2;
4146
4147    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4148            &status);
4149    if (U_FAILURE(status)) {
4150        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4151        return;
4152    }
4153
4154    /* Start of full sort keys */
4155    /* Full sort key1 */
4156    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4157    /* Full sort key2 */
4158    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4159    if (keySize1 == keySize2) {
4160        for (i = 0; i < keySize1; i++) {
4161            if (sortKey1[i] != sortKey2[i]) {
4162                log_err("Full sort keys are different. Should be equal.");
4163            }
4164        }
4165    } else {
4166        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4167    }
4168    /* End of full sort keys */
4169
4170    /* Start of partial sort keys */
4171    /* Partial sort key1 */
4172    uiter_setString(&uiter1, data1, data1Len);
4173    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4174    /* Partial sort key2 */
4175    uiter_setString(&uiter2, data2, data2Len);
4176    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4177    if (U_SUCCESS(status) && keySize1 == keySize2) {
4178        for (j = 0; j < keySize1; j++) {
4179            if (sortKey1[j] != sortKey2[j]) {
4180                log_err("Partial sort keys are different. Should be equal");
4181            }
4182        }
4183    } else {
4184        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4185    }
4186    /* End of partial sort keys */
4187
4188    /* Start of strcoll */
4189    /* Use ucol_strcoll() to determine ordering */
4190    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4191    if (strcollresult != UCOL_EQUAL) {
4192        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4193    }
4194
4195    ucol_close(ucol);
4196}
4197
4198/* Convenient struct for running collation tests */
4199typedef struct {
4200  const UChar source[MAX_TOKEN_LEN];  /* String on left */
4201  const UChar target[MAX_TOKEN_LEN];  /* String on right */
4202  UCollationResult result;            /* -1, 0 or +1, depending on collation */
4203} OneTestCase;
4204
4205/*
4206 * Utility function to test one collation test case.
4207 * @param testcases Array of test cases.
4208 * @param n_testcases Size of the array testcases.
4209 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4210 * @param n_rules Size of the array str_rules.
4211 */
4212static void doTestOneTestCase(const OneTestCase testcases[],
4213                              int n_testcases,
4214                              const char* str_rules[],
4215                              int n_rules)
4216{
4217  int rule_no, testcase_no;
4218  UChar rule[500];
4219  int32_t length = 0;
4220  UErrorCode status = U_ZERO_ERROR;
4221  UParseError parse_error;
4222  UCollator  *myCollation;
4223
4224  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4225
4226    length = u_unescape(str_rules[rule_no], rule, 500);
4227    if (length == 0) {
4228        log_err("ERROR: The rule cannot be unescaped: %s\n");
4229        return;
4230    }
4231    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4232    if(U_FAILURE(status)){
4233        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4234        log_info("  offset=%d  \"%s\" | \"%s\"\n",
4235                 parse_error.offset,
4236                 aescstrdup(parse_error.preContext, -1),
4237                 aescstrdup(parse_error.postContext, -1));
4238        return;
4239    }
4240    log_verbose("Testing the <<* syntax\n");
4241    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4242    ucol_setStrength(myCollation, UCOL_TERTIARY);
4243    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4244      doTest(myCollation,
4245             testcases[testcase_no].source,
4246             testcases[testcase_no].target,
4247             testcases[testcase_no].result
4248             );
4249    }
4250    ucol_close(myCollation);
4251  }
4252}
4253
4254const static OneTestCase rangeTestcases[] = {
4255  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
4256  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
4257  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
4258
4259  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
4260  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
4261  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
4262  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
4263  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
4264
4265  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
4266  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
4267  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
4268  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
4269
4270  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
4271  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
4272  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
4273  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
4274  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
4275  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
4276  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
4277  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
4278};
4279
4280static int nRangeTestcases = LEN(rangeTestcases);
4281
4282const static OneTestCase rangeTestcasesSupplemental[] = {
4283  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
4284  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
4285  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
4286  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4287  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4288  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
4289  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10001 */
4290};
4291
4292static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
4293
4294const static OneTestCase rangeTestcasesQwerty[] = {
4295  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
4296  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
4297
4298  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
4299  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
4300
4301  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
4302  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
4303
4304  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
4305  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
4306
4307  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
4308    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
4309  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
4310    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
4311};
4312
4313static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
4314
4315static void TestSameStrengthList(void)
4316{
4317  const char* strRules[] = {
4318    /* Normal */
4319    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4320
4321    /* Lists */
4322    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4323  };
4324  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4325}
4326
4327static void TestSameStrengthListQuoted(void)
4328{
4329  const char* strRules[] = {
4330    /* Lists with quoted characters */
4331    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4332    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4333
4334    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4335    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4336
4337    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4338    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4339  };
4340  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4341}
4342
4343static void TestSameStrengthListSupplemental(void)
4344{
4345  const char* strRules[] = {
4346    "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4347    "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4348    "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4349    "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4350  };
4351  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4352}
4353
4354static void TestSameStrengthListQwerty(void)
4355{
4356  const char* strRules[] = {
4357    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4358    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4359    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4360    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4361    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4362
4363    /* Quoted characters also will work if two quoted characters are not consecutive.  */
4364    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4365
4366    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4367    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4368
4369 };
4370  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4371}
4372
4373static void TestSameStrengthListQuotedQwerty(void)
4374{
4375  const char* strRules[] = {
4376    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4377    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4378    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4379
4380    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4381    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4382   };
4383  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4384}
4385
4386static void TestSameStrengthListRanges(void)
4387{
4388  const char* strRules[] = {
4389    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4390  };
4391  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4392}
4393
4394static void TestSameStrengthListSupplementalRanges(void)
4395{
4396  const char* strRules[] = {
4397    /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4398    "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4399  };
4400  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4401}
4402
4403static void TestSpecialCharacters(void)
4404{
4405  const char* strRules[] = {
4406    /* Normal */
4407    "&';'<'+'<','<'-'<'&'<'*'",
4408
4409    /* List */
4410    "&';'<*'+,-&*'",
4411
4412    /* Range */
4413    "&';'<*'+'-'-&*'",
4414  };
4415
4416  const static OneTestCase specialCharacterStrings[] = {
4417    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4418    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4419    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4420    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4421  };
4422  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
4423}
4424
4425static void TestPrivateUseCharacters(void)
4426{
4427  const char* strRules[] = {
4428    /* Normal */
4429    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4430    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4431  };
4432
4433  const static OneTestCase privateUseCharacterStrings[] = {
4434    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4435    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4436    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4437    { {0xe2da}, {0xe2db}, UCOL_LESS },
4438    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4439    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4440  };
4441  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4442}
4443
4444static void TestPrivateUseCharactersInList(void)
4445{
4446  const char* strRules[] = {
4447    /* List */
4448    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4449    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4450    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4451  };
4452
4453  const static OneTestCase privateUseCharacterStrings[] = {
4454    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4455    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4456    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4457    { {0xe2da}, {0xe2db}, UCOL_LESS },
4458    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4459    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4460  };
4461  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4462}
4463
4464static void TestPrivateUseCharactersInRange(void)
4465{
4466  const char* strRules[] = {
4467    /* Range */
4468    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4469    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4470    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4471  };
4472
4473  const static OneTestCase privateUseCharacterStrings[] = {
4474    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4475    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4476    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4477    { {0xe2da}, {0xe2db}, UCOL_LESS },
4478    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4479    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4480  };
4481  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4482}
4483
4484static void TestInvalidListsAndRanges(void)
4485{
4486  const char* invalidRules[] = {
4487    /* Range not in starred expression */
4488    "&\\ufffe<\\uffff-\\U00010002",
4489
4490    /* Range without start */
4491    "&a<*-c",
4492
4493    /* Range without end */
4494    "&a<*b-",
4495
4496    /* More than one hyphen */
4497    "&a<*b-g-l",
4498
4499    /* Range in the wrong order */
4500    "&a<*k-b",
4501
4502  };
4503
4504  UChar rule[500];
4505  UErrorCode status = U_ZERO_ERROR;
4506  UParseError parse_error;
4507  int n_rules = LEN(invalidRules);
4508  int rule_no;
4509  int length;
4510  UCollator  *myCollation;
4511
4512  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4513
4514    length = u_unescape(invalidRules[rule_no], rule, 500);
4515    if (length == 0) {
4516        log_err("ERROR: The rule cannot be unescaped: %s\n");
4517        return;
4518    }
4519    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4520    (void)myCollation;      /* Suppress set but not used warning. */
4521    if(!U_FAILURE(status)){
4522      log_err("ERROR: Could not cause a failure as expected: \n");
4523    }
4524    status = U_ZERO_ERROR;
4525  }
4526}
4527
4528/*
4529 * This test ensures that characters placed before a character in a different script have the same lead byte
4530 * in their collation key before and after script reordering.
4531 */
4532static void TestBeforeRuleWithScriptReordering(void)
4533{
4534    UParseError error;
4535    UErrorCode status = U_ZERO_ERROR;
4536    UCollator  *myCollation;
4537    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4538    UChar rules[500];
4539    uint32_t rulesLength = 0;
4540    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4541    UCollationResult collResult;
4542
4543    uint8_t baseKey[256];
4544    uint32_t baseKeyLength;
4545    uint8_t beforeKey[256];
4546    uint32_t beforeKeyLength;
4547
4548    UChar base[] = { 0x03b1 }; /* base */
4549    int32_t baseLen = sizeof(base)/sizeof(*base);
4550
4551    UChar before[] = { 0x0e01 }; /* ko kai */
4552    int32_t beforeLen = sizeof(before)/sizeof(*before);
4553
4554    /*UChar *data[] = { before, base };
4555    genericRulesStarter(srules, data, 2);*/
4556
4557    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4558
4559    (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4560    (void)baseKeyLength;
4561
4562    /* build collator */
4563    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4564
4565    rulesLength = u_unescape(srules, rules, LEN(rules));
4566    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4567    if(U_FAILURE(status)) {
4568        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4569        return;
4570    }
4571
4572    /* check collation results - before rule applied but not script reordering */
4573    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4574    if (collResult != UCOL_GREATER) {
4575        log_err("Collation result not correct before script reordering = %d\n", collResult);
4576    }
4577
4578    /* check the lead byte of the collation keys before script reordering */
4579    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4580    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4581    if (baseKey[0] != beforeKey[0]) {
4582      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4583   }
4584
4585    /* reorder the scripts */
4586    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4587    if(U_FAILURE(status)) {
4588        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4589        return;
4590    }
4591
4592    /* check collation results - before rule applied and after script reordering */
4593    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4594    if (collResult != UCOL_GREATER) {
4595        log_err("Collation result not correct after script reordering = %d\n", collResult);
4596    }
4597
4598    /* check the lead byte of the collation keys after script reordering */
4599    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4600    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4601    if (baseKey[0] != beforeKey[0]) {
4602        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4603    }
4604
4605    ucol_close(myCollation);
4606}
4607
4608/*
4609 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4610 */
4611static void TestNonLeadBytesDuringCollationReordering(void)
4612{
4613    UErrorCode status = U_ZERO_ERROR;
4614    UCollator  *myCollation;
4615    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4616
4617    uint8_t baseKey[256];
4618    uint32_t baseKeyLength;
4619    uint8_t reorderKey[256];
4620    uint32_t reorderKeyLength;
4621
4622    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4623
4624    uint32_t i;
4625
4626
4627    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4628
4629    /* build collator tertiary */
4630    myCollation = ucol_open("", &status);
4631    ucol_setStrength(myCollation, UCOL_TERTIARY);
4632    if(U_FAILURE(status)) {
4633        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4634        return;
4635    }
4636    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4637
4638    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4639    if(U_FAILURE(status)) {
4640        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4641        return;
4642    }
4643    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4644
4645    if (baseKeyLength != reorderKeyLength) {
4646        log_err("Key lengths not the same during reordering.\n");
4647        return;
4648    }
4649
4650    for (i = 1; i < baseKeyLength; i++) {
4651        if (baseKey[i] != reorderKey[i]) {
4652            log_err("Collation key bytes not the same at position %d.\n", i);
4653            return;
4654        }
4655    }
4656    ucol_close(myCollation);
4657
4658    /* build collator quaternary */
4659    myCollation = ucol_open("", &status);
4660    ucol_setStrength(myCollation, UCOL_QUATERNARY);
4661    if(U_FAILURE(status)) {
4662        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4663        return;
4664    }
4665    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4666
4667    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4668    if(U_FAILURE(status)) {
4669        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4670        return;
4671    }
4672    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4673
4674    if (baseKeyLength != reorderKeyLength) {
4675        log_err("Key lengths not the same during reordering.\n");
4676        return;
4677    }
4678
4679    for (i = 1; i < baseKeyLength; i++) {
4680        if (baseKey[i] != reorderKey[i]) {
4681            log_err("Collation key bytes not the same at position %d.\n", i);
4682            return;
4683        }
4684    }
4685    ucol_close(myCollation);
4686}
4687
4688/*
4689 * Test reordering API.
4690 */
4691static void TestReorderingAPI(void)
4692{
4693    UErrorCode status = U_ZERO_ERROR;
4694    UCollator  *myCollation;
4695    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4696    int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4697    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4698    int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4699    UCollationResult collResult;
4700    int32_t retrievedReorderCodesLength;
4701    int32_t retrievedReorderCodes[10];
4702    UChar greekString[] = { 0x03b1 };
4703    UChar punctuationString[] = { 0x203e };
4704    int loopIndex;
4705
4706    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4707
4708    /* build collator tertiary */
4709    myCollation = ucol_open("", &status);
4710    ucol_setStrength(myCollation, UCOL_TERTIARY);
4711    if(U_FAILURE(status)) {
4712        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4713        return;
4714    }
4715
4716    /* set the reorderding */
4717    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4718    if (U_FAILURE(status)) {
4719        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4720        return;
4721    }
4722
4723    /* get the reordering */
4724    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4725    if (status != U_BUFFER_OVERFLOW_ERROR) {
4726        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4727        return;
4728    }
4729    status = U_ZERO_ERROR;
4730    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4731        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4732        return;
4733    }
4734    /* now let's really get it */
4735    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4736    if (U_FAILURE(status)) {
4737        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4738        return;
4739    }
4740    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4741        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4742        return;
4743    }
4744    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4745        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4746            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4747            return;
4748        }
4749    }
4750    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4751    if (collResult != UCOL_LESS) {
4752        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4753        return;
4754    }
4755
4756    /* clear the reordering */
4757    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4758    if (U_FAILURE(status)) {
4759        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4760        return;
4761    }
4762
4763    /* get the reordering again */
4764    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4765    if (retrievedReorderCodesLength != 0) {
4766        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4767        return;
4768    }
4769
4770    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4771    if (collResult != UCOL_GREATER) {
4772        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4773        return;
4774    }
4775
4776    /* clear the reordering using [NONE] */
4777    ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4778    if (U_FAILURE(status)) {
4779        log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4780        return;
4781    }
4782
4783    /* get the reordering again */
4784    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4785    if (retrievedReorderCodesLength != 0) {
4786        log_err_status(status,
4787                       "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4788                       retrievedReorderCodesLength);
4789        return;
4790    }
4791
4792    /* test for error condition on duplicate reorder codes */
4793    ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
4794    if (!U_FAILURE(status)) {
4795        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4796        return;
4797    }
4798
4799    status = U_ZERO_ERROR;
4800    /* test for reorder codes after a reset code */
4801    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
4802    if (!U_FAILURE(status)) {
4803        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4804        return;
4805    }
4806
4807    ucol_close(myCollation);
4808}
4809
4810/*
4811 * Test reordering API.
4812 */
4813static void TestReorderingAPIWithRuleCreatedCollator(void)
4814{
4815    UErrorCode status = U_ZERO_ERROR;
4816    UCollator  *myCollation;
4817    UChar rules[90];
4818    static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4819    static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4820    static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4821    UCollationResult collResult;
4822    int32_t retrievedReorderCodesLength;
4823    int32_t retrievedReorderCodes[10];
4824    static const UChar greekString[] = { 0x03b1 };
4825    static const UChar punctuationString[] = { 0x203e };
4826    static const UChar hanString[] = { 0x65E5, 0x672C };
4827    int loopIndex;
4828
4829    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4830
4831    /* build collator from rules */
4832    u_uastrcpy(rules, "[reorder Hani Grek]");
4833    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4834    if(U_FAILURE(status)) {
4835        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4836        return;
4837    }
4838
4839    /* get the reordering */
4840    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4841    if (U_FAILURE(status)) {
4842        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4843        return;
4844    }
4845    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4846        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4847        return;
4848    }
4849    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4850        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4851            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4852            return;
4853        }
4854    }
4855    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
4856    if (collResult != UCOL_GREATER) {
4857        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4858        return;
4859    }
4860
4861    /* set the reordering */
4862    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4863    if (U_FAILURE(status)) {
4864        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4865        return;
4866    }
4867
4868    /* get the reordering */
4869    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4870    if (status != U_BUFFER_OVERFLOW_ERROR) {
4871        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4872        return;
4873    }
4874    status = U_ZERO_ERROR;
4875    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4876        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4877        return;
4878    }
4879    /* now let's really get it */
4880    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4881    if (U_FAILURE(status)) {
4882        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4883        return;
4884    }
4885    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4886        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4887        return;
4888    }
4889    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4890        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4891            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4892            return;
4893        }
4894    }
4895    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4896    if (collResult != UCOL_LESS) {
4897        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4898        return;
4899    }
4900
4901    /* clear the reordering */
4902    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4903    if (U_FAILURE(status)) {
4904        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4905        return;
4906    }
4907
4908    /* get the reordering again */
4909    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4910    if (retrievedReorderCodesLength != 0) {
4911        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4912        return;
4913    }
4914
4915    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4916    if (collResult != UCOL_GREATER) {
4917        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4918        return;
4919    }
4920
4921    /* reset the reordering */
4922    ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4923    if (U_FAILURE(status)) {
4924        log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4925        return;
4926    }
4927    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4928    if (U_FAILURE(status)) {
4929        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4930        return;
4931    }
4932    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4933        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4934        return;
4935    }
4936    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4937        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4938            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4939            return;
4940        }
4941    }
4942
4943    ucol_close(myCollation);
4944}
4945
4946static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4947    int32_t i;
4948    for (i = 0; i < length; ++i) {
4949        if (expectedScript == scripts[i]) { return TRUE; }
4950    }
4951    return FALSE;
4952}
4953
4954static void TestEquivalentReorderingScripts(void) {
4955    // Beginning with ICU 55, collation reordering moves single scripts
4956    // rather than groups of scripts,
4957    // except where scripts share a range and sort primary-equal.
4958    UErrorCode status = U_ZERO_ERROR;
4959    int32_t equivalentScripts[100];
4960    int32_t length;
4961    int i;
4962    int32_t prevScript;
4963    /* These scripts are expected to be equivalent. */
4964    static const int32_t expectedScripts[] = {
4965        USCRIPT_HIRAGANA,
4966        USCRIPT_KATAKANA,
4967        USCRIPT_KATAKANA_OR_HIRAGANA
4968    };
4969
4970    equivalentScripts[0] = 0;
4971    length = ucol_getEquivalentReorderCodes(
4972            USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
4973    if (U_FAILURE(status)) {
4974        log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4975        return;
4976    }
4977    if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4978        log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4979                "length expected 1, was = %d; expected [%d] was [%d]\n",
4980                length, USCRIPT_GOTHIC, equivalentScripts[0]);
4981    }
4982
4983    length = ucol_getEquivalentReorderCodes(
4984            USCRIPT_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status);
4985    if (U_FAILURE(status)) {
4986        log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4987        return;
4988    }
4989    if (length != LEN(expectedScripts)) {
4990        log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4991                "expected %d, was = %d\n",
4992                LEN(expectedScripts), length);
4993    }
4994    prevScript = -1;
4995    for (i = 0; i < length; ++i) {
4996        int32_t script = equivalentScripts[i];
4997        if (script <= prevScript) {
4998            log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4999        }
5000        prevScript = script;
5001    }
5002    for (i = 0; i < LEN(expectedScripts); i++) {
5003        if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5004            log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5005                    expectedScripts[i]);
5006        }
5007    }
5008
5009    length = ucol_getEquivalentReorderCodes(
5010            USCRIPT_KATAKANA, equivalentScripts, LEN(equivalentScripts), &status);
5011    if (U_FAILURE(status)) {
5012        log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5013        return;
5014    }
5015    if (length != LEN(expectedScripts)) {
5016        log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5017                "expected %d, was = %d\n",
5018                LEN(expectedScripts), length);
5019    }
5020    for (i = 0; i < LEN(expectedScripts); i++) {
5021        if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5022            log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5023                    expectedScripts[i]);
5024        }
5025    }
5026
5027    length = ucol_getEquivalentReorderCodes(
5028            USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status);
5029    if (U_FAILURE(status) || length != LEN(expectedScripts)) {
5030        log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5031                "expected %d, was = %d\n",
5032                LEN(expectedScripts), length);
5033    }
5034
5035    length = ucol_getEquivalentReorderCodes(
5036            USCRIPT_HAN, equivalentScripts, LEN(equivalentScripts), &status);
5037    if (U_FAILURE(status) || length != 3) {
5038        log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5039                "expected 3, was = %d\n", length);
5040    }
5041    length = ucol_getEquivalentReorderCodes(
5042            USCRIPT_SIMPLIFIED_HAN, equivalentScripts, LEN(equivalentScripts), &status);
5043    if (U_FAILURE(status) || length != 3) {
5044        log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5045                "expected 3, was = %d\n", length);
5046    }
5047    length = ucol_getEquivalentReorderCodes(
5048            USCRIPT_TRADITIONAL_HAN, equivalentScripts, LEN(equivalentScripts), &status);
5049    if (U_FAILURE(status) || length != 3) {
5050        log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5051                "expected 3, was = %d\n", length);
5052    }
5053
5054    length = ucol_getEquivalentReorderCodes(
5055            USCRIPT_MEROITIC_CURSIVE, equivalentScripts, LEN(equivalentScripts), &status);
5056    if (U_FAILURE(status) || length != 2) {
5057        log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5058                "expected 2, was = %d\n", length);
5059    }
5060    length = ucol_getEquivalentReorderCodes(
5061            USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, LEN(equivalentScripts), &status);
5062    if (U_FAILURE(status) || length != 2) {
5063        log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5064                "expected 2, was = %d\n", length);
5065    }
5066}
5067
5068static void TestReorderingAcrossCloning(void)
5069{
5070    UErrorCode status = U_ZERO_ERROR;
5071    UCollator  *myCollation;
5072    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5073    UCollator *clonedCollation;
5074    int32_t retrievedReorderCodesLength;
5075    int32_t retrievedReorderCodes[10];
5076    int loopIndex;
5077
5078    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5079
5080    /* build collator tertiary */
5081    myCollation = ucol_open("", &status);
5082    ucol_setStrength(myCollation, UCOL_TERTIARY);
5083    if(U_FAILURE(status)) {
5084        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5085        return;
5086    }
5087
5088    /* set the reorderding */
5089    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5090    if (U_FAILURE(status)) {
5091        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5092        return;
5093    }
5094
5095    /* clone the collator */
5096    clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5097    if (U_FAILURE(status)) {
5098        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5099        return;
5100    }
5101
5102    /* get the reordering */
5103    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
5104    if (U_FAILURE(status)) {
5105        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5106        return;
5107    }
5108    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
5109        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
5110        return;
5111    }
5112    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5113        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5114            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5115            return;
5116        }
5117    }
5118
5119    /*uprv_free(buffer);*/
5120    ucol_close(myCollation);
5121    ucol_close(clonedCollation);
5122}
5123
5124/*
5125 * Utility function to test one collation reordering test case set.
5126 * @param testcases Array of test cases.
5127 * @param n_testcases Size of the array testcases.
5128 * @param reorderTokens Array of reordering codes.
5129 * @param reorderTokensLen Size of the array reorderTokens.
5130 */
5131static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5132{
5133    uint32_t testCaseNum;
5134    UErrorCode status = U_ZERO_ERROR;
5135    UCollator  *myCollation;
5136
5137    myCollation = ucol_open("", &status);
5138    if (U_FAILURE(status)) {
5139        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5140        return;
5141    }
5142    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5143    if(U_FAILURE(status)) {
5144        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5145        return;
5146    }
5147
5148    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5149        doTest(myCollation,
5150            testCases[testCaseNum].source,
5151            testCases[testCaseNum].target,
5152            testCases[testCaseNum].result
5153        );
5154    }
5155    ucol_close(myCollation);
5156}
5157
5158static void TestGreekFirstReorder(void)
5159{
5160    const char* strRules[] = {
5161        "[reorder Grek]"
5162    };
5163
5164    const int32_t apiRules[] = {
5165        USCRIPT_GREEK
5166    };
5167
5168    const static OneTestCase privateUseCharacterStrings[] = {
5169        { {0x0391}, {0x0391}, UCOL_EQUAL },
5170        { {0x0041}, {0x0391}, UCOL_GREATER },
5171        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5172        { {0x0060}, {0x0391}, UCOL_LESS },
5173        { {0x0391}, {0xe2dc}, UCOL_LESS },
5174        { {0x0391}, {0x0060}, UCOL_GREATER },
5175    };
5176
5177    /* Test rules creation */
5178    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5179
5180    /* Test collation reordering API */
5181    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5182}
5183
5184static void TestGreekLastReorder(void)
5185{
5186    const char* strRules[] = {
5187        "[reorder Zzzz Grek]"
5188    };
5189
5190    const int32_t apiRules[] = {
5191        USCRIPT_UNKNOWN, USCRIPT_GREEK
5192    };
5193
5194    const static OneTestCase privateUseCharacterStrings[] = {
5195        { {0x0391}, {0x0391}, UCOL_EQUAL },
5196        { {0x0041}, {0x0391}, UCOL_LESS },
5197        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5198        { {0x0060}, {0x0391}, UCOL_LESS },
5199        { {0x0391}, {0xe2dc}, UCOL_GREATER },
5200    };
5201
5202    /* Test rules creation */
5203    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5204
5205    /* Test collation reordering API */
5206    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5207}
5208
5209static void TestNonScriptReorder(void)
5210{
5211    const char* strRules[] = {
5212        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5213    };
5214
5215    const int32_t apiRules[] = {
5216        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5217        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5218        UCOL_REORDER_CODE_CURRENCY
5219    };
5220
5221    const static OneTestCase privateUseCharacterStrings[] = {
5222        { {0x0391}, {0x0041}, UCOL_LESS },
5223        { {0x0041}, {0x0391}, UCOL_GREATER },
5224        { {0x0060}, {0x0041}, UCOL_LESS },
5225        { {0x0060}, {0x0391}, UCOL_GREATER },
5226        { {0x0024}, {0x0041}, UCOL_GREATER },
5227    };
5228
5229    /* Test rules creation */
5230    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5231
5232    /* Test collation reordering API */
5233    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5234}
5235
5236static void TestHaniReorder(void)
5237{
5238    const char* strRules[] = {
5239        "[reorder Hani]"
5240    };
5241    const int32_t apiRules[] = {
5242        USCRIPT_HAN
5243    };
5244
5245    const static OneTestCase privateUseCharacterStrings[] = {
5246        { {0x4e00}, {0x0041}, UCOL_LESS },
5247        { {0x4e00}, {0x0060}, UCOL_GREATER },
5248        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5249        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5250        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5251        { {0xfa27}, {0x0041}, UCOL_LESS },
5252        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5253    };
5254
5255    /* Test rules creation */
5256    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5257
5258    /* Test collation reordering API */
5259    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5260}
5261
5262static void TestHaniReorderWithOtherRules(void)
5263{
5264    const char* strRules[] = {
5265        "[reorder Hani] &b<a"
5266    };
5267    /*const int32_t apiRules[] = {
5268        USCRIPT_HAN
5269    };*/
5270
5271    const static OneTestCase privateUseCharacterStrings[] = {
5272        { {0x4e00}, {0x0041}, UCOL_LESS },
5273        { {0x4e00}, {0x0060}, UCOL_GREATER },
5274        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5275        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5276        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5277        { {0xfa27}, {0x0041}, UCOL_LESS },
5278        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5279        { {0x0062}, {0x0061}, UCOL_LESS },
5280    };
5281
5282    /* Test rules creation */
5283    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5284}
5285
5286static void TestMultipleReorder(void)
5287{
5288    const char* strRules[] = {
5289        "[reorder Grek Zzzz DIGIT Latn Hani]"
5290    };
5291
5292    const int32_t apiRules[] = {
5293        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5294    };
5295
5296    const static OneTestCase collationTestCases[] = {
5297        { {0x0391}, {0x0041}, UCOL_LESS},
5298        { {0x0031}, {0x0041}, UCOL_LESS},
5299        { {0x0041}, {0x4e00}, UCOL_LESS},
5300    };
5301
5302    /* Test rules creation */
5303    doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
5304
5305    /* Test collation reordering API */
5306    doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
5307}
5308
5309/*
5310 * Test that covers issue reported in ticket 8814
5311 */
5312static void TestReorderWithNumericCollation(void)
5313{
5314    UErrorCode status = U_ZERO_ERROR;
5315    UCollator  *myCollation;
5316    UCollator  *myReorderCollation;
5317    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5318    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5319    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5320    UChar fortyS[] = { 0x0053 };
5321    UChar fortyThreeP[] = { 0x0050 };
5322    uint8_t fortyS_sortKey[128];
5323    int32_t fortyS_sortKey_Length;
5324    uint8_t fortyThreeP_sortKey[128];
5325    int32_t fortyThreeP_sortKey_Length;
5326    uint8_t fortyS_sortKey_reorder[128];
5327    int32_t fortyS_sortKey_reorder_Length;
5328    uint8_t fortyThreeP_sortKey_reorder[128];
5329    int32_t fortyThreeP_sortKey_reorder_Length;
5330    UCollationResult collResult;
5331    UCollationResult collResultReorder;
5332
5333    log_verbose("Testing reordering with and without numeric collation\n");
5334
5335    /* build collator tertiary with numeric */
5336    myCollation = ucol_open("", &status);
5337    /*
5338    ucol_setStrength(myCollation, UCOL_TERTIARY);
5339    */
5340    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5341    if(U_FAILURE(status)) {
5342        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5343        return;
5344    }
5345
5346    /* build collator tertiary with numeric and reordering */
5347    myReorderCollation = ucol_open("", &status);
5348    /*
5349    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5350    */
5351    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5352    ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
5353    if(U_FAILURE(status)) {
5354        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5355        return;
5356    }
5357
5358    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
5359    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
5360    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
5361    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5362
5363    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5364        log_err_status(status, "ERROR: couldn't generate sort keys\n");
5365        return;
5366    }
5367    collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5368    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5369    /*
5370    fprintf(stderr, "\tcollResult = %x\n", collResult);
5371    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5372    fprintf(stderr, "\nfortyS\n");
5373    for (i = 0; i < fortyS_sortKey_Length; i++) {
5374        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5375    }
5376    fprintf(stderr, "\nfortyThreeP\n");
5377    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5378        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5379    }
5380    */
5381    if (collResult != collResultReorder) {
5382        log_err_status(status, "ERROR: collation results should have been the same.\n");
5383        return;
5384    }
5385
5386    ucol_close(myCollation);
5387    ucol_close(myReorderCollation);
5388}
5389
/*
 * Compares two zero-terminated byte sequences byte by byte.
 * Returns 0 when both are identical up to and including the terminating
 * zero, -1 when the first differing byte of a is smaller than that of b,
 * and 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  size_t pos = 0;

  while (a[pos] == b[pos]) {
    if (a[pos] == 0) {
      /* Reached the terminator of both sequences without a difference. */
      return 0;
    }
    ++pos;
  }
  if (a[pos] < b[pos]) {
    return -1;
  }
  return 1;
}
5399
5400static void TestImportRulesDeWithPhonebook(void)
5401{
5402  const char* normalRules[] = {
5403    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5404    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5405    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5406  };
5407  const OneTestCase normalTests[] = {
5408    { {0x00e6}, {0x00c6}, UCOL_LESS},
5409    { {0x00fc}, {0x00dc}, UCOL_GREATER},
5410  };
5411
5412  const char* importRules[] = {
5413    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5414    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5415    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5416  };
5417  const OneTestCase importTests[] = {
5418    { {0x00e6}, {0x00c6}, UCOL_LESS},
5419    { {0x00fc}, {0x00dc}, UCOL_LESS},
5420  };
5421
5422  doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
5423  doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
5424}
5425
5426#if 0
5427static void TestImportRulesFiWithEor(void)
5428{
5429  /* DUCET. */
5430  const char* defaultRules[] = {
5431    "&a<b",                                    /* Dummy rule. */
5432  };
5433
5434  const OneTestCase defaultTests[] = {
5435    { {0x0110}, {0x00F0}, UCOL_LESS},
5436    { {0x00a3}, {0x00a5}, UCOL_LESS},
5437    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
5438  };
5439
5440  /* European Ordering rules: ignore currency characters. */
5441  const char* eorRules[] = {
5442    "[import root-u-co-eor]",
5443  };
5444
5445  const OneTestCase eorTests[] = {
5446    { {0x0110}, {0x00F0}, UCOL_LESS},
5447    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
5448    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
5449  };
5450
5451  const char* fiStdRules[] = {
5452    "[import fi-u-co-standard]",
5453  };
5454
5455  const OneTestCase fiStdTests[] = {
5456    { {0x0110}, {0x00F0}, UCOL_GREATER},
5457    { {0x00a3}, {0x00a5}, UCOL_LESS},
5458    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
5459  };
5460
5461  /* Both European Ordering Rules and Fi Standard Rules. */
5462  const char* eorFiStdRules[] = {
5463    "[import root-u-co-eor][import fi-u-co-standard]",
5464  };
5465
5466  /* This is essentially same as the one before once fi.txt is updated with import. */
5467  const char* fiEorRules[] = {
5468    "[import fi-u-co-eor]",
5469  };
5470
5471  const OneTestCase fiEorTests[] = {
5472    { {0x0110}, {0x00F0}, UCOL_GREATER},
5473    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
5474    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
5475  };
5476
5477  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
5478  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
5479  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
5480  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));
5481
5482  log_knownIssue("8962", NULL);
5483  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
5484        eor{
5485            Sequence{
5486                "[import root-u-co-eor][import fi-u-co-standard]"
5487            }
5488            Version{"21.0"}
5489        }
5490  */
5491  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
5492
5493}
5494#endif
5495
5496#if 0
5497/*
5498 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5499 * the resource files are built with -includeUnihanColl option.
5500 * TODO: Uncomment this function and make it work when unihan rules are built by default.
5501 */
5502static void TestImportRulesCJKWithUnihan(void)
5503{
5504  /* DUCET. */
5505  const char* defaultRules[] = {
5506    "&a<b",                                    /* Dummy rule. */
5507  };
5508
5509  const OneTestCase defaultTests[] = {
5510    { {0x3402}, {0x4e1e}, UCOL_GREATER},
5511  };
5512
5513  /* European Ordering rules: ignore currency characters. */
5514  const char* unihanRules[] = {
5515    "[import ko-u-co-unihan]",
5516  };
5517
5518  const OneTestCase unihanTests[] = {
5519    { {0x3402}, {0x4e1e}, UCOL_LESS},
5520  };
5521
5522  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
5523  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
5524
5525}
5526#endif
5527
5528static void TestImport(void)
5529{
5530    UCollator* vicoll;
5531    UCollator* escoll;
5532    UCollator* viescoll;
5533    UCollator* importviescoll;
5534    UParseError error;
5535    UErrorCode status = U_ZERO_ERROR;
5536    UChar* virules;
5537    int32_t viruleslength;
5538    UChar* esrules;
5539    int32_t esruleslength;
5540    UChar* viesrules;
5541    int32_t viesruleslength;
5542    char srules[500] = "[import vi][import es]";
5543    UChar rules[500];
5544    uint32_t length = 0;
5545    int32_t itemCount;
5546    int32_t i, k;
5547    UChar32 start;
5548    UChar32 end;
5549    UChar str[500];
5550    int32_t strLength;
5551
5552    uint8_t sk1[500];
5553    uint8_t sk2[500];
5554
5555    UBool b;
5556    USet* tailoredSet;
5557    USet* importTailoredSet;
5558
5559
5560    vicoll = ucol_open("vi", &status);
5561    if(U_FAILURE(status)){
5562        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5563        return;
5564    }
5565
5566    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5567    if(viruleslength == 0) {
5568        log_data_err("missing vi tailoring rule string\n");
5569        ucol_close(vicoll);
5570        return;
5571    }
5572    escoll = ucol_open("es", &status);
5573    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5574    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5575    viesrules[0] = 0;
5576    u_strcat(viesrules, virules);
5577    u_strcat(viesrules, esrules);
5578    viesruleslength = viruleslength + esruleslength;
5579    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5580
5581    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5582    length = u_unescape(srules, rules, 500);
5583    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5584    if(U_FAILURE(status)){
5585        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5586        return;
5587    }
5588
5589    tailoredSet = ucol_getTailoredSet(viescoll, &status);
5590    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5591
5592    if(!uset_equals(tailoredSet, importTailoredSet)){
5593        log_err("Tailored sets not equal");
5594    }
5595
5596    uset_close(importTailoredSet);
5597
5598    itemCount = uset_getItemCount(tailoredSet);
5599
5600    for( i = 0; i < itemCount; i++){
5601        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5602        if(strLength < 2){
5603            for (; start <= end; start++){
5604                k = 0;
5605                U16_APPEND(str, k, 500, start, b);
5606                (void)b;    /* Suppress set but not used warning. */
5607                ucol_getSortKey(viescoll, str, 1, sk1, 500);
5608                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5609                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5610                    log_err("Sort key for %s not equal\n", str);
5611                    break;
5612                }
5613            }
5614        }else{
5615            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5616            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5617            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5618                log_err("ZZSort key for %s not equal\n", str);
5619                break;
5620            }
5621
5622        }
5623    }
5624
5625    uset_close(tailoredSet);
5626
5627    uprv_free(viesrules);
5628
5629    ucol_close(vicoll);
5630    ucol_close(escoll);
5631    ucol_close(viescoll);
5632    ucol_close(importviescoll);
5633}
5634
5635static void TestImportWithType(void)
5636{
5637    UCollator* vicoll;
5638    UCollator* decoll;
5639    UCollator* videcoll;
5640    UCollator* importvidecoll;
5641    UParseError error;
5642    UErrorCode status = U_ZERO_ERROR;
5643    const UChar* virules;
5644    int32_t viruleslength;
5645    const UChar* derules;
5646    int32_t deruleslength;
5647    UChar* viderules;
5648    int32_t videruleslength;
5649    const char srules[500] = "[import vi][import de-u-co-phonebk]";
5650    UChar rules[500];
5651    uint32_t length = 0;
5652    int32_t itemCount;
5653    int32_t i, k;
5654    UChar32 start;
5655    UChar32 end;
5656    UChar str[500];
5657    int32_t strLength;
5658
5659    uint8_t sk1[500];
5660    uint8_t sk2[500];
5661
5662    USet* tailoredSet;
5663    USet* importTailoredSet;
5664
5665    vicoll = ucol_open("vi", &status);
5666    if(U_FAILURE(status)){
5667        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5668        return;
5669    }
5670    virules = ucol_getRules(vicoll, &viruleslength);
5671    if(viruleslength == 0) {
5672        log_data_err("missing vi tailoring rule string\n");
5673        ucol_close(vicoll);
5674        return;
5675    }
5676    /* decoll = ucol_open("de@collation=phonebook", &status); */
5677    decoll = ucol_open("de-u-co-phonebk", &status);
5678    if(U_FAILURE(status)){
5679        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5680        return;
5681    }
5682
5683
5684    derules = ucol_getRules(decoll, &deruleslength);
5685    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5686    viderules[0] = 0;
5687    u_strcat(viderules, virules);
5688    u_strcat(viderules, derules);
5689    videruleslength = viruleslength + deruleslength;
5690    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5691
5692    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5693    length = u_unescape(srules, rules, 500);
5694    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5695    if(U_FAILURE(status)){
5696        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5697        return;
5698    }
5699
5700    tailoredSet = ucol_getTailoredSet(videcoll, &status);
5701    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5702
5703    if(!uset_equals(tailoredSet, importTailoredSet)){
5704        log_err("Tailored sets not equal");
5705    }
5706
5707    uset_close(importTailoredSet);
5708
5709    itemCount = uset_getItemCount(tailoredSet);
5710
5711    for( i = 0; i < itemCount; i++){
5712        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5713        if(strLength < 2){
5714            for (; start <= end; start++){
5715                k = 0;
5716                U16_APPEND_UNSAFE(str, k, start);
5717                ucol_getSortKey(videcoll, str, 1, sk1, 500);
5718                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5719                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5720                    log_err("Sort key for %s not equal\n", str);
5721                    break;
5722                }
5723            }
5724        }else{
5725            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5726            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5727            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5728                log_err("Sort key for %s not equal\n", str);
5729                break;
5730            }
5731
5732        }
5733    }
5734
5735    uset_close(tailoredSet);
5736
5737    uprv_free(viderules);
5738
5739    ucol_close(videcoll);
5740    ucol_close(importvidecoll);
5741    ucol_close(vicoll);
5742    ucol_close(decoll);
5743}
5744
/*
 * Test input for TestCaseLevelBufferOverflow (via longUpperStrItems):
 * 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia'
 * The array has no terminating NUL; its length is taken from the array size.
 * The trailing comment on each row shows the text that row encodes.
 */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, /* 'IV INTERNATIONAL' */
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52, /* ' SCIENTIFIC - PR' */
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E, /* 'ACTICAL CONFEREN' */
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C, /* 'CE "GEOPOLITICS,' */
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E, /* ' GEOECONOMICS AN' */
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20, /* 'D INTERNATIONAL ' */
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45, /* 'RELATIONS PROBLE' */
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32, /* 'MS" 22-23 June 2' */
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62, /* '010, St. Petersb' */
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61                                /* 'urg, Russia' */
};
5758
/*
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times.
 * Each row below is one 25-unit repetition: a consonant followed by an accented
 * Latin-1 capital vowel -- acute on the first five vowels (U+00C1 U+00C9 U+00CD
 * U+00D3 U+00DA), grave on the next five (U+00C0 U+00C8 U+00CC U+00D2 U+00D9),
 * circumflex on the last two (U+00C2 U+00CA) -- ending with a space (U+0020).
 * The array has no terminating NUL; its length is taken from the array size.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
5767
/*
 * 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times.
 * Each row below is one 27-unit repetition: U+0041..U+005A followed by a space (U+0020).
 * The array has no terminating NUL; its length is taken from the array size.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
5783
/*
 * Number of elements in `array`, which must be a true array (not a pointer
 * that an array has decayed to). The argument is parenthesized before it is
 * subscripted so that expression arguments expand as written.
 */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
5785
/* One test input: a pointer to a UChar string together with its explicit length. */
typedef struct {
    const UChar * longUpperStrPtr;  /* start of the string; NULL marks the end of a table of items */
    int32_t       longUpperStrLen;  /* number of UChars at longUpperStrPtr */
} LongUpperStrItem;
5790
/*
 * Inputs for TestCaseLevelBufferOverflow, terminated by an entry whose pointer is NULL.
 * String pointers must be in reverse collation order of the corresponding strings:
 * the test reports an error unless each string's sort key compares less than the
 * sort key of the string listed before it.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }  /* end-of-table sentinel */
};
5798
/* Capacity, in bytes, of the sort key buffers used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5800
5801/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5802static void TestCaseLevelBufferOverflow(void)
5803{
5804    UErrorCode status = U_ZERO_ERROR;
5805    UCollator * ucol = ucol_open("root", &status);
5806    if ( U_SUCCESS(status) ) {
5807        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5808        if ( U_SUCCESS(status) ) {
5809            const LongUpperStrItem * itemPtr;
5810            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5811            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5812                int32_t sortKeyLen;
5813                if (itemPtr > longUpperStrItems) {
5814                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5815                }
5816                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5817                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5818                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5819                    break;
5820                }
5821                if ( itemPtr > longUpperStrItems ) {
5822                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5823                    if (compareResult >= 0) {
5824                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5825                    }
5826                }
5827            }
5828        } else {
5829            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5830        }
5831        ucol_close(ucol);
5832    } else {
5833        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5834    }
5835}
5836
5837/* Test for #10595 */
5838static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
5839#define KEY_PART_SIZE 16
5840
5841static void TestNextSortKeyPartJaIdentical(void)
5842{
5843    UErrorCode status = U_ZERO_ERROR;
5844    UCollator *coll;
5845    uint8_t keyPart[KEY_PART_SIZE];
5846    UCharIterator iter;
5847    uint32_t state[2] = {0, 0};
5848    int32_t keyPartLen;
5849
5850    coll = ucol_open("ja", &status);
5851    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5852    if (U_FAILURE(status)) {
5853        log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5854        return;
5855    }
5856
5857    uiter_setString(&iter, testJapaneseName, 5);
5858    keyPartLen = KEY_PART_SIZE;
5859    while (keyPartLen == KEY_PART_SIZE) {
5860        keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5861        if (U_FAILURE(status)) {
5862            log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5863            break;
5864        }
5865    }
5866
5867    ucol_close(coll);
5868}
5869
/*
 * Registers test function x through addTest() under the path "tscoll/cmsccoll/<x>".
 * The expansion refers to a variable named `root`, which must be in scope where the macro is used.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5871
/*
 * Registers the tests of this file, one TEST() call per test function, each
 * under the path "tscoll/cmsccoll/<function name>". The calls are made in the
 * order listed. Entries that are commented out are disabled; where a reason is
 * recorded it is given in the comment next to the entry.
 *
 * root is passed unchanged to addTest() by the TEST macro.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5976
5977#endif /* #if !UCONFIG_NO_COLLATION */
5978