1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/*******************************************************************************
9*
10* File cmsccoll.C
11*
12*******************************************************************************/
13/**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "cintltst.h"
28#include "ccolltst.h"
29#include "callcoll.h"
30#include "unicode/ustring.h"
31#include "string.h"
32#include "ucol_imp.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
38#include "unicode/ures.h"
39#include "unicode/uscript.h"
40#include "unicode/utf16.h"
41#include "uparse.h"
42#include "putilimp.h"
43
44
45#define MAX_TOKEN_LEN 16
46
/*
 * Function type for a string-comparison callback that yields a
 * UCollationResult for a (source, sLen) / (target, tLen) pair of UChar
 * strings.
 * NOTE(review): the meaning of `collator` (opaque pointer) and `object`
 * is defined by whichever function is used through this type -- confirm
 * against the call sites.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
50
51
52
/*
 * Strings for IncompleteCntTest, listed in the ascending order the test
 * expects under the tailoring " & Z < ABC < Q < B": every entry must
 * compare UCOL_LESS against every later entry.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/*
 * Same idea for the tailoring " & Z < DAVIS < MARK <DAV" used by the
 * second half of IncompleteCntTest.
 */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
79
80static void IncompleteCntTest(void)
81{
82  UErrorCode status = U_ZERO_ERROR;
83  UChar temp[90];
84  UChar t1[90];
85  UChar t2[90];
86
87  UCollator *coll =  NULL;
88  uint32_t i = 0, j = 0;
89  uint32_t size = 0;
90
91  u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95  if(U_SUCCESS(status)) {
96    size = UPRV_LENGTHOF(cnt1);
97    for(i = 0; i < size-1; i++) {
98      for(j = i+1; j < size; j++) {
99        UCollationElements *iter;
100        u_uastrcpy(t1, cnt1[i]);
101        u_uastrcpy(t2, cnt1[j]);
102        doTest(coll, t1, t2, UCOL_LESS);
103        /* synwee : added collation element iterator test */
104        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105        if (U_FAILURE(status)) {
106          log_err("Creation of iterator failed\n");
107          break;
108        }
109        backAndForth(iter);
110        ucol_closeElements(iter);
111      }
112    }
113  }
114
115  ucol_close(coll);
116
117
118  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121  if(U_SUCCESS(status)) {
122    size = UPRV_LENGTHOF(cnt2);
123    for(i = 0; i < size-1; i++) {
124      for(j = i+1; j < size; j++) {
125        UCollationElements *iter;
126        u_uastrcpy(t1, cnt2[i]);
127        u_uastrcpy(t2, cnt2[j]);
128        doTest(coll, t1, t2, UCOL_LESS);
129
130        /* synwee : added collation element iterator test */
131        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132        if (U_FAILURE(status)) {
133          log_err("Creation of iterator failed\n");
134          break;
135        }
136        backAndForth(iter);
137        ucol_closeElements(iter);
138      }
139    }
140  }
141
142  ucol_close(coll);
143
144
145}
146
/*
 * Strings for BlackBirdTest in the order expected with shifted alternate
 * handling at quaternary strength (each entry sorts before all later ones).
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
158
159const static UCollationResult shiftedTert[] = {
160  UCOL_EQUAL,
161  UCOL_EQUAL,
162  UCOL_EQUAL,
163  UCOL_LESS,
164  UCOL_EQUAL,
165  UCOL_EQUAL,
166  UCOL_LESS,
167  UCOL_EQUAL,
168  UCOL_EQUAL
169};
170
/*
 * Strings for BlackBirdTest in the order expected with non-ignorable
 * alternate handling (each entry sorts before all later ones).
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
182
183static void BlackBirdTest(void) {
184  UErrorCode status = U_ZERO_ERROR;
185  UChar t1[90];
186  UChar t2[90];
187
188  uint32_t i = 0, j = 0;
189  uint32_t size = 0;
190  UCollator *coll = ucol_open("en_US", &status);
191
192  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195  if(U_SUCCESS(status)) {
196    size = UPRV_LENGTHOF(nonignorable);
197    for(i = 0; i < size-1; i++) {
198      for(j = i+1; j < size; j++) {
199        u_uastrcpy(t1, nonignorable[i]);
200        u_uastrcpy(t2, nonignorable[j]);
201        doTest(coll, t1, t2, UCOL_LESS);
202      }
203    }
204  }
205
206  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209  if(U_SUCCESS(status)) {
210    size = UPRV_LENGTHOF(shifted);
211    for(i = 0; i < size-1; i++) {
212      for(j = i+1; j < size; j++) {
213        u_uastrcpy(t1, shifted[i]);
214        u_uastrcpy(t2, shifted[j]);
215        doTest(coll, t1, t2, UCOL_LESS);
216      }
217    }
218  }
219
220  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221  if(U_SUCCESS(status)) {
222    size = UPRV_LENGTHOF(shifted);
223    for(i = 1; i < size; i++) {
224      u_uastrcpy(t1, shifted[i-1]);
225      u_uastrcpy(t2, shifted[i]);
226      doTest(coll, t1, t2, shiftedTert[i]);
227    }
228  }
229
230  ucol_close(coll);
231}
232
233const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236    {0x0041/*'A'*/, 0x0300, 0x0000},
237    {0x00C0, 0x0301, 0x0000},
238    /* this would work with forced normalization */
239    {0x00C0, 0x0316, 0x0000}
240};
241
242const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245    {0x00C0, 0},
246    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247    /* this would work with forced normalization */
248    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249};
250
251const static UCollationResult results[] = {
252    UCOL_GREATER,
253    UCOL_EQUAL,
254    UCOL_EQUAL,
255    UCOL_GREATER,
256    UCOL_EQUAL
257};
258
259static void FunkyATest(void)
260{
261
262    int32_t i;
263    UErrorCode status = U_ZERO_ERROR;
264    UCollator  *myCollation;
265    myCollation = ucol_open("en_US", &status);
266    if(U_FAILURE(status)){
267        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268        return;
269    }
270    log_verbose("Testing some A letters, for some reason\n");
271    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272    ucol_setStrength(myCollation, UCOL_TERTIARY);
273    for (i = 0; i < 4 ; i++)
274    {
275        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276    }
277    ucol_close(myCollation);
278}
279
280UColAttributeValue caseFirst[] = {
281    UCOL_OFF,
282    UCOL_LOWER_FIRST,
283    UCOL_UPPER_FIRST
284};
285
286
287UColAttributeValue alternateHandling[] = {
288    UCOL_NON_IGNORABLE,
289    UCOL_SHIFTED
290};
291
292UColAttributeValue caseLevel[] = {
293    UCOL_OFF,
294    UCOL_ON
295};
296
297UColAttributeValue strengths[] = {
298    UCOL_PRIMARY,
299    UCOL_SECONDARY,
300    UCOL_TERTIARY,
301    UCOL_QUATERNARY,
302    UCOL_IDENTICAL
303};
304
#if 0
/* Printable names, index-aligned with the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-aligned with the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, index-aligned with the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-aligned with the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * Not used currently - does not test, only prints.
 *
 * Dumps to stderr the sort key of "M\u00e4rk Davis" ("Mark Davis" with the
 * second character replaced by U+00E4) for every combination of case-first,
 * alternate-handling, case-level and strength settings of the en_US
 * collator.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k, sortkeysize;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
    /* Index with h (this loop's counter); i still holds sizem from the
       dump loop above and would read past the end of caseFirst[]. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* Release the collator opened at the top of the function. */
  ucol_close(coll);
}
#endif
382
383static void BillFairmanTest(void) {
384/*
385** check for actual locale via ICU resource bundles
386**
387** lp points to the original locale ("fr_FR_....")
388*/
389
390    UResourceBundle *lr,*cr;
391    UErrorCode              lec = U_ZERO_ERROR;
392    const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394    log_verbose("BillFairmanTest\n");
395
396    lr = ures_open(NULL,lp,&lec);
397    if (lr) {
398        cr = ures_getByKey(lr,"collations",0,&lec);
399        if (cr) {
400            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401            if (lp) {
402                if (U_SUCCESS(lec)) {
403                    if(strcmp(lp, "fr") != 0) {
404                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405                    }
406                }
407            }
408            ures_close(cr);
409        }
410        ures_close(lr);
411    }
412}
413
/*
 * Strings for TestChMove in the ascending order expected from the "cs"
 * collator.  Entries are escaped (\uXXXX) and are run through u_unescape()
 * before being compared.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
433
434static void TestChMove(void) {
435    UChar t1[256] = {0};
436    UChar t2[256] = {0};
437
438    uint32_t i = 0, j = 0;
439    uint32_t size = 0;
440    UErrorCode status = U_ZERO_ERROR;
441
442    UCollator *coll = ucol_open("cs", &status);
443
444    if(U_SUCCESS(status)) {
445        size = UPRV_LENGTHOF(chTest);
446        for(i = 0; i < size-1; i++) {
447            for(j = i+1; j < size; j++) {
448                u_unescape(chTest[i], t1, 256);
449                u_unescape(chTest[j], t2, 256);
450                doTest(coll, t1, t2, UCOL_LESS);
451            }
452        }
453    }
454    else {
455        log_data_err("Can't open collator");
456    }
457    ucol_close(coll);
458}
459
460
461
462
463/*
464const static char impTest[][20] = {
465  "\\u4e00",
466    "a",
467    "A",
468    "b",
469    "B",
470    "\\u4e01"
471};
472*/
473
474
475static void TestImplicitTailoring(void) {
476  static const struct {
477    const char *rules;
478    const char *data[10];
479    const uint32_t len;
480  } tests[] = {
481      {
482        /* Tailor b and c before U+4E00. */
483        "&[before 1]\\u4e00 < b < c "
484        /* Now, before U+4E00 is c; put d and e after that. */
485        "&[before 1]\\u4e00 < d < e",
486        { "b", "c", "d", "e", "\\u4e00"}, 5 },
487      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490  };
491
492  int32_t i = 0;
493
494  for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495      genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496  }
497
498/*
499  UChar t1[256] = {0};
500  UChar t2[256] = {0};
501
502  const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504  uint32_t i = 0, j = 0;
505  uint32_t size = 0;
506  uint32_t ruleLen = 0;
507  UErrorCode status = U_ZERO_ERROR;
508  UCollator *coll = NULL;
509  ruleLen = u_unescape(rule, t1, 256);
510
511  coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513  if(U_SUCCESS(status)) {
514    size = UPRV_LENGTHOF(impTest);
515    for(i = 0; i < size-1; i++) {
516      for(j = i+1; j < size; j++) {
517        u_unescape(impTest[i], t1, 256);
518        u_unescape(impTest[j], t2, 256);
519        doTest(coll, t1, t2, UCOL_LESS);
520      }
521    }
522  }
523  else {
524    log_err("Can't open collator");
525  }
526  ucol_close(coll);
527  */
528}
529
530static void TestFCDProblem(void) {
531  UChar t1[256] = {0};
532  UChar t2[256] = {0};
533
534  const char *s1 = "\\u0430\\u0306\\u0325";
535  const char *s2 = "\\u04D1\\u0325";
536
537  UErrorCode status = U_ZERO_ERROR;
538  UCollator *coll = ucol_open("", &status);
539  u_unescape(s1, t1, 256);
540  u_unescape(s2, t2, 256);
541
542  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543  doTest(coll, t1, t2, UCOL_EQUAL);
544
545  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546  doTest(coll, t1, t2, UCOL_EQUAL);
547
548  ucol_close(coll);
549}
550
551/*
552The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
553We're only using NFC/NFD in this test.
554*/
555#define NORM_BUFFER_TEST_LEN 18
556typedef struct {
557  UChar32 u;
558  UChar NFC[NORM_BUFFER_TEST_LEN];
559  UChar NFD[NORM_BUFFER_TEST_LEN];
560} tester;
561
562static void TestComposeDecompose(void) {
563    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564    static const UChar UNICODESET_STR[] = {
565        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568    };
569    int32_t noOfLoc;
570    int32_t i = 0, j = 0;
571
572    UErrorCode status = U_ZERO_ERROR;
573    const char *locName = NULL;
574    uint32_t nfcSize;
575    uint32_t nfdSize;
576    tester **t;
577    uint32_t noCases = 0;
578    UCollator *coll = NULL;
579    UChar32 u = 0;
580    UChar comp[NORM_BUFFER_TEST_LEN];
581    uint32_t len = 0;
582    UCollationElements *iter;
583    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584    int32_t charsToTestSize;
585
586    noOfLoc = uloc_countAvailable();
587
588    coll = ucol_open("", &status);
589    if (U_FAILURE(status)) {
590        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591        return;
592    }
593    charsToTestSize = uset_size(charsToTest);
594    if (charsToTestSize <= 0) {
595        log_err("Set was zero. Missing data?\n");
596        return;
597    }
598    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
599    t[0] = (tester *)malloc(sizeof(tester));
600    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
601
602    for(u = 0; u < charsToTestSize; u++) {
603        UChar32 ch = uset_charAt(charsToTest, u);
604        len = 0;
605        U16_APPEND_UNSAFE(comp, len, ch);
606        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
607        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
608
609        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
610          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
611            t[noCases]->u = ch;
612            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
613                u_strncpy(t[noCases]->NFC, comp, len);
614                t[noCases]->NFC[len] = 0;
615            }
616            noCases++;
617            t[noCases] = (tester *)malloc(sizeof(tester));
618            uprv_memset(t[noCases], 0, sizeof(tester));
619        }
620    }
621    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
622    uset_close(charsToTest);
623    charsToTest = NULL;
624
625    for(u=0; u<(UChar32)noCases; u++) {
626        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
627            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
628            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
629        }
630    }
631    /*
632    for(u = 0; u < charsToTestSize; u++) {
633      if(!(u&0xFFFF)) {
634        log_verbose("%08X ", u);
635      }
636      uprv_memset(t[noCases], 0, sizeof(tester));
637      t[noCases]->u = u;
638      len = 0;
639      U16_APPEND_UNSAFE(comp, len, u);
640      comp[len] = 0;
641      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
642      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
643      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
644      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
645    }
646    */
647
648    ucol_close(coll);
649
650    log_verbose("Testing locales, number of cases = %i\n", noCases);
651    for(i = 0; i<noOfLoc; i++) {
652        status = U_ZERO_ERROR;
653        locName = uloc_getAvailable(i);
654        if(hasCollationElements(locName)) {
655            char cName[256];
656            UChar name[256];
657            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
658
659            for(j = 0; j<nameSize; j++) {
660                cName[j] = (char)name[j];
661            }
662            cName[nameSize] = 0;
663            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
664
665            coll = ucol_open(locName, &status);
666            ucol_setStrength(coll, UCOL_IDENTICAL);
667            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
668
669            for(u=0; u<(UChar32)noCases; u++) {
670                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
671                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
672                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
673                    log_verbose("Testing NFC\n");
674                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
675                    backAndForth(iter);
676                    log_verbose("Testing NFD\n");
677                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
678                    backAndForth(iter);
679                }
680            }
681            ucol_closeElements(iter);
682            ucol_close(coll);
683        }
684    }
685    for(u = 0; u <= (UChar32)noCases; u++) {
686        free(t[u]);
687    }
688    free(t);
689}
690
691static void TestEmptyRule(void) {
692  UErrorCode status = U_ZERO_ERROR;
693  UChar rulez[] = { 0 };
694  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
695
696  ucol_close(coll);
697}
698
699static void TestUCARules(void) {
700  UErrorCode status = U_ZERO_ERROR;
701  UChar b[256];
702  UChar *rules = b;
703  uint32_t ruleLen = 0;
704  UCollator *UCAfromRules = NULL;
705  UCollator *coll = ucol_open("", &status);
706  if(status == U_FILE_ACCESS_ERROR) {
707    log_data_err("Is your data around?\n");
708    return;
709  } else if(U_FAILURE(status)) {
710    log_err("Error opening collator\n");
711    return;
712  }
713  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
714
715  log_verbose("TestUCARules\n");
716  if(ruleLen > 256) {
717    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
718    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
719  }
720  log_verbose("Rules length is %d\n", ruleLen);
721  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
722  if(U_SUCCESS(status)) {
723    ucol_close(UCAfromRules);
724  } else {
725    log_verbose("Unable to create a collator from UCARules!\n");
726  }
727/*
728  u_unescape(blah, b, 256);
729  ucol_getSortKey(coll, b, 1, res, 256);
730*/
731  ucol_close(coll);
732  if(rules != b) {
733    free(rules);
734  }
735}
736
737
738/* Pinyin tonal order */
739/*
740    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
741          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
742    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
743    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
744    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
745    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
746      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747.. (\u00fc)
748
749However, in testing we got the following order:
750    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
751          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
752    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
753.. (\u0113)
754    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
755    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
756    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
757.. (\u01d8)
758      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759*/
760
/*
 * Pinyin tonal order via [before 1]: for each of a, e, i, o, u the four
 * toned vowels are tailored in front of the base letter, and the five
 * u-diaeresis forms are tailored after u.  The expected ascending order is
 * passed to genericRulesStarter() together with the rules.
 */
static void TestBefore(void) {
  static const char *expectedOrder[] = {
      /* before a */
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      /* before e */
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      /* before i */
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      /* before o */
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      /* before u */
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      /* after u */
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  const char *tailoring =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
779
#if 0
/* superseded by TestBeforePinyin */
/*
 * Disabled test: hands the pinyin tonal order below (base vowel first,
 * then its four toned forms; u-diaeresis forms last) to
 * genericLocaleStarter() for the "zh" locale.
 */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, UPRV_LENGTHOF(data));
}
#endif
795
/*
 * Passes an upper-case-before-lower-case ordering ("I" < "i" < "Y" < "y")
 * to genericLocaleStarter() for the "da" locale.
 */
static void TestUpperCaseFirst(void) {
  static const char *upperThenLower[] = { "I", "i", "Y", "y" };

  genericLocaleStarter("da", upperThenLower, UPRV_LENGTHOF(upperThenLower));
}
805
806static void TestJ815(void) {
807  const static char *data[] = {
808    "aa",
809      "Aa",
810      "ab",
811      "Ab",
812      "ad",
813      "Ad",
814      "ae",
815      "Ae",
816      "\\u00e6",
817      "\\u00c6",
818      "af",
819      "Af",
820      "b",
821      "B"
822  };
823  genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
824  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
825}
826
827
/*
 * Exercises the UCOL_CASE_FIRST / UCOL_CASE_LEVEL attributes in three parts:
 *   1. the en_US collator, for each of the four attribute combinations in
 *      caseTestAttributes, comparing every pair of the four testCase strings
 *      against caseTestResults;
 *   2. the same comparisons with a collator built from gRules, additionally
 *      running the collation element iterator back and forth over both
 *      strings of each pair;
 *   3. mixed-case "ch" contraction lists through genericRulesStarter() with
 *      [caseFirst ...] / [caseLevel on] rule options.
 * Returns early (after logging) if either collator cannot be created.
 */
static void TestCase(void)
{
    const static UChar gRules[MAX_TOKEN_LEN] =
    /*" & 0 < 1,\u2460<a,A" -- the array below holds U+2460 (circled digit one) */
    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };

    /* The four strings under test: {'1', circled '1'} x {'a', 'A'}. */
    const static UChar testCase[][MAX_TOKEN_LEN] =
    {
        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
    };

    /*
     * Expected results, one row per entry of caseTestAttributes.  The result
     * for the pair (i, j), i < j, lives at column 3*i+j-1, i.e. columns
     * 0, 1, 2, 4, 5 and 8 are read; columns 3, 6 and 7 are never read.
     */
    const static UCollationResult caseTestResults[][9] =
    {
        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
    };

    /* { UCOL_CASE_FIRST value, UCOL_CASE_LEVEL value } for each pass. */
    const static UColAttributeValue caseTestAttributes[][2] =
    {
        { UCOL_LOWER_FIRST, UCOL_OFF},
        { UCOL_UPPER_FIRST, UCOL_OFF},
        { UCOL_LOWER_FIRST, UCOL_ON},
        { UCOL_UPPER_FIRST, UCOL_ON}
    };
    int32_t i,j,k;
    UErrorCode status = U_ZERO_ERROR;
    UCollationElements *iter;
    UCollator  *myCollation;
    myCollation = ucol_open("en_US", &status);

    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }
    log_verbose("Testing different case settings\n");
    ucol_setStrength(myCollation, UCOL_TERTIARY);

    /* Part 1: en_US collator, all attribute combinations, all pairs i < j. */
    for(k = 0; k<4; k++) {
      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
      for (i = 0; i < 3 ; i++) {
        for(j = i+1; j<4; j++) {
          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
        }
      }
    }
    ucol_close(myCollation);

    /* Part 2: same expectations with a collator built from gRules. */
    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }
    log_verbose("Testing different case settings with custom rules\n");
    ucol_setStrength(myCollation, UCOL_TERTIARY);

    for(k = 0; k<4; k++) {
      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
      for (i = 0; i < 3 ; i++) {
        for(j = i+1; j<4; j++) {
          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
          /* Also walk the collation elements of both strings. */
          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
          backAndForth(iter);
          ucol_closeElements(iter);
          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
          backAndForth(iter);
          ucol_closeElements(iter);
        }
      }
    }
    ucol_close(myCollation);
    /* Part 3: mixed-case contraction ordering via rule-string options. */
    {
      /* Expected ascending order when lower case sorts first. */
      const static char *lowerFirst[] = {
        "h",
        "H",
        "ch",
        "Ch",
        "CH",
        "cha",
        "chA",
        "Cha",
        "ChA",
        "CHa",
        "CHA",
        "i",
        "I"
      };

      /* Expected ascending order when upper case sorts first. */
      const static char *upperFirst[] = {
        "H",
        "h",
        "CH",
        "Ch",
        "ch",
        "CHA",
        "CHa",
        "ChA",
        "Cha",
        "chA",
        "cha",
        "I",
        "i"
      };
      log_verbose("mixed case test\n");
      log_verbose("lower first, case level off\n");
      genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
      log_verbose("upper first, case level off\n");
      genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
      log_verbose("lower first, case level on\n");
      genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
      log_verbose("upper first, case level on\n");
      genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
    }

}
951
952static void TestIncrementalNormalize(void) {
953
954    /*UChar baseA     =0x61;*/
955    UChar baseA     =0x41;
956/*    UChar baseB     = 0x42;*/
957    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
958    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
959    /*
960        0x316 is combining grave accent below, cc=220
961        0x321 is combining palatalized hook below, cc=202
962        0x300 is combining grave accent, cc=230
963    */
964
965#define MAXSLEN 2000
966    /*int          maxSLen   = 64000;*/
967    int          sLen;
968    int          i;
969
970    UCollator        *coll;
971    UErrorCode       status = U_ZERO_ERROR;
972    UCollationResult result;
973
974    int32_t myQ = getTestOption(QUICK_OPTION);
975
976    if(getTestOption(QUICK_OPTION) < 0) {
977        setTestOption(QUICK_OPTION, 1);
978    }
979
980    {
981        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
982        /*          most buffers along the way.*/
983        UChar            strA[MAXSLEN+1];
984        UChar            strB[MAXSLEN+1];
985
986        coll = ucol_open("en_US", &status);
987        if(status == U_FILE_ACCESS_ERROR) {
988          log_data_err("Is your data around?\n");
989          return;
990        } else if(U_FAILURE(status)) {
991          log_err("Error opening collator\n");
992          return;
993        }
994        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
995
996        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
997        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
998        /*for (sLen = 1000; sLen<1001; sLen++) {*/
999        for (sLen = 500; sLen<501; sLen++) {
1000        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1001            strA[0] = baseA;
1002            strB[0] = baseA;
1003            for (i=1; i<=sLen-1; i++) {
1004                strA[i] = ccMix[i % 3];
1005                strB[sLen-i] = ccMix[i % 3];
1006            }
1007            strA[sLen]   = 0;
1008            strB[sLen]   = 0;
1009
1010            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1011            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1012            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1013            doTest(coll, strA, strB, UCOL_EQUAL);
1014        }
1015    }
1016
1017    setTestOption(QUICK_OPTION, myQ);
1018
1019
1020    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1021    /*         of the string.  Checks a couple of edge cases.*/
1022
1023    {
1024        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1025        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1026        ucol_setStrength(coll, UCOL_TERTIARY);
1027        doTest(coll, strA, strB, UCOL_EQUAL);
1028    }
1029
1030    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1031
1032    {
1033      /* New UCA  3.1.1.
1034       * test below used a code point from Desseret, which sorts differently
1035       * than d800 dc00
1036       */
1037        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1038        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1039        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1040        ucol_setStrength(coll, UCOL_TERTIARY);
1041        doTest(coll, strA, strB, UCOL_GREATER);
1042    }
1043
1044    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1045
1046    {
1047        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1048        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1049        char  sortKeyA[50];
1050        char  sortKeyAz[50];
1051        char  sortKeyB[50];
1052        char  sortKeyBz[50];
1053        int   r;
1054
1055        /* there used to be -3 here. Hmmmm.... */
1056        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1057        result = ucol_strcoll(coll, strA, 3, strB, 3);
1058        if (result != UCOL_GREATER) {
1059            log_err("ERROR 1 in test 4\n");
1060        }
1061        result = ucol_strcoll(coll, strA, -1, strB, -1);
1062        if (result != UCOL_EQUAL) {
1063            log_err("ERROR 2 in test 4\n");
1064        }
1065
1066        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1067        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1068        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1069        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1070
1071        r = strcmp(sortKeyA, sortKeyAz);
1072        if (r <= 0) {
1073            log_err("Error 3 in test 4\n");
1074        }
1075        r = strcmp(sortKeyA, sortKeyB);
1076        if (r <= 0) {
1077            log_err("Error 4 in test 4\n");
1078        }
1079        r = strcmp(sortKeyAz, sortKeyBz);
1080        if (r != 0) {
1081            log_err("Error 5 in test 4\n");
1082        }
1083
1084        ucol_setStrength(coll, UCOL_IDENTICAL);
1085        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1086        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1087        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1088        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1089
1090        r = strcmp(sortKeyA, sortKeyAz);
1091        if (r <= 0) {
1092            log_err("Error 6 in test 4\n");
1093        }
1094        r = strcmp(sortKeyA, sortKeyB);
1095        if (r <= 0) {
1096            log_err("Error 7 in test 4\n");
1097        }
1098        r = strcmp(sortKeyAz, sortKeyBz);
1099        if (r != 0) {
1100            log_err("Error 8 in test 4\n");
1101        }
1102        ucol_setStrength(coll, UCOL_TERTIARY);
1103    }
1104
1105
1106    /*  Test 5:  Null characters in non-normal source strings.*/
1107
1108    {
1109        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1110        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1111        char  sortKeyA[50];
1112        char  sortKeyAz[50];
1113        char  sortKeyB[50];
1114        char  sortKeyBz[50];
1115        int   r;
1116
1117        result = ucol_strcoll(coll, strA, 6, strB, 6);
1118        if (result != UCOL_GREATER) {
1119            log_err("ERROR 1 in test 5\n");
1120        }
1121        result = ucol_strcoll(coll, strA, -1, strB, -1);
1122        if (result != UCOL_EQUAL) {
1123            log_err("ERROR 2 in test 5\n");
1124        }
1125
1126        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1127        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1128        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1129        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1130
1131        r = strcmp(sortKeyA, sortKeyAz);
1132        if (r <= 0) {
1133            log_err("Error 3 in test 5\n");
1134        }
1135        r = strcmp(sortKeyA, sortKeyB);
1136        if (r <= 0) {
1137            log_err("Error 4 in test 5\n");
1138        }
1139        r = strcmp(sortKeyAz, sortKeyBz);
1140        if (r != 0) {
1141            log_err("Error 5 in test 5\n");
1142        }
1143
1144        ucol_setStrength(coll, UCOL_IDENTICAL);
1145        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1146        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1147        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1148        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1149
1150        r = strcmp(sortKeyA, sortKeyAz);
1151        if (r <= 0) {
1152            log_err("Error 6 in test 5\n");
1153        }
1154        r = strcmp(sortKeyA, sortKeyB);
1155        if (r <= 0) {
1156            log_err("Error 7 in test 5\n");
1157        }
1158        r = strcmp(sortKeyAz, sortKeyBz);
1159        if (r != 0) {
1160            log_err("Error 8 in test 5\n");
1161        }
1162        ucol_setStrength(coll, UCOL_TERTIARY);
1163    }
1164
1165
1166    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1167
1168    {
1169        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1170        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1171
1172        result = ucol_strcoll(coll, strA, 5, strB, 5);
1173        if (result != UCOL_LESS) {
1174            log_err("Error 1 in test 6\n");
1175        }
1176        result = ucol_strcoll(coll, strA, -1, strB, -1);
1177        if (result != UCOL_EQUAL) {
1178            log_err("Error 2 in test 6\n");
1179        }
1180    }
1181
1182    ucol_close(coll);
1183}
1184
1185
1186
1187#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Unescapes each caseBitData[] entry, passes it to ucol_uprv_getCaseBits()
 * and logs an error when the returned value differs from the results[] entry
 * at the same index.
 * NOTE(review): the collator opened here is never closed and status is never
 * checked; both should be addressed if this test is ever re-enabled.
 */
static void TestGetCaseBit(void) {
  /* Escaped input strings; results[] below is indexed in parallel. */
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case value for each caseBitData[] entry. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      /* Only the first code unit of the input is reported. */
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
}
1213#endif
1214
1215static void TestHangulTailoring(void) {
1216    static const char *koreanData[] = {
1217        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1218            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1219            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1220            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1221            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1222            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1223    };
1224
1225    const char *rules =
1226        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1227        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1228        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1229        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1230        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1231        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1232
1233
1234  UErrorCode status = U_ZERO_ERROR;
1235  UChar rlz[2048] = { 0 };
1236  uint32_t rlen = u_unescape(rules, rlz, 2048);
1237
1238  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1239  if(status == U_FILE_ACCESS_ERROR) {
1240    log_data_err("Is your data around?\n");
1241    return;
1242  } else if(U_FAILURE(status)) {
1243    log_err("Error opening collator\n");
1244    return;
1245  }
1246
1247  log_verbose("Using start of korean rules\n");
1248
1249  if(U_SUCCESS(status)) {
1250    genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1251  } else {
1252    log_err("Unable to open collator with rules %s\n", rules);
1253  }
1254
1255  ucol_close(coll);
1256
1257  log_verbose("Using ko__LOTUS locale\n");
1258  genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1259}
1260
1261/*
1262 * The secondary/tertiary compression middle byte
1263 * as used by the current implementation.
1264 * Subject to change as the sort key compression changes.
1265 * See class CollationKeys.
1266 */
1267enum {
1268    SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
1269    TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
1270};
1271
1272static void TestCompressOverlap(void) {
1273    UChar       secstr[150];
1274    UChar       tertstr[150];
1275    UErrorCode  status = U_ZERO_ERROR;
1276    UCollator  *coll;
1277    uint8_t     result[500];
1278    uint32_t    resultlen;
1279    int         count = 0;
1280    uint8_t    *tempptr;
1281
1282    coll = ucol_open("", &status);
1283
1284    if (U_FAILURE(status)) {
1285        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1286        return;
1287    }
1288    while (count < 149) {
1289        secstr[count] = 0x0020; /* [06, 05, 05] */
1290        tertstr[count] = 0x0020;
1291        count ++;
1292    }
1293
1294    /* top down compression ----------------------------------- */
1295    secstr[count] = 0x0332; /* [, 87, 05] */
1296    tertstr[count] = 0x3000; /* [06, 05, 07] */
1297
1298    /* no compression secstr should have 150 secondary bytes, tertstr should
1299    have 150 tertiary bytes.
1300    with correct compression, secstr should have 6 secondary
1301    bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1302    resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1303    (void)resultlen;    /* Suppress set but not used warning. */
1304    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1305    while (*(tempptr + 1) != 1) {
1306        /* the last secondary collation element is not checked since it is not
1307        part of the compression */
1308        if (*tempptr < SEC_COMMON_MIDDLE) {
1309            log_err("Secondary top down compression overlapped\n");
1310        }
1311        tempptr ++;
1312    }
1313
1314    /* tertiary top/bottom/common for en_US is similar to the secondary
1315    top/bottom/common */
1316    resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1317    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1318    while (*(tempptr + 1) != 0) {
1319        /* the last secondary collation element is not checked since it is not
1320        part of the compression */
1321        if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1322            log_err("Tertiary top down compression overlapped\n");
1323        }
1324        tempptr ++;
1325    }
1326
1327    /* bottom up compression ------------------------------------- */
1328    secstr[count] = 0;
1329    tertstr[count] = 0;
1330    resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1331    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1332    while (*(tempptr + 1) != 1) {
1333        /* the last secondary collation element is not checked since it is not
1334        part of the compression */
1335        if (*tempptr > SEC_COMMON_MIDDLE) {
1336            log_err("Secondary bottom up compression overlapped\n");
1337        }
1338        tempptr ++;
1339    }
1340
1341    /* tertiary top/bottom/common for en_US is similar to the secondary
1342    top/bottom/common */
1343    resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1344    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1345    while (*(tempptr + 1) != 0) {
1346        /* the last secondary collation element is not checked since it is not
1347        part of the compression */
1348        if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1349            log_err("Tertiary bottom up compression overlapped\n");
1350        }
1351        tempptr ++;
1352    }
1353
1354    ucol_close(coll);
1355}
1356
/*
 * Feeds the three Cyrillic strings in test[] to genericRulesStarter() once
 * for each tailoring that is still enabled.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Only these two tailorings remain active.
     *
     * Russian overrides contractions, so the former
     *     genericLocaleStarter("ru", test, 3);
     * is not valid anymore.
     *
     * UCA 8.0 drops most of the Cyrillic contractions from the default order
     * (see CLDR ticket #7246 "root collation: remove Cyrillic contractions"),
     * which is why these variants are no longer run:
     *     genericLocaleStarter("root", test, 3);
     *     genericRulesStarter("&\\u0410 = \\u0410", test, 3);
     *     genericRulesStarter("&Z < \\u0410", test, 3);
     *     genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
     *     genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
     */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int ruleIdx;

    for (ruleIdx = 0; ruleIdx < 2; ruleIdx++) {
        genericRulesStarter(activeRules[ruleIdx], test, 3);
    }
}
1379
/*
 * Runs two three-string data sets through genericRulesStarter() using a
 * tailoring that consists solely of
 * [suppressContractions [\u0400-\u047f]].
 */
static void TestSuppressContractions(void) {
    /* Shared by both runs below. */
    const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}
1396
1397static void TestContraction(void) {
1398    const static char *testrules[] = {
1399        "&A = AB / B",
1400        "&A = A\\u0306/\\u0306",
1401        "&c = ch / h"
1402    };
1403    const static UChar testdata[][2] = {
1404        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1405        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1406        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1407    };
1408    const static UChar testdata2[][2] = {
1409        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1410        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1411        {0x0063 /* 'c' */, 0x006C /* 'l' */}
1412    };
1413#if 0
1414    /*
1415     * These pairs of rule strings are not guaranteed to yield the very same mappings.
1416     * In fact, LDML 24 recommends an improved way of creating mappings
1417     * which always yields different mappings for such pairs. See
1418     * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1419     */
1420    const static char *testrules3[] = {
1421        "&z < xyz &xyzw << B",
1422        "&z < xyz &xyz << B / w",
1423        "&z < ch &achm << B",
1424        "&z < ch &a << B / chm",
1425        "&\\ud800\\udc00w << B",
1426        "&\\ud800\\udc00 << B / w",
1427        "&a\\ud800\\udc00m << B",
1428        "&a << B / \\ud800\\udc00m",
1429    };
1430#endif
1431
1432    UErrorCode  status   = U_ZERO_ERROR;
1433    UCollator  *coll;
1434    UChar       rule[256] = {0};
1435    uint32_t    rlen     = 0;
1436    int         i;
1437
1438    for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1439        UCollationElements *iter1;
1440        int j = 0;
1441        log_verbose("Rule %s for testing\n", testrules[i]);
1442        rlen = u_unescape(testrules[i], rule, 32);
1443        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1444        if (U_FAILURE(status)) {
1445            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1446            return;
1447        }
1448        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1449        if (U_FAILURE(status)) {
1450            log_err("Collation iterator creation failed\n");
1451            return;
1452        }
1453        while (j < 2) {
1454            UCollationElements *iter2 = ucol_openElements(coll,
1455                                                         &(testdata[i][j]),
1456                                                         1, &status);
1457            uint32_t ce;
1458            if (U_FAILURE(status)) {
1459                log_err("Collation iterator creation failed\n");
1460                return;
1461            }
1462            ce = ucol_next(iter2, &status);
1463            while (ce != UCOL_NULLORDER) {
1464                if ((uint32_t)ucol_next(iter1, &status) != ce) {
1465                    log_err("Collation elements in contraction split does not match\n");
1466                    return;
1467                }
1468                ce = ucol_next(iter2, &status);
1469            }
1470            j ++;
1471            ucol_closeElements(iter2);
1472        }
1473        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1474            log_err("Collation elements not exhausted\n");
1475            return;
1476        }
1477        ucol_closeElements(iter1);
1478        ucol_close(coll);
1479    }
1480
1481    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1482    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1483    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1484        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1485                testdata2[0][0], testdata2[0][1], testdata2[1][0],
1486                testdata2[1][1]);
1487        return;
1488    }
1489    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1490        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1491                testdata2[1][0], testdata2[1][1], testdata2[2][0],
1492                testdata2[2][1]);
1493        return;
1494    }
1495    ucol_close(coll);
1496#if 0  /* see above */
1497    for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1498        log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1499        UCollator          *coll1,
1500                           *coll2;
1501        UCollationElements *iter1,
1502                           *iter2;
1503        UChar               ch = 0x0042 /* 'B' */;
1504        uint32_t            ce;
1505        rlen = u_unescape(testrules3[i], rule, 32);
1506        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1507        rlen = u_unescape(testrules3[i + 1], rule, 32);
1508        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509        if (U_FAILURE(status)) {
1510            log_err("Collator creation failed %s\n", testrules[i]);
1511            return;
1512        }
1513        iter1 = ucol_openElements(coll1, &ch, 1, &status);
1514        iter2 = ucol_openElements(coll2, &ch, 1, &status);
1515        if (U_FAILURE(status)) {
1516            log_err("Collation iterator creation failed\n");
1517            return;
1518        }
1519        ce = ucol_next(iter1, &status);
1520        if (U_FAILURE(status)) {
1521            log_err("Retrieving ces failed\n");
1522            return;
1523        }
1524        while (ce != UCOL_NULLORDER) {
1525            uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1526            if (ce == ce2) {
1527                log_verbose("CEs match: %08x\n", ce);
1528            } else {
1529                log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1530                return;
1531            }
1532            ce = ucol_next(iter1, &status);
1533            if (U_FAILURE(status)) {
1534                log_err("Retrieving ces failed\n");
1535                return;
1536            }
1537        }
1538        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1539            log_err("CEs not exhausted\n");
1540            return;
1541        }
1542        ucol_closeElements(iter1);
1543        ucol_closeElements(iter2);
1544        ucol_close(coll1);
1545        ucol_close(coll2);
1546    }
1547#endif
1548}
1549
1550static void TestExpansion(void) {
1551    const static char *testrules[] = {
1552#if 0
1553        /*
1554         * This seems to have tested that M was not mapped to an expansion.
1555         * I believe the old builder just did that because it computed the extension CEs
1556         * at the very end, which was a bug.
1557         * Among other problems, it violated the core tailoring principle
1558         * by making an earlier rule depend on a later one.
1559         * And, of course, if M did not get an expansion, then it was primary different from K,
1560         * unlike what the rule &K<<M says.
1561         */
1562        "&J << K / B & K << M",
1563#endif
1564        "&J << K / B << M"
1565    };
1566    const static UChar testdata[][3] = {
1567        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1568        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1569        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1570        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1571        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1572        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1573    };
1574
1575    UErrorCode  status   = U_ZERO_ERROR;
1576    UCollator  *coll;
1577    UChar       rule[256] = {0};
1578    uint32_t    rlen     = 0;
1579    int         i;
1580
1581    for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1582        int j = 0;
1583        log_verbose("Rule %s for testing\n", testrules[i]);
1584        rlen = u_unescape(testrules[i], rule, 32);
1585        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1586        if (U_FAILURE(status)) {
1587            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1588            return;
1589        }
1590
1591        for (j = 0; j < 5; j ++) {
1592            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1593        }
1594        ucol_close(coll);
1595    }
1596}
1597
1598#if 0
/*
 * This test exercises the current limitations of the engine.
 * It always fails, so it is disabled by default (compiled out by the
 * surrounding #if 0).
 *
 * Each braced section below defines one tailoring rule plus one or more
 * string lists and hands them to genericRulesStarter() or
 * genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both string orders are run with the attribute on, then with it off. */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the preceding section; only the log message differs. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* att[] and the val*[] arrays are parallel: one value per attribute. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
    /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
  }

}
1688#endif
1689
1690static void TestBocsuCoverage(void) {
1691  UErrorCode status = U_ZERO_ERROR;
1692  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1693  UChar       test[256] = {0};
1694  uint32_t    tlen     = u_unescape(testString, test, 32);
1695  uint8_t key[256]     = {0};
1696  uint32_t klen         = 0;
1697
1698  UCollator *coll = ucol_open("", &status);
1699  if(U_SUCCESS(status)) {
1700  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1701
1702  klen = ucol_getSortKey(coll, test, tlen, key, 256);
1703  (void)klen;    /* Suppress set but not used warning. */
1704
1705  ucol_close(coll);
1706  } else {
1707    log_data_err("Couldn't open UCA\n");
1708  }
1709}
1710
1711static void TestVariableTopSetting(void) {
1712  UErrorCode status = U_ZERO_ERROR;
1713  uint32_t varTopOriginal = 0, varTop1, varTop2;
1714  UCollator *coll = ucol_open("", &status);
1715  if(U_SUCCESS(status)) {
1716
1717  static const UChar nul = 0;
1718  static const UChar space = 0x20;
1719  static const UChar dot = 0x2e;  /* punctuation */
1720  static const UChar degree = 0xb0;  /* symbol */
1721  static const UChar dollar = 0x24;  /* currency symbol */
1722  static const UChar zero = 0x30;  /* digit */
1723
1724  varTopOriginal = ucol_getVariableTop(coll, &status);
1725  log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1726  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1727
1728  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1729  varTop2 = ucol_getVariableTop(coll, &status);
1730  log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1731  if(U_FAILURE(status) || varTop1 != varTop2 ||
1732      !ucol_equal(coll, &nul, 0, &space, 1) ||
1733      ucol_equal(coll, &nul, 0, &dot, 1) ||
1734      ucol_equal(coll, &nul, 0, &degree, 1) ||
1735      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1736      ucol_equal(coll, &nul, 0, &zero, 1) ||
1737      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1738    log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1739  }
1740
1741  varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1742  varTop2 = ucol_getVariableTop(coll, &status);
1743  log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1744  if(U_FAILURE(status) || varTop1 != varTop2 ||
1745      !ucol_equal(coll, &nul, 0, &space, 1) ||
1746      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1747      ucol_equal(coll, &nul, 0, &degree, 1) ||
1748      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1749      ucol_equal(coll, &nul, 0, &zero, 1) ||
1750      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1751    log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1752  }
1753
1754  varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1755  varTop2 = ucol_getVariableTop(coll, &status);
1756  log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1757  if(U_FAILURE(status) || varTop1 != varTop2 ||
1758      !ucol_equal(coll, &nul, 0, &space, 1) ||
1759      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1760      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1761      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1762      ucol_equal(coll, &nul, 0, &zero, 1) ||
1763      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1764    log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1765  }
1766
1767  varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1768  varTop2 = ucol_getVariableTop(coll, &status);
1769  log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1770  if(U_FAILURE(status) || varTop1 != varTop2 ||
1771      !ucol_equal(coll, &nul, 0, &space, 1) ||
1772      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1773      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1774      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1775      ucol_equal(coll, &nul, 0, &zero, 1) ||
1776      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1777    log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1778  }
1779
1780  log_verbose("Testing setting variable top to contractions\n");
1781  {
1782    UChar first[4] = { 0 };
1783    first[0] = 0x0040;
1784    first[1] = 0x0050;
1785    first[2] = 0x0000;
1786
1787    status = U_ZERO_ERROR;
1788    ucol_setVariableTop(coll, first, -1, &status);
1789
1790    if(U_SUCCESS(status)) {
1791      log_err("Invalid contraction succeded in setting variable top!\n");
1792    }
1793
1794  }
1795
1796  log_verbose("Test restoring variable top\n");
1797
1798  status = U_ZERO_ERROR;
1799  ucol_restoreVariableTop(coll, varTopOriginal, &status);
1800  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1801    log_err("Couldn't restore old variable top\n");
1802  }
1803
1804  log_verbose("Testing calling with error set\n");
1805
1806  status = U_INTERNAL_PROGRAM_ERROR;
1807  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1808  varTop2 = ucol_getVariableTop(coll, &status);
1809  ucol_restoreVariableTop(coll, varTop2, &status);
1810  varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1811  varTop2 = ucol_getVariableTop(NULL, &status);
1812  ucol_restoreVariableTop(NULL, varTop2, &status);
1813  if(status != U_INTERNAL_PROGRAM_ERROR) {
1814    log_err("Bad reaction to passed error!\n");
1815  }
1816  ucol_close(coll);
1817  } else {
1818    log_data_err("Couldn't open UCA collator\n");
1819  }
1820}
1821
1822static void TestMaxVariable() {
1823  UErrorCode status = U_ZERO_ERROR;
1824  UColReorderCode oldMax, max;
1825  UCollator *coll;
1826
1827  static const UChar nul = 0;
1828  static const UChar space = 0x20;
1829  static const UChar dot = 0x2e;  /* punctuation */
1830  static const UChar degree = 0xb0;  /* symbol */
1831  static const UChar dollar = 0x24;  /* currency symbol */
1832  static const UChar zero = 0x30;  /* digit */
1833
1834  coll = ucol_open("", &status);
1835  if(U_FAILURE(status)) {
1836    log_data_err("Couldn't open root collator\n");
1837    return;
1838  }
1839
1840  oldMax = ucol_getMaxVariable(coll);
1841  log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1842  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1843
1844  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1845  max = ucol_getMaxVariable(coll);
1846  log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1847  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1848      !ucol_equal(coll, &nul, 0, &space, 1) ||
1849      ucol_equal(coll, &nul, 0, &dot, 1) ||
1850      ucol_equal(coll, &nul, 0, &degree, 1) ||
1851      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1852      ucol_equal(coll, &nul, 0, &zero, 1) ||
1853      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1854    log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1855  }
1856
1857  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1858  max = ucol_getMaxVariable(coll);
1859  log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1860  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1861      !ucol_equal(coll, &nul, 0, &space, 1) ||
1862      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1863      ucol_equal(coll, &nul, 0, &degree, 1) ||
1864      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1865      ucol_equal(coll, &nul, 0, &zero, 1) ||
1866      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1867    log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1868  }
1869
1870  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1871  max = ucol_getMaxVariable(coll);
1872  log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1873  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1874      !ucol_equal(coll, &nul, 0, &space, 1) ||
1875      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1876      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1877      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1878      ucol_equal(coll, &nul, 0, &zero, 1) ||
1879      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1880    log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1881  }
1882
1883  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1884  max = ucol_getMaxVariable(coll);
1885  log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1886  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1887      !ucol_equal(coll, &nul, 0, &space, 1) ||
1888      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1889      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1890      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1891      ucol_equal(coll, &nul, 0, &zero, 1) ||
1892      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1893    log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1894  }
1895
1896  log_verbose("Test restoring maxVariable\n");
1897  status = U_ZERO_ERROR;
1898  ucol_setMaxVariable(coll, oldMax, &status);
1899  if(oldMax != ucol_getMaxVariable(coll)) {
1900    log_err("Couldn't restore old maxVariable\n");
1901  }
1902
1903  log_verbose("Testing calling with error set\n");
1904  status = U_INTERNAL_PROGRAM_ERROR;
1905  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1906  max = ucol_getMaxVariable(coll);
1907  if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1908    log_err("Bad reaction to passed error!\n");
1909  }
1910  ucol_close(coll);
1911}
1912
1913static void TestNonChars(void) {
1914  static const char *test[] = {
1915      "\\u0000",  /* ignorable */
1916      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1917      "\\uFDD0", "\\uFDEF",
1918      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1919      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1920      "\\U0003FFFE", "\\U0003FFFF",
1921      "\\U0004FFFE", "\\U0004FFFF",
1922      "\\U0005FFFE", "\\U0005FFFF",
1923      "\\U0006FFFE", "\\U0006FFFF",
1924      "\\U0007FFFE", "\\U0007FFFF",
1925      "\\U0008FFFE", "\\U0008FFFF",
1926      "\\U0009FFFE", "\\U0009FFFF",
1927      "\\U000AFFFE", "\\U000AFFFF",
1928      "\\U000BFFFE", "\\U000BFFFF",
1929      "\\U000CFFFE", "\\U000CFFFF",
1930      "\\U000DFFFE", "\\U000DFFFF",
1931      "\\U000EFFFE", "\\U000EFFFF",
1932      "\\U000FFFFE", "\\U000FFFFF",
1933      "\\U0010FFFE", "\\U0010FFFF",
1934      "\\uFFFF"  /* special character with maximum primary weight */
1935  };
1936  UErrorCode status = U_ZERO_ERROR;
1937  UCollator *coll = ucol_open("en_US", &status);
1938
1939  log_verbose("Test non characters\n");
1940
1941  if(U_SUCCESS(status)) {
1942    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1943  } else {
1944    log_err_status(status, "Unable to open collator\n");
1945  }
1946
1947  ucol_close(coll);
1948}
1949
/**
 * Builds four strings that consist of a long run of 'a' followed by one of
 * 'a'..'d', for every total length from 20 to 499, and hands each set to
 * genericLocaleStarter() for the en_US locale.
 *
 * The buffers are heap-allocated; an allocation failure is reported and the
 * test is abandoned instead of writing through a NULL pointer.
 */
static void TestExtremeCompression(void) {
  enum { kStringCount = 4, kBufferSize = 2048 };
  char *test[kStringCount] = { NULL };
  int32_t j = 0, i = 0;

  for(i = 0; i < kStringCount; i++) {
    test[i] = (char *)malloc(kBufferSize*sizeof(char));
    if(test[i] == NULL) {
      log_err("TestExtremeCompression: out of memory\n");
      goto cleanup;
    }
  }

  for(j = 20; j < 500; j++) {
    for(i = 0; i < kStringCount; i++) {
      /* j-1 times 'a', then the distinguishing letter, then the terminator */
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, kStringCount);
  }

cleanup:
  /* free(NULL) is a no-op, so this also covers a partially filled array. */
  for(i = 0; i < kStringCount; i++) {
    free(test[i]);
  }
}
1972
#if 0
/*
 * Disabled older variant of TestExtremeCompression(); never compiled.
 * NOTE(review): it would need fixing before being re-enabled:
 *  - ucol_open() is given `status` by value, not `&status`;
 *  - `coll` is neither used nor closed;
 *  - the first loop fills j-2 bytes but writes the letter at index j-1,
 *    and genericLocaleStarter() runs only once, after the loop has finished;
 *  - the second loop only rewrites test[0] and has no check afterwards.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2002
/**
 * Tailors a rule string that mixes supplementary characters (written as
 * surrogate pairs) with BMP letters, then passes the rules and the list of
 * test strings to genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *expectedOrder[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  /* all 14 entries of the table take part */
  genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
2023
/*
 * Test of the prefix implementation that the JIS X 4061 collation rules use.
 * Each case pairs a rule string with the strings handed to
 * genericRulesStarter(); `len` is the number of leading data entries used.
 */
static void TestPrefix(void) {
  static const struct PrefixCase {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } cases[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const struct PrefixCase *cur = cases;
  const struct PrefixCase * const end = cases + UPRV_LENGTHOF(cases);

  for(; cur != end; ++cur) {
    genericRulesStarter(cur->rules, cur->data, cur->len);
  }
}
2049
2050/* This test uses data suplied by Masashiko Maedera to test the implementation */
2051/* JIS X 4061 collation order implementation                                   */
2052static void TestNewJapanese(void) {
2053
2054  static const char * const test1[] = {
2055      "\\u30b7\\u30e3\\u30fc\\u30ec",
2056      "\\u30b7\\u30e3\\u30a4",
2057      "\\u30b7\\u30e4\\u30a3",
2058      "\\u30b7\\u30e3\\u30ec",
2059      "\\u3061\\u3087\\u3053",
2060      "\\u3061\\u3088\\u3053",
2061      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2062      "\\u3066\\u30fc\\u305f",
2063      "\\u30c6\\u30fc\\u30bf",
2064      "\\u30c6\\u30a7\\u30bf",
2065      "\\u3066\\u3048\\u305f",
2066      "\\u3067\\u30fc\\u305f",
2067      "\\u30c7\\u30fc\\u30bf",
2068      "\\u30c7\\u30a7\\u30bf",
2069      "\\u3067\\u3048\\u305f",
2070      "\\u3066\\u30fc\\u305f\\u30fc",
2071      "\\u30c6\\u30fc\\u30bf\\u30a1",
2072      "\\u30c6\\u30a7\\u30bf\\u30fc",
2073      "\\u3066\\u3047\\u305f\\u3041",
2074      "\\u3066\\u3048\\u305f\\u30fc",
2075      "\\u3067\\u30fc\\u305f\\u30fc",
2076      "\\u30c7\\u30fc\\u30bf\\u30a1",
2077      "\\u3067\\u30a7\\u305f\\u30a1",
2078      "\\u30c7\\u3047\\u30bf\\u3041",
2079      "\\u30c7\\u30a8\\u30bf\\u30a2",
2080      "\\u3072\\u3086",
2081      "\\u3073\\u3085\\u3042",
2082      "\\u3074\\u3085\\u3042",
2083      "\\u3073\\u3085\\u3042\\u30fc",
2084      "\\u30d3\\u30e5\\u30a2\\u30fc",
2085      "\\u3074\\u3085\\u3042\\u30fc",
2086      "\\u30d4\\u30e5\\u30a2\\u30fc",
2087      "\\u30d2\\u30e5\\u30a6",
2088      "\\u30d2\\u30e6\\u30a6",
2089      "\\u30d4\\u30e5\\u30a6\\u30a2",
2090      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2091      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2092      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2093      "\\u3072\\u3085\\u3093",
2094      "\\u3074\\u3085\\u3093",
2095      "\\u3075\\u30fc\\u308a",
2096      "\\u30d5\\u30fc\\u30ea",
2097      "\\u3075\\u3045\\u308a",
2098      "\\u3075\\u30a5\\u308a",
2099      "\\u3075\\u30a5\\u30ea",
2100      "\\u30d5\\u30a6\\u30ea",
2101      "\\u3076\\u30fc\\u308a",
2102      "\\u30d6\\u30fc\\u30ea",
2103      "\\u3076\\u3045\\u308a",
2104      "\\u30d6\\u30a5\\u308a",
2105      "\\u3077\\u3046\\u308a",
2106      "\\u30d7\\u30a6\\u30ea",
2107      "\\u3075\\u30fc\\u308a\\u30fc",
2108      "\\u30d5\\u30a5\\u30ea\\u30fc",
2109      "\\u3075\\u30a5\\u308a\\u30a3",
2110      "\\u30d5\\u3045\\u308a\\u3043",
2111      "\\u30d5\\u30a6\\u30ea\\u30fc",
2112      "\\u3075\\u3046\\u308a\\u3043",
2113      "\\u30d6\\u30a6\\u30ea\\u30a4",
2114      "\\u3077\\u30fc\\u308a\\u30fc",
2115      "\\u3077\\u30a5\\u308a\\u30a4",
2116      "\\u3077\\u3046\\u308a\\u30fc",
2117      "\\u30d7\\u30a6\\u30ea\\u30a4",
2118      "\\u30d5\\u30fd",
2119      "\\u3075\\u309e",
2120      "\\u3076\\u309d",
2121      "\\u3076\\u3075",
2122      "\\u3076\\u30d5",
2123      "\\u30d6\\u3075",
2124      "\\u30d6\\u30d5",
2125      "\\u3076\\u309e",
2126      "\\u3076\\u3077",
2127      "\\u30d6\\u3077",
2128      "\\u3077\\u309d",
2129      "\\u30d7\\u30fd",
2130      "\\u3077\\u3075",
2131};
2132
2133  static const char *test2[] = {
2134    "\\u306f\\u309d", /* H\\u309d */
2135    "\\u30cf\\u30fd", /* K\\u30fd */
2136    "\\u306f\\u306f", /* HH */
2137    "\\u306f\\u30cf", /* HK */
2138    "\\u30cf\\u30cf", /* KK */
2139    "\\u306f\\u309e", /* H\\u309e */
2140    "\\u30cf\\u30fe", /* K\\u30fe */
2141    "\\u306f\\u3070", /* HH\\u309b */
2142    "\\u30cf\\u30d0", /* KK\\u309b */
2143    "\\u306f\\u3071", /* HH\\u309c */
2144    "\\u30cf\\u3071", /* KH\\u309c */
2145    "\\u30cf\\u30d1", /* KK\\u309c */
2146    "\\u3070\\u309d", /* H\\u309b\\u309d */
2147    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2148    "\\u3070\\u306f", /* H\\u309bH */
2149    "\\u30d0\\u30cf", /* K\\u309bK */
2150    "\\u3070\\u309e", /* H\\u309b\\u309e */
2151    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2152    "\\u3070\\u3070", /* H\\u309bH\\u309b */
2153    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2154    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2155    "\\u3070\\u3071", /* H\\u309bH\\u309c */
2156    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2157    "\\u3071\\u309d", /* H\\u309c\\u309d */
2158    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2159    "\\u3071\\u306f", /* H\\u309cH */
2160    "\\u30d1\\u30cf", /* K\\u309cK */
2161    "\\u3071\\u3070", /* H\\u309cH\\u309b */
2162    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2163    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2164    "\\u3071\\u3071", /* H\\u309cH\\u309c */
2165    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2166  };
2167  /*
2168  static const char *test3[] = {
2169    "\\u221er\\u221e",
2170    "\\u221eR#",
2171    "\\u221et\\u221e",
2172    "#r\\u221e",
2173    "#R#",
2174    "#t%",
2175    "#T%",
2176    "8t\\u221e",
2177    "8T\\u221e",
2178    "8t#",
2179    "8T#",
2180    "8t%",
2181    "8T%",
2182    "8t8",
2183    "8T8",
2184    "\\u03c9r\\u221e",
2185    "\\u03a9R%",
2186    "rr\\u221e",
2187    "rR\\u221e",
2188    "Rr\\u221e",
2189    "RR\\u221e",
2190    "RT%",
2191    "rt8",
2192    "tr\\u221e",
2193    "tr8",
2194    "TR8",
2195    "tt8",
2196    "\\u30b7\\u30e3\\u30fc\\u30ec",
2197  };
2198  */
2199  static const UColAttribute att[] = { UCOL_STRENGTH };
2200  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2201
2202  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2203  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2204
2205  genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2206  genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2207  /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2208  genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2209  genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2210}
2211
2212static void TestStrCollIdenticalPrefix(void) {
2213  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2214  const char* test[] = {
2215    "ab\\ud9b0\\udc70",
2216    "ab\\ud9b0\\udc71"
2217  };
2218  genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2219}
2220/* Contractions should have all their canonically equivalent */
2221/* strings included */
2222static void TestContractionClosure(void) {
2223  static const struct {
2224    const char *rules;
2225    const char *data[10];
2226    const uint32_t len;
2227  } tests[] = {
2228    {   "&b=\\u00e4\\u00e4",
2229      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2230    {   "&b=\\u00C5",
2231      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2232  };
2233  uint32_t i;
2234
2235
2236  for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2237    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2238  }
2239}
2240
/*
 * Runs rule sets that use [before 3] through genericRulesStarter(); in two
 * of the three cases the [before 3] reset is combined with equality chains.
 * (Original note: "This test also fails".)
 */
static void TestBeforePrefixFailure(void) {
  static const struct BeforeCase {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const struct BeforeCase *cur = cases;
  const struct BeforeCase * const end = cases + UPRV_LENGTHOF(cases);

  for(; cur != end; ++cur) {
    genericRulesStarter(cur->rules, cur->data, cur->len);
  }

#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
  genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
/* this piece of code should be in some sort of verbose mode     */
/* it gets the collation elements for elements and prints them   */
/* This is useful when trying to see whether the problem is      */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < UPRV_LENGTHOF(test); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
2318
2319static void TestPrefixCompose(void) {
2320  const char* rule1 =
2321        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2322  /*
2323  const char* test[] = {
2324      "\\u30c6\\u30fc\\u30bf",
2325      "\\u30c6\\u30a7\\u30bf",
2326  };
2327  */
2328  {
2329    UErrorCode status = U_ZERO_ERROR;
2330    /*uint32_t i = 0;*/
2331    /*UCollationElements *it = NULL;*/
2332/*    uint32_t CE;*/
2333    UChar string[256];
2334    uint32_t uStringLen;
2335    UCollator *coll = NULL;
2336
2337    uStringLen = u_unescape(rule1, string, 256);
2338
2339    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2340    ucol_close(coll);
2341  }
2342
2343
2344}
2345
2346/*
2347[last variable] last variable value
2348[last primary ignorable] largest CE for primary ignorable
2349[last secondary ignorable] largest CE for secondary ignorable
2350[last tertiary ignorable] largest CE for tertiary ignorable
2351[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2352*/
2353
/**
 * Runs rule strings that reset to the special positions ([first variable],
 * [last regular], [top], ...), with and without [before N], through
 * genericRulesStarter(), together with the expected order of test strings.
 */
static void TestRuleOptions(void) {
  /* The values below are hardcoded and match the current UCA;
   * a UCA update may make it necessary to adjust them.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct RuleOptionCase {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
#if 0
    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */
#endif
    /*
     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
     * and it *is* possible to "go before" that.
     */
    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* 'normal' befores */

    /*
     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
     * because there is no tailoring space before that boundary.
     * Made the tests work by tailoring to a space instead.
     */
    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },
#endif
    { "&[last variable]<z"
      "&' '<x"  /* was &[last primary ignorable]<x, see above */
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  const struct RuleOptionCase *cur = cases;
  const struct RuleOptionCase * const end = cases + UPRV_LENGTHOF(cases);

  /* each case: build the tailoring and check the first `len` strings */
  for(; cur != end; ++cur) {
    genericRulesStarter(cur->rules, cur->data, cur->len);
  }
}
2480
2481
/**
 * Not really a test: it only tries out whether building a tailoring that
 * contains an [optimize ...] option (which copies UCA contents) fails.
 * The resulting order cannot really be checked, since the functionality
 * remains the same.
 */
static void TestOptimize(void) {
  static const struct OptimizeCase {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  const struct OptimizeCase *cur = cases;
  const struct OptimizeCase * const end = cases + UPRV_LENGTHOF(cases);

  while(cur != end) {
    genericRulesStarter(cur->rules, cur->data, cur->len);
    ++cur;
  }
}
2503
2504/*
2505cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2506weiv    ucol_strcollIter?
2507cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2508weiv    these are the input strings?
2509cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2510weiv    will check - could be a problem with utf-8 iterator
2511cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2512weiv    hmmm
2513cycheng@ca.ibm.c... note that we have a standalone high surrogate
2514weiv    that doesn't sound right
2515cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2516weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2517cycheng@ca.ibm.c... yes
2518weiv    and then do the comparison
2519cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2520weiv    utf-16 strings look like a little endian ones in the example you sent me
2521weiv    It could be a bug - let me try to test it out
2522cycheng@ca.ibm.c... ok
2523cycheng@ca.ibm.c... we can wait till the conf. call
cycheng@ca.ibm.c... next week
2525weiv    that would be great
2526weiv    hmmm
2527weiv    I might be wrong
2528weiv    let me play with it some more
2529cycheng@ca.ibm.c... ok
2530cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2531cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2532cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2533weiv    ok
2534cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2535weiv    thanks
2536cycheng@ca.ibm.c... the 4 strings we sent are just samples
2537*/
#if 0
/*
 * Disabled; never compiled.
 * Compares the same two strings with ucol_strcollIter() on the root collator
 * (normalization on), once through UTF-16BE iterators and once through UTF-8
 * iterators, and logs an error if the two results differ.
 * The first string starts with an unpaired high surrogate (bytes d8 00 in
 * UTF-16BE, ed a0 80 in the UTF-8 array).
 * NOTE(review): `status` is never checked.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  /* the length arguments are byte counts: 4 bytes per string */
  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2578
2579#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2580static void Alexis2(void) {
2581  UErrorCode status = U_ZERO_ERROR;
2582  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2583  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2584  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2586
2587  UConverter *conv = NULL;
2588
2589  UCharIterator U16BEItS, U16BEItT;
2590  UCharIterator U8ItS, U8ItT;
2591
2592  UCollationResult resU16, resU16BE, resU8;
2593
2594  static const char* const pairs[][2] = {
2595    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2596    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2597    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2598    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2599    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2600    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2601    { "\\u0020", "\\u0020\\u0000"}
2602/*
26035F20 (my result here)
26045F204E008E3F
26055F20 (your result here)
2606*/
2607  };
2608
2609  int32_t i = 0;
2610
2611  UCollator *coll = ucol_open("", &status);
2612  if(status == U_FILE_ACCESS_ERROR) {
2613    log_data_err("Is your data around?\n");
2614    return;
2615  } else if(U_FAILURE(status)) {
2616    log_err("Error opening collator\n");
2617    return;
2618  }
2619  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2620  conv = ucnv_open("UTF16BE", &status);
2621  for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2622    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2623    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2624
2625    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2626
2627    log_verbose("Result of strcoll is %i\n", resU16);
2628
2629    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2630    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2631    (void)U16BELenS;    /* Suppress set but not used warnings. */
2632    (void)U16BELenT;
2633
2634    /* use the original sizes, as the result from converter is in bytes */
2635    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2636    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2637
2638    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2639
2640    log_verbose("Result of U16BE is %i\n", resU16BE);
2641
2642    if(resU16 != resU16BE) {
2643      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2644    }
2645
2646    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2647    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2648
2649    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2650    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2651
2652    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2653
2654    if(resU16 != resU8) {
2655      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2656    }
2657
2658  }
2659
2660  ucol_close(coll);
2661  ucnv_close(conv);
2662}
2663
2664static void TestHebrewUCA(void) {
2665  UErrorCode status = U_ZERO_ERROR;
2666  static const char *first[] = {
2667    "d790d6b8d79cd795d6bcd7a9",
2668    "d790d79cd79ed7a7d799d799d7a1",
2669    "d790d6b4d79ed795d6bcd7a9",
2670  };
2671
2672  char utf8String[3][256];
2673  UChar utf16String[3][256];
2674
2675  int32_t i = 0, j = 0;
2676  int32_t sizeUTF8[3];
2677  int32_t sizeUTF16[3];
2678
2679  UCollator *coll = ucol_open("", &status);
2680  if (U_FAILURE(status)) {
2681      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2682      return;
2683  }
2684  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2685
2686  for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2687    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2688    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2689    log_verbose("%i: ");
2690    for(j = 0; j < sizeUTF16[i]; j++) {
2691      /*log_verbose("\\u%04X", utf16String[i][j]);*/
2692      log_verbose("%04X", utf16String[i][j]);
2693    }
2694    log_verbose("\n");
2695  }
2696  for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2697    for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2698      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2699    }
2700  }
2701
2702  ucol_close(coll);
2703
2704}
2705
2706static void TestPartialSortKeyTermination(void) {
2707  static const char* cases[] = {
2708    "\\u1234\\u1234\\udc00",
2709    "\\udc00\\ud800\\ud800"
2710  };
2711
2712  int32_t i;
2713
2714  UErrorCode status = U_ZERO_ERROR;
2715
2716  UCollator *coll = ucol_open("", &status);
2717
2718  UCharIterator iter;
2719
2720  UChar currCase[256];
2721  int32_t length = 0;
2722  int32_t pKeyLen = 0;
2723
2724  uint8_t key[256];
2725
2726  for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2727    uint32_t state[2] = {0, 0};
2728    length = u_unescape(cases[i], currCase, 256);
2729    uiter_setString(&iter, currCase, length);
2730    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2731    (void)pKeyLen;   /* Suppress set but not used warning. */
2732
2733    log_verbose("Done\n");
2734
2735  }
2736  ucol_close(coll);
2737}
2738
2739static void TestSettings(void) {
2740  static const char* cases[] = {
2741    "apple",
2742      "Apple"
2743  };
2744
2745  static const char* locales[] = {
2746    "",
2747      "en"
2748  };
2749
2750  UErrorCode status = U_ZERO_ERROR;
2751
2752  int32_t i = 0, j = 0;
2753
2754  UChar source[256], target[256];
2755  int32_t sLen = 0, tLen = 0;
2756
2757  UCollator *collateObject = NULL;
2758  for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2759    collateObject = ucol_open(locales[i], &status);
2760    ucol_setStrength(collateObject, UCOL_PRIMARY);
2761    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2762    for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2763      sLen = u_unescape(cases[j-1], source, 256);
2764      source[sLen] = 0;
2765      tLen = u_unescape(cases[j], target, 256);
2766      source[tLen] = 0;
2767      doTest(collateObject, source, target, UCOL_EQUAL);
2768    }
2769    ucol_close(collateObject);
2770  }
2771}
2772
2773static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2774    UErrorCode status = U_ZERO_ERROR;
2775    int32_t errorNo = 0;
2776    const UChar *sourceRules = NULL;
2777    int32_t sourceRulesLen = 0;
2778    UParseError parseError;
2779    UColAttributeValue french = UCOL_OFF;
2780
2781    if(!ucol_equals(source, target)) {
2782        log_err("Same collators, different address not equal\n");
2783        errorNo++;
2784    }
2785    ucol_close(target);
2786    if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2787        target = ucol_safeClone(source, NULL, NULL, &status);
2788        if(U_FAILURE(status)) {
2789            log_err("Error creating clone\n");
2790            errorNo++;
2791            return errorNo;
2792        }
2793        if(!ucol_equals(source, target)) {
2794            log_err("Collator different from it's clone\n");
2795            errorNo++;
2796        }
2797        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2798        if(french == UCOL_ON) {
2799            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2800        } else {
2801            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2802        }
2803        if(U_FAILURE(status)) {
2804            log_err("Error setting attributes\n");
2805            errorNo++;
2806            return errorNo;
2807        }
2808        if(ucol_equals(source, target)) {
2809            log_err("Collators same even when options changed\n");
2810            errorNo++;
2811        }
2812        ucol_close(target);
2813
2814        sourceRules = ucol_getRules(source, &sourceRulesLen);
2815        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2816        if(U_FAILURE(status)) {
2817            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2818            errorNo++;
2819            return errorNo;
2820        }
2821        /* Note: The tailoring rule string is an optional data item. */
2822        if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2823            log_err("Collator different from collator that was created from the same rules\n");
2824            errorNo++;
2825        }
2826        ucol_close(target);
2827    }
2828    return errorNo;
2829}
2830
2831
2832static void TestEquals(void) {
2833    /* ucol_equals is not currently a public API. There is a chance that it will become
2834    * something like this.
2835    */
2836    /* test whether the two collators instantiated from the same locale are equal */
2837    UErrorCode status = U_ZERO_ERROR;
2838    UParseError parseError;
2839    int32_t noOfLoc = uloc_countAvailable();
2840    const char *locName = NULL;
2841    UCollator *source = NULL, *target = NULL;
2842    int32_t i = 0;
2843
2844    const char* rules[] = {
2845        "&l < lj <<< Lj <<< LJ",
2846        "&n < nj <<< Nj <<< NJ",
2847        "&ae <<< \\u00e4",
2848        "&AE <<< \\u00c4"
2849    };
2850    /*
2851    const char* badRules[] = {
2852    "&l <<< Lj",
2853    "&n < nj <<< nJ <<< NJ",
2854    "&a <<< \\u00e4",
2855    "&AE <<< \\u00c4 <<< x"
2856    };
2857    */
2858
2859    UChar sourceRules[1024], targetRules[1024];
2860    int32_t sourceRulesSize = 0, targetRulesSize = 0;
2861    int32_t rulesSize = UPRV_LENGTHOF(rules);
2862
2863    for(i = 0; i < rulesSize; i++) {
2864        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2865        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2866    }
2867
2868    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2869    if(status == U_FILE_ACCESS_ERROR) {
2870        log_data_err("Is your data around?\n");
2871        return;
2872    } else if(U_FAILURE(status)) {
2873        log_err("Error opening collator\n");
2874        return;
2875    }
2876    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2877    if(!ucol_equals(source, target)) {
2878        log_err("Equivalent collators not equal!\n");
2879    }
2880    ucol_close(source);
2881    ucol_close(target);
2882
2883    source = ucol_open("root", &status);
2884    target = ucol_open("root", &status);
2885    log_verbose("Testing root\n");
2886    if(!ucol_equals(source, source)) {
2887        log_err("Same collator not equal\n");
2888    }
2889    if(TestEqualsForCollator("root", source, target)) {
2890        log_err("Errors for root\n");
2891    }
2892    ucol_close(source);
2893
2894    for(i = 0; i<noOfLoc; i++) {
2895        status = U_ZERO_ERROR;
2896        locName = uloc_getAvailable(i);
2897        /*if(hasCollationElements(locName)) {*/
2898        log_verbose("Testing equality for locale %s\n", locName);
2899        source = ucol_open(locName, &status);
2900        target = ucol_open(locName, &status);
2901        if (U_FAILURE(status)) {
2902            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2903            continue;
2904        }
2905        if(TestEqualsForCollator(locName, source, target)) {
2906            log_err("Errors for locale %s\n", locName);
2907        }
2908        ucol_close(source);
2909        /*}*/
2910    }
2911}
2912
2913static void TestJ2726(void) {
2914    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2915    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2916    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2917    UErrorCode status = U_ZERO_ERROR;
2918    UCollator *coll = ucol_open("en", &status);
2919    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2920    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2921    doTest(coll, a, aSpace, UCOL_EQUAL);
2922    doTest(coll, aSpace, a, UCOL_EQUAL);
2923    doTest(coll, a, spaceA, UCOL_EQUAL);
2924    doTest(coll, spaceA, a, UCOL_EQUAL);
2925    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2926    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2927    ucol_close(coll);
2928}
2929
2930static void NullRule(void) {
2931    UChar r[3] = {0};
2932    UErrorCode status = U_ZERO_ERROR;
2933    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2934    if(U_SUCCESS(status)) {
2935        log_err("This should have been an error!\n");
2936        ucol_close(coll);
2937    } else {
2938        status = U_ZERO_ERROR;
2939    }
2940    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2941    if(U_FAILURE(status)) {
2942        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2943    } else {
2944        ucol_close(coll);
2945    }
2946}
2947
2948/**
2949 * Test for CollationElementIterator previous and next for the whole set of
2950 * unicode characters with normalization on.
2951 */
2952static void TestNumericCollation(void)
2953{
2954    UErrorCode status = U_ZERO_ERROR;
2955
2956    const static char *basicTestStrings[]={
2957    "hello1",
2958    "hello2",
2959    "hello2002",
2960    "hello2003",
2961    "hello123456",
2962    "hello1234567",
2963    "hello10000000",
2964    "hello100000000",
2965    "hello1000000000",
2966    "hello10000000000",
2967    };
2968
2969    const static char *preZeroTestStrings[]={
2970    "avery10000",
2971    "avery010000",
2972    "avery0010000",
2973    "avery00010000",
2974    "avery000010000",
2975    "avery0000010000",
2976    "avery00000010000",
2977    "avery000000010000",
2978    };
2979
2980    const static char *thirtyTwoBitNumericStrings[]={
2981    "avery42949672960",
2982    "avery42949672961",
2983    "avery42949672962",
2984    "avery429496729610"
2985    };
2986
2987     const static char *longNumericStrings[]={
2988     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2989        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2990        are treated as multiple collation elements. */
2991    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2992    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2993    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2994    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2995    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2996    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2997    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2998    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
2999    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3000    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3001    };
3002
3003    const static char *supplementaryDigits[] = {
3004      "\\uD835\\uDFCE", /* 0 */
3005      "\\uD835\\uDFCF", /* 1 */
3006      "\\uD835\\uDFD0", /* 2 */
3007      "\\uD835\\uDFD1", /* 3 */
3008      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3009      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3010      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3011      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3012      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3013      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3014    };
3015
3016    const static char *foreignDigits[] = {
3017      "\\u0661",
3018        "\\u0662",
3019        "\\u0663",
3020      "\\u0661\\u0660",
3021      "\\u0661\\u0662",
3022      "\\u0661\\u0663",
3023      "\\u0662\\u0660",
3024      "\\u0662\\u0662",
3025      "\\u0662\\u0663",
3026      "\\u0663\\u0660",
3027      "\\u0663\\u0662",
3028      "\\u0663\\u0663"
3029    };
3030
3031    const static char *evenZeroes[] = {
3032      "2000",
3033      "2001",
3034        "2002",
3035        "2003"
3036    };
3037
3038    UColAttribute att = UCOL_NUMERIC_COLLATION;
3039    UColAttributeValue val = UCOL_ON;
3040
3041    /* Open our collator. */
3042    UCollator* coll = ucol_open("root", &status);
3043    if (U_FAILURE(status)){
3044        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3045              myErrorName(status));
3046        return;
3047    }
3048    genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3049    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3050    genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3051    genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3052    genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3053    genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3054
3055    /* Setting up our collator to do digits. */
3056    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3057    if (U_FAILURE(status)){
3058        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3059              myErrorName(status));
3060        return;
3061    }
3062
3063    /*
3064       Testing that prepended zeroes still yield the correct collation behavior.
3065       We expect that every element in our strings array will be equal.
3066    */
3067    genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3068
3069    ucol_close(coll);
3070}
3071
3072static void TestTibetanConformance(void)
3073{
3074    const char* test[] = {
3075        "\\u0FB2\\u0591\\u0F71\\u0061",
3076        "\\u0FB2\\u0F71\\u0061"
3077    };
3078
3079    UErrorCode status = U_ZERO_ERROR;
3080    UCollator *coll = ucol_open("", &status);
3081    UChar source[100];
3082    UChar target[100];
3083    int result;
3084    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3085    if (U_SUCCESS(status)) {
3086        u_unescape(test[0], source, 100);
3087        u_unescape(test[1], target, 100);
3088        doTest(coll, source, target, UCOL_EQUAL);
3089        result = ucol_strcoll(coll, source, -1,   target, -1);
3090        log_verbose("result %d\n", result);
3091        if (UCOL_EQUAL != result) {
3092            log_err("Tibetan comparison error\n");
3093        }
3094    }
3095    ucol_close(coll);
3096
3097    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3098}
3099
/** Runs genericLocaleStarter() for locale "zh__PINYIN" on a two-string Han sequence. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, UPRV_LENGTHOF(test));
}
3104
3105/**
3106 * Iterate through the given iterator, checking to see that all the strings
3107 * in the expected array are present.
3108 * @param expected array of strings we expect to see, or NULL
3109 * @param expectedCount number of elements of expected, or 0
3110 */
3111static int32_t checkUEnumeration(const char* msg,
3112                                 UEnumeration* iter,
3113                                 const char** expected,
3114                                 int32_t expectedCount) {
3115    UErrorCode ec = U_ZERO_ERROR;
3116    int32_t i = 0, n, j, bit;
3117    int32_t seenMask = 0;
3118
3119    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3120    n = uenum_count(iter, &ec);
3121    if (!assertSuccess("count", &ec)) return -1;
3122    log_verbose("%s = [", msg);
3123    for (;; ++i) {
3124        const char* s = uenum_next(iter, NULL, &ec);
3125        if (!assertSuccess("snext", &ec) || s == NULL) break;
3126        if (i != 0) log_verbose(",");
3127        log_verbose("%s", s);
3128        /* check expected list */
3129        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3130            if ((seenMask&bit) == 0 &&
3131                uprv_strcmp(s, expected[j]) == 0) {
3132                seenMask |= bit;
3133                break;
3134            }
3135        }
3136    }
3137    log_verbose("] (%d)\n", i);
3138    assertTrue("count verified", i==n);
3139    /* did we see all expected strings? */
3140    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3141        if ((seenMask&bit)!=0) {
3142            log_verbose("Ok: \"%s\" seen\n", expected[j]);
3143        } else {
3144            log_err("FAIL: \"%s\" not seen\n", expected[j]);
3145        }
3146    }
3147    return n;
3148}
3149
3150/**
3151 * Test new API added for separate collation tree.
3152 */
3153static void TestSeparateTrees(void) {
3154    UErrorCode ec = U_ZERO_ERROR;
3155    UEnumeration *e = NULL;
3156    int32_t n = -1;
3157    UBool isAvailable;
3158    char loc[256];
3159
3160    static const char* AVAIL[] = { "en", "de" };
3161
3162    static const char* KW[] = { "collation" };
3163
3164    static const char* KWVAL[] = { "phonebook", "stroke" };
3165
3166#if !UCONFIG_NO_SERVICE
3167    e = ucol_openAvailableLocales(&ec);
3168    if (e != NULL) {
3169        assertSuccess("ucol_openAvailableLocales", &ec);
3170        assertTrue("ucol_openAvailableLocales!=0", e!=0);
3171        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3172        (void)n;    /* Suppress set but not used warnings. */
3173        /* Don't need to check n because we check list */
3174        uenum_close(e);
3175    } else {
3176        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3177    }
3178#endif
3179
3180    e = ucol_getKeywords(&ec);
3181    if (e != NULL) {
3182        assertSuccess("ucol_getKeywords", &ec);
3183        assertTrue("ucol_getKeywords!=0", e!=0);
3184        n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3185        /* Don't need to check n because we check list */
3186        uenum_close(e);
3187    } else {
3188        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3189    }
3190
3191    e = ucol_getKeywordValues(KW[0], &ec);
3192    if (e != NULL) {
3193        assertSuccess("ucol_getKeywordValues", &ec);
3194        assertTrue("ucol_getKeywordValues!=0", e!=0);
3195        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3196        /* Don't need to check n because we check list */
3197        uenum_close(e);
3198    } else {
3199        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3200    }
3201
3202    /* Try setting a warning before calling ucol_getKeywordValues */
3203    ec = U_USING_FALLBACK_WARNING;
3204    e = ucol_getKeywordValues(KW[0], &ec);
3205    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3206        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3207        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3208        /* Don't need to check n because we check list */
3209        uenum_close(e);
3210    }
3211
3212    /*
3213U_DRAFT int32_t U_EXPORT2
3214ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3215                             const char* locale, UBool* isAvailable,
3216                             UErrorCode* status);
3217}
3218*/
3219    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3220                                     &isAvailable, &ec);
3221    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3222        assertEquals("getFunctionalEquivalent(de)", "root", loc);
3223        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3224                   isAvailable == TRUE);
3225    }
3226
3227    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3228                                     &isAvailable, &ec);
3229    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3230        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3231        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3232                   isAvailable == FALSE);
3233    }
3234}
3235
/*
 * Supersedes TestJ784.
 * Runs two string sequences both through a collator built from the
 * "[before 2]" pinyin rules below and through the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    /* One tailoring per vowel, written as adjacent literals that form a single rule string. */
    static const char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    static const char *syllables[] = {
        "l\\u0101",  "la",
        "l\\u0101n", "lan ",
        "l\\u0113",  "le",
        "l\\u0113n", "len"
    };

    static const char *toneAndCase[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa",       "xA",       "Xa",       "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax",       "xAx"
    };

    /* First sequence: rule-built collator, then the zh locale. */
    genericRulesStarter(pinyinRules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));

    /* Second sequence: same two collators. */
    genericRulesStarter(pinyinRules, toneAndCase, UPRV_LENGTHOF(toneAndCase));
    genericLocaleStarter("zh", toneAndCase, UPRV_LENGTHOF(toneAndCase));
}
3296
3297static void TestBeforeTightening(void) {
3298    static const struct {
3299        const char *rules;
3300        UErrorCode expectedStatus;
3301    } tests[] = {
3302        { "&[before 1]a<x", U_ZERO_ERROR },
3303        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3304        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3305        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3306        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3307        { "&[before 2]a<<x",U_ZERO_ERROR },
3308        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3309        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3310        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3311        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3312        { "&[before 3]a<<<x",U_ZERO_ERROR },
3313        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3314        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3315    };
3316
3317    int32_t i = 0;
3318
3319    UErrorCode status = U_ZERO_ERROR;
3320    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3321    uint32_t rlen = 0;
3322
3323    UCollator *coll = NULL;
3324
3325
3326    for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3327        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3328        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3329        if(status != tests[i].expectedStatus) {
3330            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3331                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3332        }
3333        ucol_close(coll);
3334        status = U_ZERO_ERROR;
3335    }
3336
3337}
3338
3339/*
3340&m < a
3341&[before 1] a < x <<< X << q <<< Q < z
3342assert: m <<< M < x <<< X << q <<< Q < z < a < n
3343
3344&m < a
3345&[before 2] a << x <<< X << q <<< Q < z
3346assert: m <<< M < x <<< X << q <<< Q << a < z < n
3347
3348&m < a
3349&[before 3] a <<< x <<< X << q <<< Q < z
3350assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3351
3352
3353&m << a
3354&[before 1] a < x <<< X << q <<< Q < z
3355assert: x <<< X << q <<< Q < z < m <<< M << a < n
3356
3357&m << a
3358&[before 2] a << x <<< X << q <<< Q < z
3359assert: m <<< M << x <<< X << q <<< Q << a < z < n
3360
3361&m << a
3362&[before 3] a <<< x <<< X << q <<< Q < z
3363assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3364
3365
3366&m <<< a
3367&[before 1] a < x <<< X << q <<< Q < z
3368assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3369
3370&m <<< a
3371&[before 2] a << x <<< X << q <<< Q < z
3372assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3373
3374&m <<< a
3375&[before 3] a <<< x <<< X << q <<< Q < z
3376assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3377
3378
3379&[before 1] s < x <<< X << q <<< Q < z
3380assert: r <<< R < x <<< X << q <<< Q < z < s < n
3381
3382&[before 2] s << x <<< X << q <<< Q < z
3383assert: r <<< R < x <<< X << q <<< Q << s < z < n
3384
3385&[before 3] s <<< x <<< X << q <<< Q < z
3386assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3387
3388
3389&[before 1] \u24DC < x <<< X << q <<< Q < z
3390assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3391
3392&[before 2] \u24DC << x <<< X << q <<< Q < z
3393assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3394
3395&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3396assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3397*/
3398
3399
3400#if 0
3401/* requires features not yet supported */
3402static void TestMoreBefore(void) {
3403    static const struct {
3404        const char* rules;
3405        const char* order[16];
3406        int32_t size;
3407    } tests[] = {
3408        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
3409        { "m","M","x","X","q","Q","z","a","n" }, 9},
3410        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
3411        { "m","M","x","X","q","Q","a","z","n" }, 9},
3412        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
3413        { "m","M","x","X","a","q","Q","z","n" }, 9},
3414        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
3415        { "x","X","q","Q","z","m","M","a","n" }, 9},
3416        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
3417        { "m","M","x","X","q","Q","a","z","n" }, 9},
3418        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
3419        { "m","M","x","X","a","q","Q","z","n" }, 9},
3420        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
3421        { "x","X","q","Q","z","n","m","a","M" }, 9},
3422        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
3423        { "x","X","q","Q","m","a","M","z","n" }, 9},
3424        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
3425        { "m","x","X","a","M","q","Q","z","n" }, 9},
3426        { "&[before 1] s < x <<< X << q <<< Q < z",
3427        { "r","R","x","X","q","Q","z","s","n" }, 9},
3428        { "&[before 2] s << x <<< X << q <<< Q < z",
3429        { "r","R","x","X","q","Q","s","z","n" }, 9},
3430        { "&[before 3] s <<< x <<< X << q <<< Q < z",
3431        { "r","R","x","X","s","q","Q","z","n" }, 9},
3432        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
3433        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
3434        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
3435        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
3436        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
3437        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
3438    };
3439
3440    int32_t i = 0;
3441
3442    for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3443        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
3444    }
3445}
3446#endif
3447
3448static void TestTailorNULL( void ) {
3449    const static char* rule = "&a <<< '\\u0000'";
3450    UErrorCode status = U_ZERO_ERROR;
3451    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3452    uint32_t rlen = 0;
3453    UChar a = 1, null = 0;
3454    UCollationResult res = UCOL_EQUAL;
3455
3456    UCollator *coll = NULL;
3457
3458
3459    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3460    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3461
3462    if(U_FAILURE(status)) {
3463        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3464    } else {
3465        res = ucol_strcoll(coll, &a, 1, &null, 1);
3466
3467        if(res != UCOL_LESS) {
3468            log_err("NULL was not tailored properly!\n");
3469        }
3470    }
3471
3472    ucol_close(coll);
3473}
3474
3475static void
3476TestUpperFirstQuaternary(void)
3477{
3478  const char* tests[] = { "B", "b", "Bb", "bB" };
3479  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3480  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3481  genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3482}
3483
3484static void
3485TestJ4960(void)
3486{
3487  const char* tests[] = { "\\u00e2T", "aT" };
3488  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3489  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3490  const char* tests2[] = { "a", "A" };
3491  const char* rule = "&[first tertiary ignorable]=A=a";
3492  UColAttribute att2[] = { UCOL_CASE_LEVEL };
3493  UColAttributeValue attVals2[] = { UCOL_ON };
3494  /* Test whether we correctly ignore primary ignorables on case level when */
3495  /* we have only primary & case level */
3496  genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3497  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3498  /* and case level */
3499  genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3500  /* Test whether completely ignorable letters have case level info (they shouldn't) */
3501  genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3502}
3503
3504static void
3505TestJ5223(void)
3506{
3507  static const char *test = "this is a test string";
3508  UChar ustr[256];
3509  int32_t ustr_length = u_unescape(test, ustr, 256);
3510  unsigned char sortkey[256];
3511  int32_t sortkey_length;
3512  UErrorCode status = U_ZERO_ERROR;
3513  static UCollator *coll = NULL;
3514  coll = ucol_open("root", &status);
3515  if(U_FAILURE(status)) {
3516    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3517    return;
3518  }
3519  ucol_setStrength(coll, UCOL_PRIMARY);
3520  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3521  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3522  if (U_FAILURE(status)) {
3523    log_err("Failed setting atributes\n");
3524    return;
3525  }
3526  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3527  if (sortkey_length > 256) return;
3528
3529  /* we mark the position where the null byte should be written in advance */
3530  sortkey[sortkey_length-1] = 0xAA;
3531
3532  /* we set the buffer size one byte higher than needed */
3533  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3534    sortkey_length+1);
3535
3536  /* no error occurs (for me) */
3537  if (sortkey[sortkey_length-1] == 0xAA) {
3538    log_err("Hit bug at first try\n");
3539  }
3540
3541  /* we mark the position where the null byte should be written again */
3542  sortkey[sortkey_length-1] = 0xAA;
3543
3544  /* this time we set the buffer size to the exact amount needed */
3545  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3546    sortkey_length);
3547
3548  /* now the trailing null byte is not written */
3549  if (sortkey[sortkey_length-1] == 0xAA) {
3550    log_err("Hit bug at second try\n");
3551  }
3552
3553  ucol_close(coll);
3554}
3555
/*
 * J5232: regression test for the Thai partial sort key problem.
 * Passes two Thai strings that differ only in their second-to-last
 * code point (U+0E47 vs. U+0E48) to genericLocaleStarter() for locale "th".
 */
static void
TestJ5232(void)
{
    static const char *thaiWords[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiWords, UPRV_LENGTHOF(thaiWords));
}
3567
/*
 * J5367: passes "a" and "y" to genericRulesStarter() together with a
 * tailoring that places 'a' after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    static const char *expectedOrder[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
3575
3576static void
3577TestVI5913(void)
3578{
3579    UErrorCode status = U_ZERO_ERROR;
3580    int32_t i, j;
3581    UCollator *coll =NULL;
3582    uint8_t  resColl[100], expColl[100];
3583    int32_t  rLen, tLen, ruleLen, sLen, kLen;
3584    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3585    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3586    /*
3587     * Note: Just tailoring &z<ae^ does not work as expected:
3588     * The UCA spec requires for discontiguous contractions that they
3589     * extend an *existing match* by one combining mark at a time.
3590     * Therefore, ae must be a contraction so that the builder finds
3591     * discontiguous contractions for ae^, for example with an intervening underdot.
3592     * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3593     */
3594    UChar rule3[256]={
3595        0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3596        0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3597        0};
3598    static const UChar tData[][20]={
3599        {0x1EAC, 0},
3600        {0x0041, 0x0323, 0x0302, 0},
3601        {0x1EA0, 0x0302, 0},
3602        {0x00C2, 0x0323, 0},
3603        {0x1ED8, 0},  /* O with dot and circumflex */
3604        {0x1ECC, 0x0302, 0},
3605        {0x1EB7, 0},
3606        {0x1EA1, 0x0306, 0},
3607    };
3608    static const UChar tailorData[][20]={
3609        {0x1FA2, 0},  /* Omega with 3 combining marks */
3610        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3611        {0x1FF3, 0x0313, 0x0300, 0},
3612        {0x1F60, 0x0300, 0x0345, 0},
3613        {0x1F62, 0x0345, 0},
3614        {0x1FA0, 0x0300, 0},
3615    };
3616    static const UChar tailorData2[][20]={
3617        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3618        {0x0073, 0x0323, 0x030C, 0},
3619        {0x0073, 0x030C, 0x0323, 0},
3620    };
3621    static const UChar tailorData3[][20]={
3622        {0x007a, 0},  /*  z */
3623        {0x0061, 0x0065, 0},  /*  a + e */
3624        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3625        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3626        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3627        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3628        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3629        {0x00EA, 0},  /* e with circumflex  */
3630    };
3631
3632    /* Test Vietnamese sort. */
3633    coll = ucol_open("vi", &status);
3634    if(U_FAILURE(status)) {
3635        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3636        return;
3637    }
3638    log_verbose("\n\nVI collation:");
3639    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3640        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3641    }
3642    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3643        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3644    }
3645    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3646        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3647    }
3648    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3649        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3650    }
3651
3652    for (j=0; j<8; j++) {
3653        tLen = u_strlen(tData[j]);
3654        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3655        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3656        for(i = 0; i<rLen; i++) {
3657            log_verbose(" %02X", resColl[i]);
3658        }
3659    }
3660
3661    ucol_close(coll);
3662
3663    /* Test Romanian sort. */
3664    coll = ucol_open("ro", &status);
3665    log_verbose("\n\nRO collation:");
3666    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3667        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3668    }
3669    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3670        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3671    }
3672    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3673        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3674    }
3675
3676    for (j=4; j<8; j++) {
3677        tLen = u_strlen(tData[j]);
3678        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3679        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3680        for(i = 0; i<rLen; i++) {
3681            log_verbose(" %02X", resColl[i]);
3682        }
3683    }
3684    ucol_close(coll);
3685
3686    /* Test the precomposed Greek character with 3 combining marks. */
3687    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3688    ruleLen = u_strlen(rule);
3689    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3690    if (U_FAILURE(status)) {
3691        log_err("ucol_openRules failed with %s\n", u_errorName(status));
3692        return;
3693    }
3694    sLen = u_strlen(tailorData[0]);
3695    for (j=1; j<6; j++) {
3696        tLen = u_strlen(tailorData[j]);
3697        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3698            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3699        }
3700    }
3701    /* Test getSortKey. */
3702    tLen = u_strlen(tailorData[0]);
3703    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3704    for (j=0; j<6; j++) {
3705        tLen = u_strlen(tailorData[j]);
3706        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3707        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3708            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3709            for(i = 0; i<rLen; i++) {
3710                log_err(" %02X", resColl[i]);
3711            }
3712        }
3713    }
3714    ucol_close(coll);
3715
3716    log_verbose("\n\nTailoring test for s with caron:");
3717    ruleLen = u_strlen(rule2);
3718    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3719    tLen = u_strlen(tailorData2[0]);
3720    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3721    for (j=1; j<3; j++) {
3722        tLen = u_strlen(tailorData2[j]);
3723        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3724        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3725            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3726            for(i = 0; i<rLen; i++) {
3727                log_err(" %02X", resColl[i]);
3728            }
3729        }
3730    }
3731    ucol_close(coll);
3732
3733    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3734    ruleLen = u_strlen(rule3);
3735    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3736    tLen = u_strlen(tailorData3[3]);
3737    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3738    log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3739    for(i = 0; i<kLen; i++) {
3740        log_verbose(" %02X", expColl[i]);
3741    }
3742    for (j=4; j<6; j++) {
3743        tLen = u_strlen(tailorData3[j]);
3744        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3745
3746        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3747            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3748            for(i = 0; i<rLen; i++) {
3749                log_err(" %02X", resColl[i]);
3750            }
3751        }
3752
3753        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3754         for(i = 0; i<rLen; i++) {
3755             log_verbose(" %02X", resColl[i]);
3756         }
3757    }
3758    ucol_close(coll);
3759}
3760
3761static void
3762TestTailor6179(void)
3763{
3764    UErrorCode status = U_ZERO_ERROR;
3765    int32_t i;
3766    UCollator *coll =NULL;
3767    uint8_t  resColl[100];
3768    int32_t  rLen, tLen, ruleLen;
3769    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3770    static const UChar rule1[]={
3771            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3772            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3773            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3774            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3775    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3776    static const UChar rule2[]={
3777            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3778            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3779            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3780            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3781            0x3C,0x3C,0x20,0x62,0};
3782
3783    static const UChar tData1[][4]={
3784        {0x61, 0},
3785        {0x62, 0},
3786        { 0xFDD0,0x009E, 0}
3787    };
3788    static const UChar tData2[][4]={
3789        {0x61, 0},
3790        {0x62, 0},
3791        { 0xFDD0,0x009E, 0}
3792     };
3793
3794    /*
3795     * These values from FractionalUCA.txt will change,
3796     * and need to be updated here.
3797     * TODO: Make this not check for particular sort keys.
3798     * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3799     */
3800    static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3801    static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3802    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3803    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3804
3805    UParseError parseError;
3806
3807    /* Test [Last Primary ignorable] */
3808
3809    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3810    ruleLen = u_strlen(rule1);
3811    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3812    if (U_FAILURE(status)) {
3813        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3814        return;
3815    }
3816    tLen = u_strlen(tData1[0]);
3817    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3818    if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3819        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3820        for(i = 0; i<rLen; i++) {
3821            log_err(" %02X", resColl[i]);
3822        }
3823        log_err("\n");
3824    }
3825    tLen = u_strlen(tData1[1]);
3826    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3827    if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3828        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3829        for(i = 0; i<rLen; i++) {
3830            log_err(" %02X", resColl[i]);
3831        }
3832        log_err("\n");
3833    }
3834    ucol_close(coll);
3835
3836
3837    /* Test [Last Secondary ignorable] */
3838    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3839    ruleLen = u_strlen(rule2);
3840    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3841    if (U_FAILURE(status)) {
3842        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3843        log_info("  offset=%d  \"%s\" | \"%s\"\n",
3844                 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3845        return;
3846    }
3847    tLen = u_strlen(tData2[0]);
3848    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3849    if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3850        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3851        for(i = 0; i<rLen; i++) {
3852            log_err(" %02X", resColl[i]);
3853        }
3854        log_err("\n");
3855    }
3856    tLen = u_strlen(tData2[1]);
3857    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3858    if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3859      log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3860      for(i = 0; i<rLen; i++) {
3861        log_err(" %02X", resColl[i]);
3862      }
3863      log_err("\n");
3864    }
3865    ucol_close(coll);
3866}
3867
3868static void
3869TestUCAPrecontext(void)
3870{
3871    UErrorCode status = U_ZERO_ERROR;
3872    int32_t i, j;
3873    UCollator *coll =NULL;
3874    uint8_t  resColl[100], prevColl[100];
3875    int32_t  rLen, tLen, ruleLen;
3876    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3877    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3878    /* & l middle-dot << a  a is an expansion. */
3879
3880    UChar tData1[][20]={
3881            { 0xb7, 0},  /* standalone middle dot(0xb7) */
3882            { 0x387, 0}, /* standalone middle dot(0x387) */
3883            { 0x61, 0},  /* a */
3884            { 0x6C, 0},  /* l */
3885            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3886            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3887            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3888            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3889            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3890            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3891            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3892     };
3893
3894    log_verbose("\n\nEN collation:");
3895    coll = ucol_open("en", &status);
3896    if (U_FAILURE(status)) {
3897        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3898        return;
3899    }
3900    for (j=0; j<11; j++) {
3901        tLen = u_strlen(tData1[j]);
3902        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3903        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3904            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3905                    j, tData1[j]);
3906        }
3907        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3908        for(i = 0; i<rLen; i++) {
3909            log_verbose(" %02X", resColl[i]);
3910        }
3911        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3912     }
3913     ucol_close(coll);
3914
3915
3916     log_verbose("\n\nJA collation:");
3917     coll = ucol_open("ja", &status);
3918     if (U_FAILURE(status)) {
3919         log_err("Tailoring test: &z <<a|- failed!");
3920         return;
3921     }
3922     for (j=0; j<11; j++) {
3923         tLen = u_strlen(tData1[j]);
3924         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3925         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3926             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3927                     j, tData1[j]);
3928         }
3929         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3930         for(i = 0; i<rLen; i++) {
3931             log_verbose(" %02X", resColl[i]);
3932         }
3933         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3934      }
3935      ucol_close(coll);
3936
3937
3938      log_verbose("\n\nTailoring test: & middle dot < a ");
3939      ruleLen = u_strlen(rule1);
3940      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3941      if (U_FAILURE(status)) {
3942          log_err("Tailoring test: & middle dot < a failed!");
3943          return;
3944      }
3945      for (j=0; j<11; j++) {
3946          tLen = u_strlen(tData1[j]);
3947          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3948          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3949              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3950                      j, tData1[j]);
3951          }
3952          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3953          for(i = 0; i<rLen; i++) {
3954              log_verbose(" %02X", resColl[i]);
3955          }
3956          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3957       }
3958       ucol_close(coll);
3959
3960
3961       log_verbose("\n\nTailoring test: & l middle-dot << a ");
3962       ruleLen = u_strlen(rule2);
3963       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3964       if (U_FAILURE(status)) {
3965           log_err("Tailoring test: & l middle-dot << a failed!");
3966           return;
3967       }
3968       for (j=0; j<11; j++) {
3969           tLen = u_strlen(tData1[j]);
3970           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3971           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3972               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3973                       j, tData1[j]);
3974           }
3975           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3976               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3977                       j, tData1[j]);
3978           }
3979           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3980           for(i = 0; i<rLen; i++) {
3981               log_verbose(" %02X", resColl[i]);
3982           }
3983           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3984        }
3985        ucol_close(coll);
3986}
3987
3988static void
3989TestOutOfBuffer5468(void)
3990{
3991    static const char *test = "\\u4e00";
3992    UChar ustr[256];
3993    int32_t ustr_length = u_unescape(test, ustr, 256);
3994    unsigned char shortKeyBuf[1];
3995    int32_t sortkey_length;
3996    UErrorCode status = U_ZERO_ERROR;
3997    static UCollator *coll = NULL;
3998
3999    coll = ucol_open("root", &status);
4000    if(U_FAILURE(status)) {
4001      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4002      return;
4003    }
4004    ucol_setStrength(coll, UCOL_PRIMARY);
4005    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4006    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4007    if (U_FAILURE(status)) {
4008      log_err("Failed setting atributes\n");
4009      return;
4010    }
4011
4012    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4013    if (sortkey_length != 4) {
4014        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4015    }
4016    log_verbose("length of sortKey is %d", sortkey_length);
4017    ucol_close(coll);
4018}
4019
4020#define TSKC_DATA_SIZE 5
4021#define TSKC_BUF_SIZE  50
4022static void
4023TestSortKeyConsistency(void)
4024{
4025    UErrorCode icuRC = U_ZERO_ERROR;
4026    UCollator* ucol;
4027    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4028
4029    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4030    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4031    int32_t i, j, i2;
4032
4033    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4034    if (U_FAILURE(icuRC))
4035    {
4036        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4037        return;
4038    }
4039
4040    for (i = 0; i < TSKC_DATA_SIZE; i++)
4041    {
4042        UCharIterator uiter;
4043        uint32_t state[2] = { 0, 0 };
4044        int32_t dataLen = i+1;
4045        for (j=0; j<TSKC_BUF_SIZE; j++)
4046            bufFull[i][j] = bufPart[i][j] = 0;
4047
4048        /* Full sort key */
4049        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4050
4051        /* Partial sort key */
4052        uiter_setString(&uiter, data, dataLen);
4053        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4054        if (U_FAILURE(icuRC))
4055        {
4056            log_err("ucol_nextSortKeyPart failed\n");
4057            ucol_close(ucol);
4058            return;
4059        }
4060
4061        for (i2=0; i2<i; i2++)
4062        {
4063            UBool fullMatch = TRUE;
4064            UBool partMatch = TRUE;
4065            for (j=0; j<TSKC_BUF_SIZE; j++)
4066            {
4067                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4068                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4069            }
4070            if (fullMatch != partMatch) {
4071                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4072                                  : "partial key was consistent, but full key changed\n");
4073                ucol_close(ucol);
4074                return;
4075            }
4076        }
4077    }
4078
4079    /*=============================================*/
4080   ucol_close(ucol);
4081}
4082
4083/* ticket: 6101 */
4084static void TestCroatianSortKey(void) {
4085    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4086    UErrorCode status = U_ZERO_ERROR;
4087    UCollator *ucol;
4088    UCharIterator iter;
4089
4090    static const UChar text[] = { 0x0044, 0xD81A };
4091
4092    size_t length = UPRV_LENGTHOF(text);
4093
4094    uint8_t textSortKey[32];
4095    size_t lenSortKey = 32;
4096    size_t actualSortKeyLen;
4097    uint32_t uStateInfo[2] = { 0, 0 };
4098
4099    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4100    if (U_FAILURE(status)) {
4101        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4102        return;
4103    }
4104
4105    uiter_setString(&iter, text, length);
4106
4107    actualSortKeyLen = ucol_nextSortKeyPart(
4108        ucol, &iter, (uint32_t*)uStateInfo,
4109        textSortKey, lenSortKey, &status
4110        );
4111
4112    if (actualSortKeyLen == lenSortKey) {
4113        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4114    }
4115
4116    ucol_close(ucol);
4117}
4118
4119/* ticket: 6140 */
4120/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4121 * they are both Hiragana and Katakana
4122 */
4123#define SORTKEYLEN 50
4124static void TestHiragana(void) {
4125    UErrorCode status = U_ZERO_ERROR;
4126    UCollator* ucol;
4127    UCollationResult strcollresult;
4128    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4129    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4130    int32_t data1Len = UPRV_LENGTHOF(data1);
4131    int32_t data2Len = UPRV_LENGTHOF(data2);
4132    int32_t i, j;
4133    uint8_t sortKey1[SORTKEYLEN];
4134    uint8_t sortKey2[SORTKEYLEN];
4135
4136    UCharIterator uiter1;
4137    UCharIterator uiter2;
4138    uint32_t state1[2] = { 0, 0 };
4139    uint32_t state2[2] = { 0, 0 };
4140    int32_t keySize1;
4141    int32_t keySize2;
4142
4143    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4144            &status);
4145    if (U_FAILURE(status)) {
4146        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4147        return;
4148    }
4149
4150    /* Start of full sort keys */
4151    /* Full sort key1 */
4152    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4153    /* Full sort key2 */
4154    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4155    if (keySize1 == keySize2) {
4156        for (i = 0; i < keySize1; i++) {
4157            if (sortKey1[i] != sortKey2[i]) {
4158                log_err("Full sort keys are different. Should be equal.");
4159            }
4160        }
4161    } else {
4162        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4163    }
4164    /* End of full sort keys */
4165
4166    /* Start of partial sort keys */
4167    /* Partial sort key1 */
4168    uiter_setString(&uiter1, data1, data1Len);
4169    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4170    /* Partial sort key2 */
4171    uiter_setString(&uiter2, data2, data2Len);
4172    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4173    if (U_SUCCESS(status) && keySize1 == keySize2) {
4174        for (j = 0; j < keySize1; j++) {
4175            if (sortKey1[j] != sortKey2[j]) {
4176                log_err("Partial sort keys are different. Should be equal");
4177            }
4178        }
4179    } else {
4180        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4181    }
4182    /* End of partial sort keys */
4183
4184    /* Start of strcoll */
4185    /* Use ucol_strcoll() to determine ordering */
4186    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4187    if (strcollresult != UCOL_EQUAL) {
4188        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4189    }
4190
4191    ucol_close(ucol);
4192}
4193
4194/* Convenient struct for running collation tests */
4195typedef struct {
4196  const UChar source[MAX_TOKEN_LEN];  /* String on left */
4197  const UChar target[MAX_TOKEN_LEN];  /* String on right */
4198  UCollationResult result;            /* -1, 0 or +1, depending on collation */
4199} OneTestCase;
4200
4201/*
4202 * Utility function to test one collation test case.
4203 * @param testcases Array of test cases.
4204 * @param n_testcases Size of the array testcases.
4205 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4206 * @param n_rules Size of the array str_rules.
4207 */
4208static void doTestOneTestCase(const OneTestCase testcases[],
4209                              int n_testcases,
4210                              const char* str_rules[],
4211                              int n_rules)
4212{
4213  int rule_no, testcase_no;
4214  UChar rule[500];
4215  int32_t length = 0;
4216  UErrorCode status = U_ZERO_ERROR;
4217  UParseError parse_error;
4218  UCollator  *myCollation;
4219
4220  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4221
4222    length = u_unescape(str_rules[rule_no], rule, 500);
4223    if (length == 0) {
4224        log_err("ERROR: The rule cannot be unescaped: %s\n");
4225        return;
4226    }
4227    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4228    if(U_FAILURE(status)){
4229        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4230        log_info("  offset=%d  \"%s\" | \"%s\"\n",
4231                 parse_error.offset,
4232                 aescstrdup(parse_error.preContext, -1),
4233                 aescstrdup(parse_error.postContext, -1));
4234        return;
4235    }
4236    log_verbose("Testing the <<* syntax\n");
4237    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4238    ucol_setStrength(myCollation, UCOL_TERTIARY);
4239    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4240      doTest(myCollation,
4241             testcases[testcase_no].source,
4242             testcases[testcase_no].target,
4243             testcases[testcase_no].result
4244             );
4245    }
4246    ucol_close(myCollation);
4247  }
4248}
4249
/*
 * Expected orderings shared by the "same strength list" tests that tailor
 * a/b/c/d, k/l/m, x/y/z, f/g/h/e and 1/2/3.
 * In the trailing comments, "<", "<<" and "=" name the relation between the two strings.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" << "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4277
/*
 * Expected orderings for the supplementary-character list/range tests.
 * Supplementary code points are written as UTF-16 surrogate pairs:
 * D800 DC00 = U+10000, D800 DC01 = U+10001, D800 DC02 = U+10002.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same pair as the previous entry) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4289
/*
 * Expected orderings for the "qwerty" tailoring tests
 * (rules of the form &q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d).
 * In the trailing comments, "<", "<<" and "<<<" name the strength of the relation
 * between the two strings under those rules.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" << "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" < "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" <<< "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" <<< "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" << "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4310
4311static void TestSameStrengthList(void)
4312{
4313  const char* strRules[] = {
4314    /* Normal */
4315    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4316
4317    /* Lists */
4318    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4319  };
4320  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4321}
4322
4323static void TestSameStrengthListQuoted(void)
4324{
4325  const char* strRules[] = {
4326    /* Lists with quoted characters */
4327    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4328    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4329
4330    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4331    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4332
4333    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4334    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4335  };
4336  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4337}
4338
4339static void TestSameStrengthListSupplemental(void)
4340{
4341  const char* strRules[] = {
4342    "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4343    "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4344    "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4345    "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4346  };
4347  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4348}
4349
4350static void TestSameStrengthListQwerty(void)
4351{
4352  const char* strRules[] = {
4353    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4354    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4355    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4356    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4357    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4358
4359    /* Quoted characters also will work if two quoted characters are not consecutive.  */
4360    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4361
4362    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4363    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4364
4365 };
4366  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4367}
4368
4369static void TestSameStrengthListQuotedQwerty(void)
4370{
4371  const char* strRules[] = {
4372    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4373    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4374    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4375
4376    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4377    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4378   };
4379  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4380}
4381
4382static void TestSameStrengthListRanges(void)
4383{
4384  const char* strRules[] = {
4385    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4386  };
4387  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4388}
4389
4390static void TestSameStrengthListSupplementalRanges(void)
4391{
4392  const char* strRules[] = {
4393    /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4394    "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4395  };
4396  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4397}
4398
4399static void TestSpecialCharacters(void)
4400{
4401  const char* strRules[] = {
4402    /* Normal */
4403    "&';'<'+'<','<'-'<'&'<'*'",
4404
4405    /* List */
4406    "&';'<*'+,-&*'",
4407
4408    /* Range */
4409    "&';'<*'+'-'-&*'",
4410  };
4411
4412  const static OneTestCase specialCharacterStrings[] = {
4413    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4414    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4415    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4416    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4417  };
4418  doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4419}
4420
4421static void TestPrivateUseCharacters(void)
4422{
4423  const char* strRules[] = {
4424    /* Normal */
4425    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4426    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4427  };
4428
4429  const static OneTestCase privateUseCharacterStrings[] = {
4430    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4431    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4432    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4433    { {0xe2da}, {0xe2db}, UCOL_LESS },
4434    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4435    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4436  };
4437  doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4438}
4439
4440static void TestPrivateUseCharactersInList(void)
4441{
4442  const char* strRules[] = {
4443    /* List */
4444    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4445    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4446    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4447  };
4448
4449  const static OneTestCase privateUseCharacterStrings[] = {
4450    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4451    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4452    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4453    { {0xe2da}, {0xe2db}, UCOL_LESS },
4454    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4455    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4456  };
4457  doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4458}
4459
4460static void TestPrivateUseCharactersInRange(void)
4461{
4462  const char* strRules[] = {
4463    /* Range */
4464    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4465    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4466    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4467  };
4468
4469  const static OneTestCase privateUseCharacterStrings[] = {
4470    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4471    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4472    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4473    { {0xe2da}, {0xe2db}, UCOL_LESS },
4474    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4475    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4476  };
4477  doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4478}
4479
4480static void TestInvalidListsAndRanges(void)
4481{
4482  const char* invalidRules[] = {
4483    /* Range not in starred expression */
4484    "&\\ufffe<\\uffff-\\U00010002",
4485
4486    /* Range without start */
4487    "&a<*-c",
4488
4489    /* Range without end */
4490    "&a<*b-",
4491
4492    /* More than one hyphen */
4493    "&a<*b-g-l",
4494
4495    /* Range in the wrong order */
4496    "&a<*k-b",
4497
4498  };
4499
4500  UChar rule[500];
4501  UErrorCode status = U_ZERO_ERROR;
4502  UParseError parse_error;
4503  int n_rules = UPRV_LENGTHOF(invalidRules);
4504  int rule_no;
4505  int length;
4506  UCollator  *myCollation;
4507
4508  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4509
4510    length = u_unescape(invalidRules[rule_no], rule, 500);
4511    if (length == 0) {
4512        log_err("ERROR: The rule cannot be unescaped: %s\n");
4513        return;
4514    }
4515    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4516    (void)myCollation;      /* Suppress set but not used warning. */
4517    if(!U_FAILURE(status)){
4518      log_err("ERROR: Could not cause a failure as expected: \n");
4519    }
4520    status = U_ZERO_ERROR;
4521  }
4522}
4523
4524/*
4525 * This test ensures that characters placed before a character in a different script have the same lead byte
4526 * in their collation key before and after script reordering.
4527 */
4528static void TestBeforeRuleWithScriptReordering(void)
4529{
4530    UParseError error;
4531    UErrorCode status = U_ZERO_ERROR;
4532    UCollator  *myCollation;
4533    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4534    UChar rules[500];
4535    uint32_t rulesLength = 0;
4536    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4537    UCollationResult collResult;
4538
4539    uint8_t baseKey[256];
4540    uint32_t baseKeyLength;
4541    uint8_t beforeKey[256];
4542    uint32_t beforeKeyLength;
4543
4544    UChar base[] = { 0x03b1 }; /* base */
4545    int32_t baseLen = UPRV_LENGTHOF(base);
4546
4547    UChar before[] = { 0x0e01 }; /* ko kai */
4548    int32_t beforeLen = UPRV_LENGTHOF(before);
4549
4550    /*UChar *data[] = { before, base };
4551    genericRulesStarter(srules, data, 2);*/
4552
4553    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4554
4555    (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4556    (void)baseKeyLength;
4557
4558    /* build collator */
4559    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4560
4561    rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4562    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4563    if(U_FAILURE(status)) {
4564        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4565        return;
4566    }
4567
4568    /* check collation results - before rule applied but not script reordering */
4569    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4570    if (collResult != UCOL_GREATER) {
4571        log_err("Collation result not correct before script reordering = %d\n", collResult);
4572    }
4573
4574    /* check the lead byte of the collation keys before script reordering */
4575    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4576    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4577    if (baseKey[0] != beforeKey[0]) {
4578      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4579   }
4580
4581    /* reorder the scripts */
4582    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4583    if(U_FAILURE(status)) {
4584        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4585        return;
4586    }
4587
4588    /* check collation results - before rule applied and after script reordering */
4589    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4590    if (collResult != UCOL_GREATER) {
4591        log_err("Collation result not correct after script reordering = %d\n", collResult);
4592    }
4593
4594    /* check the lead byte of the collation keys after script reordering */
4595    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4596    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4597    if (baseKey[0] != beforeKey[0]) {
4598        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4599    }
4600
4601    ucol_close(myCollation);
4602}
4603
4604/*
4605 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4606 */
4607static void TestNonLeadBytesDuringCollationReordering(void)
4608{
4609    UErrorCode status = U_ZERO_ERROR;
4610    UCollator  *myCollation;
4611    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4612
4613    uint8_t baseKey[256];
4614    uint32_t baseKeyLength;
4615    uint8_t reorderKey[256];
4616    uint32_t reorderKeyLength;
4617
4618    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4619
4620    uint32_t i;
4621
4622
4623    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4624
4625    /* build collator tertiary */
4626    myCollation = ucol_open("", &status);
4627    ucol_setStrength(myCollation, UCOL_TERTIARY);
4628    if(U_FAILURE(status)) {
4629        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4630        return;
4631    }
4632    baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4633
4634    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4635    if(U_FAILURE(status)) {
4636        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4637        return;
4638    }
4639    reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4640
4641    if (baseKeyLength != reorderKeyLength) {
4642        log_err("Key lengths not the same during reordering.\n");
4643        return;
4644    }
4645
4646    for (i = 1; i < baseKeyLength; i++) {
4647        if (baseKey[i] != reorderKey[i]) {
4648            log_err("Collation key bytes not the same at position %d.\n", i);
4649            return;
4650        }
4651    }
4652    ucol_close(myCollation);
4653
4654    /* build collator quaternary */
4655    myCollation = ucol_open("", &status);
4656    ucol_setStrength(myCollation, UCOL_QUATERNARY);
4657    if(U_FAILURE(status)) {
4658        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4659        return;
4660    }
4661    baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4662
4663    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4664    if(U_FAILURE(status)) {
4665        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4666        return;
4667    }
4668    reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4669
4670    if (baseKeyLength != reorderKeyLength) {
4671        log_err("Key lengths not the same during reordering.\n");
4672        return;
4673    }
4674
4675    for (i = 1; i < baseKeyLength; i++) {
4676        if (baseKey[i] != reorderKey[i]) {
4677            log_err("Collation key bytes not the same at position %d.\n", i);
4678            return;
4679        }
4680    }
4681    ucol_close(myCollation);
4682}
4683
4684/*
4685 * Test reordering API.
4686 */
4687static void TestReorderingAPI(void)
4688{
4689    UErrorCode status = U_ZERO_ERROR;
4690    UCollator  *myCollation;
4691    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4692    int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4693    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4694    int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4695    UCollationResult collResult;
4696    int32_t retrievedReorderCodesLength;
4697    int32_t retrievedReorderCodes[10];
4698    UChar greekString[] = { 0x03b1 };
4699    UChar punctuationString[] = { 0x203e };
4700    int loopIndex;
4701
4702    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4703
4704    /* build collator tertiary */
4705    myCollation = ucol_open("", &status);
4706    ucol_setStrength(myCollation, UCOL_TERTIARY);
4707    if(U_FAILURE(status)) {
4708        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4709        return;
4710    }
4711
4712    /* set the reorderding */
4713    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4714    if (U_FAILURE(status)) {
4715        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4716        return;
4717    }
4718
4719    /* get the reordering */
4720    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4721    if (status != U_BUFFER_OVERFLOW_ERROR) {
4722        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4723        return;
4724    }
4725    status = U_ZERO_ERROR;
4726    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4727        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4728        return;
4729    }
4730    /* now let's really get it */
4731    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4732    if (U_FAILURE(status)) {
4733        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4734        return;
4735    }
4736    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4737        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4738        return;
4739    }
4740    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4741        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4742            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4743            return;
4744        }
4745    }
4746    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4747    if (collResult != UCOL_LESS) {
4748        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4749        return;
4750    }
4751
4752    /* clear the reordering */
4753    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4754    if (U_FAILURE(status)) {
4755        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4756        return;
4757    }
4758
4759    /* get the reordering again */
4760    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4761    if (retrievedReorderCodesLength != 0) {
4762        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4763        return;
4764    }
4765
4766    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4767    if (collResult != UCOL_GREATER) {
4768        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4769        return;
4770    }
4771
4772    /* clear the reordering using [NONE] */
4773    ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4774    if (U_FAILURE(status)) {
4775        log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4776        return;
4777    }
4778
4779    /* get the reordering again */
4780    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4781    if (retrievedReorderCodesLength != 0) {
4782        log_err_status(status,
4783                       "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4784                       retrievedReorderCodesLength);
4785        return;
4786    }
4787
4788    /* test for error condition on duplicate reorder codes */
4789    ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4790    if (!U_FAILURE(status)) {
4791        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4792        return;
4793    }
4794
4795    status = U_ZERO_ERROR;
4796    /* test for reorder codes after a reset code */
4797    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4798    if (!U_FAILURE(status)) {
4799        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4800        return;
4801    }
4802
4803    ucol_close(myCollation);
4804}
4805
4806/*
4807 * Test reordering API.
4808 */
4809static void TestReorderingAPIWithRuleCreatedCollator(void)
4810{
4811    UErrorCode status = U_ZERO_ERROR;
4812    UCollator  *myCollation;
4813    UChar rules[90];
4814    static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4815    static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4816    static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4817    UCollationResult collResult;
4818    int32_t retrievedReorderCodesLength;
4819    int32_t retrievedReorderCodes[10];
4820    static const UChar greekString[] = { 0x03b1 };
4821    static const UChar punctuationString[] = { 0x203e };
4822    static const UChar hanString[] = { 0x65E5, 0x672C };
4823    int loopIndex;
4824
4825    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4826
4827    /* build collator from rules */
4828    u_uastrcpy(rules, "[reorder Hani Grek]");
4829    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4830    if(U_FAILURE(status)) {
4831        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4832        return;
4833    }
4834
4835    /* get the reordering */
4836    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4837    if (U_FAILURE(status)) {
4838        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4839        return;
4840    }
4841    if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4842        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4843        return;
4844    }
4845    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4846        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4847            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4848            return;
4849        }
4850    }
4851    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4852    if (collResult != UCOL_GREATER) {
4853        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4854        return;
4855    }
4856
4857    /* set the reordering */
4858    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4859    if (U_FAILURE(status)) {
4860        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4861        return;
4862    }
4863
4864    /* get the reordering */
4865    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4866    if (status != U_BUFFER_OVERFLOW_ERROR) {
4867        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4868        return;
4869    }
4870    status = U_ZERO_ERROR;
4871    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4872        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4873        return;
4874    }
4875    /* now let's really get it */
4876    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4877    if (U_FAILURE(status)) {
4878        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4879        return;
4880    }
4881    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4882        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4883        return;
4884    }
4885    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4886        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4887            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4888            return;
4889        }
4890    }
4891    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4892    if (collResult != UCOL_LESS) {
4893        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4894        return;
4895    }
4896
4897    /* clear the reordering */
4898    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4899    if (U_FAILURE(status)) {
4900        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4901        return;
4902    }
4903
4904    /* get the reordering again */
4905    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4906    if (retrievedReorderCodesLength != 0) {
4907        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4908        return;
4909    }
4910
4911    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4912    if (collResult != UCOL_GREATER) {
4913        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4914        return;
4915    }
4916
4917    /* reset the reordering */
4918    ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4919    if (U_FAILURE(status)) {
4920        log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4921        return;
4922    }
4923    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4924    if (U_FAILURE(status)) {
4925        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4926        return;
4927    }
4928    if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4929        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4930        return;
4931    }
4932    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4933        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4934            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4935            return;
4936        }
4937    }
4938
4939    ucol_close(myCollation);
4940}
4941
4942static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4943    int32_t i;
4944    for (i = 0; i < length; ++i) {
4945        if (expectedScript == scripts[i]) { return TRUE; }
4946    }
4947    return FALSE;
4948}
4949
4950static void TestEquivalentReorderingScripts(void) {
4951    // Beginning with ICU 55, collation reordering moves single scripts
4952    // rather than groups of scripts,
4953    // except where scripts share a range and sort primary-equal.
4954    UErrorCode status = U_ZERO_ERROR;
4955    int32_t equivalentScripts[100];
4956    int32_t length;
4957    int i;
4958    int32_t prevScript;
4959    /* These scripts are expected to be equivalent. */
4960    static const int32_t expectedScripts[] = {
4961        USCRIPT_HIRAGANA,
4962        USCRIPT_KATAKANA,
4963        USCRIPT_KATAKANA_OR_HIRAGANA
4964    };
4965
4966    equivalentScripts[0] = 0;
4967    length = ucol_getEquivalentReorderCodes(
4968            USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4969    if (U_FAILURE(status)) {
4970        log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4971        return;
4972    }
4973    if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4974        log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4975                "length expected 1, was = %d; expected [%d] was [%d]\n",
4976                length, USCRIPT_GOTHIC, equivalentScripts[0]);
4977    }
4978
4979    length = ucol_getEquivalentReorderCodes(
4980            USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4981    if (U_FAILURE(status)) {
4982        log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4983        return;
4984    }
4985    if (length != UPRV_LENGTHOF(expectedScripts)) {
4986        log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4987                "expected %d, was = %d\n",
4988                UPRV_LENGTHOF(expectedScripts), length);
4989    }
4990    prevScript = -1;
4991    for (i = 0; i < length; ++i) {
4992        int32_t script = equivalentScripts[i];
4993        if (script <= prevScript) {
4994            log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4995        }
4996        prevScript = script;
4997    }
4998    for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
4999        if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5000            log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5001                    expectedScripts[i]);
5002        }
5003    }
5004
5005    length = ucol_getEquivalentReorderCodes(
5006            USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5007    if (U_FAILURE(status)) {
5008        log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5009        return;
5010    }
5011    if (length != UPRV_LENGTHOF(expectedScripts)) {
5012        log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5013                "expected %d, was = %d\n",
5014                UPRV_LENGTHOF(expectedScripts), length);
5015    }
5016    for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5017        if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5018            log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5019                    expectedScripts[i]);
5020        }
5021    }
5022
5023    length = ucol_getEquivalentReorderCodes(
5024            USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5025    if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5026        log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5027                "expected %d, was = %d\n",
5028                UPRV_LENGTHOF(expectedScripts), length);
5029    }
5030
5031    length = ucol_getEquivalentReorderCodes(
5032            USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5033    if (U_FAILURE(status) || length != 3) {
5034        log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5035                "expected 3, was = %d\n", length);
5036    }
5037    length = ucol_getEquivalentReorderCodes(
5038            USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5039    if (U_FAILURE(status) || length != 3) {
5040        log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5041                "expected 3, was = %d\n", length);
5042    }
5043    length = ucol_getEquivalentReorderCodes(
5044            USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5045    if (U_FAILURE(status) || length != 3) {
5046        log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5047                "expected 3, was = %d\n", length);
5048    }
5049
5050    length = ucol_getEquivalentReorderCodes(
5051            USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5052    if (U_FAILURE(status) || length != 2) {
5053        log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5054                "expected 2, was = %d\n", length);
5055    }
5056    length = ucol_getEquivalentReorderCodes(
5057            USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5058    if (U_FAILURE(status) || length != 2) {
5059        log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5060                "expected 2, was = %d\n", length);
5061    }
5062}
5063
5064static void TestReorderingAcrossCloning(void)
5065{
5066    UErrorCode status = U_ZERO_ERROR;
5067    UCollator  *myCollation;
5068    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5069    UCollator *clonedCollation;
5070    int32_t retrievedReorderCodesLength;
5071    int32_t retrievedReorderCodes[10];
5072    int loopIndex;
5073
5074    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5075
5076    /* build collator tertiary */
5077    myCollation = ucol_open("", &status);
5078    ucol_setStrength(myCollation, UCOL_TERTIARY);
5079    if(U_FAILURE(status)) {
5080        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5081        return;
5082    }
5083
5084    /* set the reorderding */
5085    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5086    if (U_FAILURE(status)) {
5087        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5088        return;
5089    }
5090
5091    /* clone the collator */
5092    clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5093    if (U_FAILURE(status)) {
5094        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5095        return;
5096    }
5097
5098    /* get the reordering */
5099    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5100    if (U_FAILURE(status)) {
5101        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5102        return;
5103    }
5104    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5105        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5106        return;
5107    }
5108    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5109        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5110            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5111            return;
5112        }
5113    }
5114
5115    /*uprv_free(buffer);*/
5116    ucol_close(myCollation);
5117    ucol_close(clonedCollation);
5118}
5119
5120/*
5121 * Utility function to test one collation reordering test case set.
5122 * @param testcases Array of test cases.
5123 * @param n_testcases Size of the array testcases.
5124 * @param reorderTokens Array of reordering codes.
5125 * @param reorderTokensLen Size of the array reorderTokens.
5126 */
5127static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5128{
5129    uint32_t testCaseNum;
5130    UErrorCode status = U_ZERO_ERROR;
5131    UCollator  *myCollation;
5132
5133    myCollation = ucol_open("", &status);
5134    if (U_FAILURE(status)) {
5135        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5136        return;
5137    }
5138    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5139    if(U_FAILURE(status)) {
5140        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5141        return;
5142    }
5143
5144    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5145        doTest(myCollation,
5146            testCases[testCaseNum].source,
5147            testCases[testCaseNum].target,
5148            testCases[testCaseNum].result
5149        );
5150    }
5151    ucol_close(myCollation);
5152}
5153
5154static void TestGreekFirstReorder(void)
5155{
5156    const char* strRules[] = {
5157        "[reorder Grek]"
5158    };
5159
5160    const int32_t apiRules[] = {
5161        USCRIPT_GREEK
5162    };
5163
5164    const static OneTestCase privateUseCharacterStrings[] = {
5165        { {0x0391}, {0x0391}, UCOL_EQUAL },
5166        { {0x0041}, {0x0391}, UCOL_GREATER },
5167        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5168        { {0x0060}, {0x0391}, UCOL_LESS },
5169        { {0x0391}, {0xe2dc}, UCOL_LESS },
5170        { {0x0391}, {0x0060}, UCOL_GREATER },
5171    };
5172
5173    /* Test rules creation */
5174    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5175
5176    /* Test collation reordering API */
5177    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5178}
5179
5180static void TestGreekLastReorder(void)
5181{
5182    const char* strRules[] = {
5183        "[reorder Zzzz Grek]"
5184    };
5185
5186    const int32_t apiRules[] = {
5187        USCRIPT_UNKNOWN, USCRIPT_GREEK
5188    };
5189
5190    const static OneTestCase privateUseCharacterStrings[] = {
5191        { {0x0391}, {0x0391}, UCOL_EQUAL },
5192        { {0x0041}, {0x0391}, UCOL_LESS },
5193        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5194        { {0x0060}, {0x0391}, UCOL_LESS },
5195        { {0x0391}, {0xe2dc}, UCOL_GREATER },
5196    };
5197
5198    /* Test rules creation */
5199    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5200
5201    /* Test collation reordering API */
5202    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5203}
5204
5205static void TestNonScriptReorder(void)
5206{
5207    const char* strRules[] = {
5208        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5209    };
5210
5211    const int32_t apiRules[] = {
5212        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5213        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5214        UCOL_REORDER_CODE_CURRENCY
5215    };
5216
5217    const static OneTestCase privateUseCharacterStrings[] = {
5218        { {0x0391}, {0x0041}, UCOL_LESS },
5219        { {0x0041}, {0x0391}, UCOL_GREATER },
5220        { {0x0060}, {0x0041}, UCOL_LESS },
5221        { {0x0060}, {0x0391}, UCOL_GREATER },
5222        { {0x0024}, {0x0041}, UCOL_GREATER },
5223    };
5224
5225    /* Test rules creation */
5226    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5227
5228    /* Test collation reordering API */
5229    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5230}
5231
5232static void TestHaniReorder(void)
5233{
5234    const char* strRules[] = {
5235        "[reorder Hani]"
5236    };
5237    const int32_t apiRules[] = {
5238        USCRIPT_HAN
5239    };
5240
5241    const static OneTestCase privateUseCharacterStrings[] = {
5242        { {0x4e00}, {0x0041}, UCOL_LESS },
5243        { {0x4e00}, {0x0060}, UCOL_GREATER },
5244        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5245        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5246        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5247        { {0xfa27}, {0x0041}, UCOL_LESS },
5248        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5249    };
5250
5251    /* Test rules creation */
5252    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5253
5254    /* Test collation reordering API */
5255    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5256}
5257
5258static void TestHaniReorderWithOtherRules(void)
5259{
5260    const char* strRules[] = {
5261        "[reorder Hani] &b<a"
5262    };
5263    /*const int32_t apiRules[] = {
5264        USCRIPT_HAN
5265    };*/
5266
5267    const static OneTestCase privateUseCharacterStrings[] = {
5268        { {0x4e00}, {0x0041}, UCOL_LESS },
5269        { {0x4e00}, {0x0060}, UCOL_GREATER },
5270        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5271        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5272        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5273        { {0xfa27}, {0x0041}, UCOL_LESS },
5274        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5275        { {0x0062}, {0x0061}, UCOL_LESS },
5276    };
5277
5278    /* Test rules creation */
5279    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5280}
5281
5282static void TestMultipleReorder(void)
5283{
5284    const char* strRules[] = {
5285        "[reorder Grek Zzzz DIGIT Latn Hani]"
5286    };
5287
5288    const int32_t apiRules[] = {
5289        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5290    };
5291
5292    const static OneTestCase collationTestCases[] = {
5293        { {0x0391}, {0x0041}, UCOL_LESS},
5294        { {0x0031}, {0x0041}, UCOL_LESS},
5295        { {0x0041}, {0x4e00}, UCOL_LESS},
5296    };
5297
5298    /* Test rules creation */
5299    doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5300
5301    /* Test collation reordering API */
5302    doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5303}
5304
5305/*
5306 * Test that covers issue reported in ticket 8814
5307 */
5308static void TestReorderWithNumericCollation(void)
5309{
5310    UErrorCode status = U_ZERO_ERROR;
5311    UCollator  *myCollation;
5312    UCollator  *myReorderCollation;
5313    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5314    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5315    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5316    UChar fortyS[] = { 0x0053 };
5317    UChar fortyThreeP[] = { 0x0050 };
5318    uint8_t fortyS_sortKey[128];
5319    int32_t fortyS_sortKey_Length;
5320    uint8_t fortyThreeP_sortKey[128];
5321    int32_t fortyThreeP_sortKey_Length;
5322    uint8_t fortyS_sortKey_reorder[128];
5323    int32_t fortyS_sortKey_reorder_Length;
5324    uint8_t fortyThreeP_sortKey_reorder[128];
5325    int32_t fortyThreeP_sortKey_reorder_Length;
5326    UCollationResult collResult;
5327    UCollationResult collResultReorder;
5328
5329    log_verbose("Testing reordering with and without numeric collation\n");
5330
5331    /* build collator tertiary with numeric */
5332    myCollation = ucol_open("", &status);
5333    /*
5334    ucol_setStrength(myCollation, UCOL_TERTIARY);
5335    */
5336    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5337    if(U_FAILURE(status)) {
5338        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5339        return;
5340    }
5341
5342    /* build collator tertiary with numeric and reordering */
5343    myReorderCollation = ucol_open("", &status);
5344    /*
5345    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5346    */
5347    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5348    ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5349    if(U_FAILURE(status)) {
5350        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5351        return;
5352    }
5353
5354    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5355    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5356    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5357    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5358
5359    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5360        log_err_status(status, "ERROR: couldn't generate sort keys\n");
5361        return;
5362    }
5363    collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5364    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5365    /*
5366    fprintf(stderr, "\tcollResult = %x\n", collResult);
5367    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5368    fprintf(stderr, "\nfortyS\n");
5369    for (i = 0; i < fortyS_sortKey_Length; i++) {
5370        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5371    }
5372    fprintf(stderr, "\nfortyThreeP\n");
5373    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5374        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5375    }
5376    */
5377    if (collResult != collResultReorder) {
5378        log_err_status(status, "ERROR: collation results should have been the same.\n");
5379        return;
5380    }
5381
5382    ucol_close(myCollation);
5383    ucol_close(myReorderCollation);
5384}
5385
/*
 * Compares two zero-terminated byte sequences, strcmp-style but on unsigned bytes.
 * Returns 0 if both sequences are equal up to and including the terminating 0,
 * -1 if the first differing byte of a is smaller than that of b, and 1 otherwise.
 * Both inputs must contain a 0 byte or differ before running off their buffers.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  /* Advance in lockstep while the bytes agree. */
  while (*a == *b) {
    if (*a == 0) {
      /* Reached the terminator on both sides without finding a difference. */
      return 0;
    }
    ++a;
    ++b;
  }
  if (*a < *b) {
    return -1;
  }
  return 1;
}
5395
5396static void TestImportRulesDeWithPhonebook(void)
5397{
5398  const char* normalRules[] = {
5399    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5400    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5401    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5402  };
5403  const OneTestCase normalTests[] = {
5404    { {0x00e6}, {0x00c6}, UCOL_LESS},
5405    { {0x00fc}, {0x00dc}, UCOL_GREATER},
5406  };
5407
5408  const char* importRules[] = {
5409    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5410    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5411    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5412  };
5413  const OneTestCase importTests[] = {
5414    { {0x00e6}, {0x00c6}, UCOL_LESS},
5415    { {0x00fc}, {0x00dc}, UCOL_LESS},
5416  };
5417
5418  doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5419  doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5420}
5421
5422#if 0
/*
 * (Currently compiled out by the surrounding #if 0.)
 * Runs the same three comparisons against four rule sets: a dummy rule,
 * "[import root-u-co-eor]", "[import fi-u-co-standard]", and both imports
 * together. The final case, importing "fi-u-co-eor" directly, is left
 * commented out pending ICU ticket #8962.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Expected results with only the dummy rule. */
  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorRules[] = {
    "[import root-u-co-eor]",
  };

  /* Expected results with the European Ordering Rules import. */
  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard rules. */
  const char* fiStdRules[] = {
    "[import fi-u-co-standard]",
  };

  /* Expected results with the Finnish standard import. */
  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* This is essentially same as the one before once fi.txt is updated with import. */
  /* Only referenced by the commented-out call at the end of this function. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expected results when both rule sets are in effect. */
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
  doTestOneTestCase(eorTests, UPRV_LENGTHOF(eorTests), eorRules, UPRV_LENGTHOF(eorRules));
  doTestOneTestCase(fiStdTests, UPRV_LENGTHOF(fiStdTests), fiStdRules, UPRV_LENGTHOF(fiStdRules));
  /* The combined expectations are checked against the two explicit imports. */
  doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), eorFiStdRules, UPRV_LENGTHOF(eorFiStdRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, UPRV_LENGTHOF(fiEorTests), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */

}
5490#endif
5491
5492#if 0
5493/*
5494 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
5495 * the resource files are built with -includeUnihanColl option.
5496 * TODO: Uncomment this function and make it work when unihan rules are built by default.
5497 */
/*
 * (Currently compiled out by the surrounding #if 0.)
 * Compares U+3402 with U+4E1E under a dummy rule and under
 * "[import ko-u-co-unihan]"; the expected result flips from
 * UCOL_GREATER to UCOL_LESS once the import is in effect.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* defaultRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };

  /* Expected result with only the dummy rule. */
  const OneTestCase defaultTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Rules imported from the Korean "unihan" collation type. */
  const char* unihanRules[] = {
    "[import ko-u-co-unihan]",
  };

  /* Expected result with the unihan import. */
  const OneTestCase unihanTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(defaultTests, UPRV_LENGTHOF(defaultTests), defaultRules, UPRV_LENGTHOF(defaultRules));
  doTestOneTestCase(unihanTests, UPRV_LENGTHOF(unihanTests), unihanRules, UPRV_LENGTHOF(unihanRules));

}
5522#endif
5523
5524static void TestImport(void)
5525{
5526    UCollator* vicoll;
5527    UCollator* escoll;
5528    UCollator* viescoll;
5529    UCollator* importviescoll;
5530    UParseError error;
5531    UErrorCode status = U_ZERO_ERROR;
5532    UChar* virules;
5533    int32_t viruleslength;
5534    UChar* esrules;
5535    int32_t esruleslength;
5536    UChar* viesrules;
5537    int32_t viesruleslength;
5538    char srules[500] = "[import vi][import es]";
5539    UChar rules[500];
5540    uint32_t length = 0;
5541    int32_t itemCount;
5542    int32_t i, k;
5543    UChar32 start;
5544    UChar32 end;
5545    UChar str[500];
5546    int32_t strLength;
5547
5548    uint8_t sk1[500];
5549    uint8_t sk2[500];
5550
5551    UBool b;
5552    USet* tailoredSet;
5553    USet* importTailoredSet;
5554
5555
5556    vicoll = ucol_open("vi", &status);
5557    if(U_FAILURE(status)){
5558        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5559        return;
5560    }
5561
5562    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5563    if(viruleslength == 0) {
5564        log_data_err("missing vi tailoring rule string\n");
5565        ucol_close(vicoll);
5566        return;
5567    }
5568    escoll = ucol_open("es", &status);
5569    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5570    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5571    viesrules[0] = 0;
5572    u_strcat(viesrules, virules);
5573    u_strcat(viesrules, esrules);
5574    viesruleslength = viruleslength + esruleslength;
5575    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5576
5577    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5578    length = u_unescape(srules, rules, 500);
5579    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5580    if(U_FAILURE(status)){
5581        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5582        return;
5583    }
5584
5585    tailoredSet = ucol_getTailoredSet(viescoll, &status);
5586    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5587
5588    if(!uset_equals(tailoredSet, importTailoredSet)){
5589        log_err("Tailored sets not equal");
5590    }
5591
5592    uset_close(importTailoredSet);
5593
5594    itemCount = uset_getItemCount(tailoredSet);
5595
5596    for( i = 0; i < itemCount; i++){
5597        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5598        if(strLength < 2){
5599            for (; start <= end; start++){
5600                k = 0;
5601                U16_APPEND(str, k, 500, start, b);
5602                (void)b;    /* Suppress set but not used warning. */
5603                ucol_getSortKey(viescoll, str, 1, sk1, 500);
5604                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5605                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5606                    log_err("Sort key for %s not equal\n", str);
5607                    break;
5608                }
5609            }
5610        }else{
5611            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5612            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5613            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5614                log_err("ZZSort key for %s not equal\n", str);
5615                break;
5616            }
5617
5618        }
5619    }
5620
5621    uset_close(tailoredSet);
5622
5623    uprv_free(viesrules);
5624
5625    ucol_close(vicoll);
5626    ucol_close(escoll);
5627    ucol_close(viescoll);
5628    ucol_close(importviescoll);
5629}
5630
5631static void TestImportWithType(void)
5632{
5633    UCollator* vicoll;
5634    UCollator* decoll;
5635    UCollator* videcoll;
5636    UCollator* importvidecoll;
5637    UParseError error;
5638    UErrorCode status = U_ZERO_ERROR;
5639    const UChar* virules;
5640    int32_t viruleslength;
5641    const UChar* derules;
5642    int32_t deruleslength;
5643    UChar* viderules;
5644    int32_t videruleslength;
5645    const char srules[500] = "[import vi][import de-u-co-phonebk]";
5646    UChar rules[500];
5647    uint32_t length = 0;
5648    int32_t itemCount;
5649    int32_t i, k;
5650    UChar32 start;
5651    UChar32 end;
5652    UChar str[500];
5653    int32_t strLength;
5654
5655    uint8_t sk1[500];
5656    uint8_t sk2[500];
5657
5658    USet* tailoredSet;
5659    USet* importTailoredSet;
5660
5661    vicoll = ucol_open("vi", &status);
5662    if(U_FAILURE(status)){
5663        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5664        return;
5665    }
5666    virules = ucol_getRules(vicoll, &viruleslength);
5667    if(viruleslength == 0) {
5668        log_data_err("missing vi tailoring rule string\n");
5669        ucol_close(vicoll);
5670        return;
5671    }
5672    /* decoll = ucol_open("de@collation=phonebook", &status); */
5673    decoll = ucol_open("de-u-co-phonebk", &status);
5674    if(U_FAILURE(status)){
5675        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5676        return;
5677    }
5678
5679
5680    derules = ucol_getRules(decoll, &deruleslength);
5681    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5682    viderules[0] = 0;
5683    u_strcat(viderules, virules);
5684    u_strcat(viderules, derules);
5685    videruleslength = viruleslength + deruleslength;
5686    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5687
5688    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5689    length = u_unescape(srules, rules, 500);
5690    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5691    if(U_FAILURE(status)){
5692        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5693        return;
5694    }
5695
5696    tailoredSet = ucol_getTailoredSet(videcoll, &status);
5697    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5698
5699    if(!uset_equals(tailoredSet, importTailoredSet)){
5700        log_err("Tailored sets not equal");
5701    }
5702
5703    uset_close(importTailoredSet);
5704
5705    itemCount = uset_getItemCount(tailoredSet);
5706
5707    for( i = 0; i < itemCount; i++){
5708        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5709        if(strLength < 2){
5710            for (; start <= end; start++){
5711                k = 0;
5712                U16_APPEND_UNSAFE(str, k, start);
5713                ucol_getSortKey(videcoll, str, 1, sk1, 500);
5714                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5715                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5716                    log_err("Sort key for %s not equal\n", str);
5717                    break;
5718                }
5719            }
5720        }else{
5721            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5722            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5723            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5724                log_err("Sort key for %s not equal\n", str);
5725                break;
5726            }
5727
5728        }
5729    }
5730
5731    uset_close(tailoredSet);
5732
5733    uprv_free(viderules);
5734
5735    ucol_close(videcoll);
5736    ucol_close(importvidecoll);
5737    ucol_close(vicoll);
5738    ucol_close(decoll);
5739}
5740
/*
 * 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia'
 *
 * The array carries no terminating 0, so its length has to be taken from the
 * array size (see longUpperStrItems, which uses UPRV_LENGTHOF).
 */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
5754
/*
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times.
 * Each row below is one 25-unit repetition; the array has no terminating 0.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
5763
/*
 * 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times.
 * Each row below is one 27-unit repetition; the array has no terminating 0.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
5779
/* One entry of the long-string test table: a UChar string plus its explicit length. */
typedef struct {
    const UChar * longUpperStrPtr; /* string data; NULL marks the end of the table */
    int32_t       longUpperStrLen; /* number of UChars at longUpperStrPtr */
} LongUpperStrItem;
5784
/*
 * String pointers must be in reverse collation order of the corresponding strings:
 * TestCaseLevelBufferOverflow expects each entry's sort key to compare less than
 * the sort key of the entry before it.
 * The table is terminated by an entry whose string pointer is NULL.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
    { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
    { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
    { NULL,          0                           }
};
5792
/* Capacity in bytes of the sort key buffers used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5794
5795/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5796static void TestCaseLevelBufferOverflow(void)
5797{
5798    UErrorCode status = U_ZERO_ERROR;
5799    UCollator * ucol = ucol_open("root", &status);
5800    if ( U_SUCCESS(status) ) {
5801        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5802        if ( U_SUCCESS(status) ) {
5803            const LongUpperStrItem * itemPtr;
5804            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5805            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5806                int32_t sortKeyLen;
5807                if (itemPtr > longUpperStrItems) {
5808                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5809                }
5810                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5811                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5812                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5813                    break;
5814                }
5815                if ( itemPtr > longUpperStrItems ) {
5816                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5817                    if (compareResult >= 0) {
5818                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5819                    }
5820                }
5821            }
5822        } else {
5823            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5824        }
5825        ucol_close(ucol);
5826    } else {
5827        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5828    }
5829}
5830
/* Test for #10595 */
/* Input string for TestNextSortKeyPartJaIdentical: five UChars followed by a terminating 0. */
static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
/* Size in bytes of the buffer handed to each ucol_nextSortKeyPart call. */
#define KEY_PART_SIZE 16
5834
5835static void TestNextSortKeyPartJaIdentical(void)
5836{
5837    UErrorCode status = U_ZERO_ERROR;
5838    UCollator *coll;
5839    uint8_t keyPart[KEY_PART_SIZE];
5840    UCharIterator iter;
5841    uint32_t state[2] = {0, 0};
5842    int32_t keyPartLen;
5843
5844    coll = ucol_open("ja", &status);
5845    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5846    if (U_FAILURE(status)) {
5847        log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5848        return;
5849    }
5850
5851    uiter_setString(&iter, testJapaneseName, 5);
5852    keyPartLen = KEY_PART_SIZE;
5853    while (keyPartLen == KEY_PART_SIZE) {
5854        keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5855        if (U_FAILURE(status)) {
5856            log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5857            break;
5858        }
5859    }
5860
5861    ucol_close(coll);
5862}
5863
/*
 * Registers test function x via addTest under the path "tscoll/cmsccoll/<x>".
 * Expects a variable named root to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5865
/*
 * Registers the miscellaneous collation tests of this file with the test tree.
 * Each TEST(x) line adds function x under "tscoll/cmsccoll/x"; the
 * commented-out entries are tests that are intentionally not registered.
 *
 * @param root the test tree root handed on to addTest
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    TEST(TestUpperCaseFirst);
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Script/code reordering tests. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5970
5971#endif /* #if !UCONFIG_NO_COLLATION */
5972