1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/*******************************************************************************
9*
10* File cmsccoll.C
11*
12*******************************************************************************/
13/**
14 * These are the tests specific to ICU 1.8 and above, that I didn't know where
15 * to fit.
16 */
17
18#include <stdio.h>
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "cintltst.h"
28#include "ccolltst.h"
29#include "callcoll.h"
30#include "unicode/ustring.h"
31#include "string.h"
32#include "ucol_imp.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
38#include "unicode/ures.h"
39#include "unicode/uscript.h"
40#include "unicode/utf16.h"
41#include "uparse.h"
42#include "putilimp.h"
43
44
/* Row capacity, in UChars, of the fixed-width UChar test tables in this file
 * (e.g. testSourceCases, testTargetCases). */
#define MAX_TOKEN_LEN 16

/*
 * Function type for a string-comparison callback that returns a
 * UCollationResult for a (source, sLen) / (target, tLen) pair.
 * NOTE(review): the meaning of `collator` (passed as void *) and of `object`
 * is not visible in this part of the file -- confirm against the users of
 * this typedef.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
50
51
52
/*
 * Word lists for IncompleteCntTest.  Each list is written in the order the
 * test expects: every entry must collate before all entries that follow it.
 */

/* Used with the rules " & Z < ABC < Q < B". */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ",
  "Z", "ABC", "Q", "B"
};

/* Used with the rules " & Z < DAVIS < MARK <DAV". */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ", "MAR",
  "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
79
80static void IncompleteCntTest(void)
81{
82  UErrorCode status = U_ZERO_ERROR;
83  UChar temp[90];
84  UChar t1[90];
85  UChar t2[90];
86
87  UCollator *coll =  NULL;
88  uint32_t i = 0, j = 0;
89  uint32_t size = 0;
90
91  u_uastrcpy(temp, " & Z < ABC < Q < B");
92
93  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
94
95  if(U_SUCCESS(status)) {
96    size = UPRV_LENGTHOF(cnt1);
97    for(i = 0; i < size-1; i++) {
98      for(j = i+1; j < size; j++) {
99        UCollationElements *iter;
100        u_uastrcpy(t1, cnt1[i]);
101        u_uastrcpy(t2, cnt1[j]);
102        doTest(coll, t1, t2, UCOL_LESS);
103        /* synwee : added collation element iterator test */
104        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
105        if (U_FAILURE(status)) {
106          log_err("Creation of iterator failed\n");
107          break;
108        }
109        backAndForth(iter);
110        ucol_closeElements(iter);
111      }
112    }
113  }
114
115  ucol_close(coll);
116
117
118  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
119  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
120
121  if(U_SUCCESS(status)) {
122    size = UPRV_LENGTHOF(cnt2);
123    for(i = 0; i < size-1; i++) {
124      for(j = i+1; j < size; j++) {
125        UCollationElements *iter;
126        u_uastrcpy(t1, cnt2[i]);
127        u_uastrcpy(t2, cnt2[j]);
128        doTest(coll, t1, t2, UCOL_LESS);
129
130        /* synwee : added collation element iterator test */
131        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
132        if (U_FAILURE(status)) {
133          log_err("Creation of iterator failed\n");
134          break;
135        }
136        backAndForth(iter);
137        ucol_closeElements(iter);
138      }
139    }
140  }
141
142  ucol_close(coll);
143
144
145}
146
/*
 * Strings for the UCOL_SHIFTED passes of BlackBirdTest, listed in the
 * ascending order expected at quaternary strength.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
158
159const static UCollationResult shiftedTert[] = {
160  UCOL_EQUAL,
161  UCOL_EQUAL,
162  UCOL_EQUAL,
163  UCOL_LESS,
164  UCOL_EQUAL,
165  UCOL_EQUAL,
166  UCOL_LESS,
167  UCOL_EQUAL,
168  UCOL_EQUAL
169};
170
/*
 * Strings for the UCOL_NON_IGNORABLE pass of BlackBirdTest, listed in the
 * ascending order that pass expects.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
182
183static void BlackBirdTest(void) {
184  UErrorCode status = U_ZERO_ERROR;
185  UChar t1[90];
186  UChar t2[90];
187
188  uint32_t i = 0, j = 0;
189  uint32_t size = 0;
190  UCollator *coll = ucol_open("en_US", &status);
191
192  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
193  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
194
195  if(U_SUCCESS(status)) {
196    size = UPRV_LENGTHOF(nonignorable);
197    for(i = 0; i < size-1; i++) {
198      for(j = i+1; j < size; j++) {
199        u_uastrcpy(t1, nonignorable[i]);
200        u_uastrcpy(t2, nonignorable[j]);
201        doTest(coll, t1, t2, UCOL_LESS);
202      }
203    }
204  }
205
206  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
207  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
208
209  if(U_SUCCESS(status)) {
210    size = UPRV_LENGTHOF(shifted);
211    for(i = 0; i < size-1; i++) {
212      for(j = i+1; j < size; j++) {
213        u_uastrcpy(t1, shifted[i]);
214        u_uastrcpy(t2, shifted[j]);
215        doTest(coll, t1, t2, UCOL_LESS);
216      }
217    }
218  }
219
220  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
221  if(U_SUCCESS(status)) {
222    size = UPRV_LENGTHOF(shifted);
223    for(i = 1; i < size; i++) {
224      u_uastrcpy(t1, shifted[i-1]);
225      u_uastrcpy(t2, shifted[i]);
226      doTest(coll, t1, t2, shiftedTert[i]);
227    }
228  }
229
230  ucol_close(coll);
231}
232
233const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
234    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
235    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
236    {0x0041/*'A'*/, 0x0300, 0x0000},
237    {0x00C0, 0x0301, 0x0000},
238    /* this would work with forced normalization */
239    {0x00C0, 0x0316, 0x0000}
240};
241
242const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
243    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
244    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
245    {0x00C0, 0},
246    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
247    /* this would work with forced normalization */
248    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
249};
250
251const static UCollationResult results[] = {
252    UCOL_GREATER,
253    UCOL_EQUAL,
254    UCOL_EQUAL,
255    UCOL_GREATER,
256    UCOL_EQUAL
257};
258
259static void FunkyATest(void)
260{
261
262    int32_t i;
263    UErrorCode status = U_ZERO_ERROR;
264    UCollator  *myCollation;
265    myCollation = ucol_open("en_US", &status);
266    if(U_FAILURE(status)){
267        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
268        return;
269    }
270    log_verbose("Testing some A letters, for some reason\n");
271    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
272    ucol_setStrength(myCollation, UCOL_TERTIARY);
273    for (i = 0; i < 4 ; i++)
274    {
275        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
276    }
277    ucol_close(myCollation);
278}
279
280UColAttributeValue caseFirst[] = {
281    UCOL_OFF,
282    UCOL_LOWER_FIRST,
283    UCOL_UPPER_FIRST
284};
285
286
287UColAttributeValue alternateHandling[] = {
288    UCOL_NON_IGNORABLE,
289    UCOL_SHIFTED
290};
291
292UColAttributeValue caseLevel[] = {
293    UCOL_OFF,
294    UCOL_ON
295};
296
297UColAttributeValue strengths[] = {
298    UCOL_PRIMARY,
299    UCOL_SECONDARY,
300    UCOL_TERTIARY,
301    UCOL_QUATERNARY,
302    UCOL_IDENTICAL
303};
304
#if 0
/* Printable names, index-parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-parallel to caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, index-parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-parallel to caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/* not used currently - does not test only prints */
/*
 * Dumps to stderr the sort key of "M\u00E4rk Davis" for every combination of
 * caseFirst x alternateHandling x caseLevel x strength on an en_US collator.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<UPRV_LENGTHOF(caseFirst); h++) {
    /* Index with h: i still holds sizem from the loop above, which is past
     * the end of caseFirst[]. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<UPRV_LENGTHOF(alternateHandling); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<UPRV_LENGTHOF(caseLevel); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<UPRV_LENGTHOF(strengths); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          /* Only the key bytes are needed; the returned size was never used. */
          (void)ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* The collator was previously leaked. */
  ucol_close(coll);
}
#endif
382
383static void BillFairmanTest(void) {
384/*
385** check for actual locale via ICU resource bundles
386**
387** lp points to the original locale ("fr_FR_....")
388*/
389
390    UResourceBundle *lr,*cr;
391    UErrorCode              lec = U_ZERO_ERROR;
392    const char *lp = "fr_FR_you_ll_never_find_this_locale";
393
394    log_verbose("BillFairmanTest\n");
395
396    lr = ures_open(NULL,lp,&lec);
397    if (lr) {
398        cr = ures_getByKey(lr,"collations",0,&lec);
399        if (cr) {
400            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
401            if (lp) {
402                if (U_SUCCESS(lec)) {
403                    if(strcmp(lp, "fr") != 0) {
404                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
405                    }
406                }
407            }
408            ures_close(cr);
409        }
410        ures_close(lr);
411    }
412}
413
/*
 * Strings for TestChMove, in the ascending order expected from the "cs"
 * collator.  Entries are u_unescape() input, so "\\u030C" denotes U+030C.
 */
static const char chTest[][20] = {
  /* c */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
433
434static void TestChMove(void) {
435    UChar t1[256] = {0};
436    UChar t2[256] = {0};
437
438    uint32_t i = 0, j = 0;
439    uint32_t size = 0;
440    UErrorCode status = U_ZERO_ERROR;
441
442    UCollator *coll = ucol_open("cs", &status);
443
444    if(U_SUCCESS(status)) {
445        size = UPRV_LENGTHOF(chTest);
446        for(i = 0; i < size-1; i++) {
447            for(j = i+1; j < size; j++) {
448                u_unescape(chTest[i], t1, 256);
449                u_unescape(chTest[j], t2, 256);
450                doTest(coll, t1, t2, UCOL_LESS);
451            }
452        }
453    }
454    else {
455        log_data_err("Can't open collator");
456    }
457    ucol_close(coll);
458}
459
460
461
462
463/*
464const static char impTest[][20] = {
465  "\\u4e00",
466    "a",
467    "A",
468    "b",
469    "B",
470    "\\u4e01"
471};
472*/
473
474
475static void TestImplicitTailoring(void) {
476  static const struct {
477    const char *rules;
478    const char *data[10];
479    const uint32_t len;
480  } tests[] = {
481      {
482        /* Tailor b and c before U+4E00. */
483        "&[before 1]\\u4e00 < b < c "
484        /* Now, before U+4E00 is c; put d and e after that. */
485        "&[before 1]\\u4e00 < d < e",
486        { "b", "c", "d", "e", "\\u4e00"}, 5 },
487      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
488      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
489      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
490  };
491
492  int32_t i = 0;
493
494  for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
495      genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
496  }
497
498/*
499  UChar t1[256] = {0};
500  UChar t2[256] = {0};
501
502  const char *rule = "&\\u4e00 < a <<< A < b <<< B";
503
504  uint32_t i = 0, j = 0;
505  uint32_t size = 0;
506  uint32_t ruleLen = 0;
507  UErrorCode status = U_ZERO_ERROR;
508  UCollator *coll = NULL;
509  ruleLen = u_unescape(rule, t1, 256);
510
511  coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
512
513  if(U_SUCCESS(status)) {
514    size = UPRV_LENGTHOF(impTest);
515    for(i = 0; i < size-1; i++) {
516      for(j = i+1; j < size; j++) {
517        u_unescape(impTest[i], t1, 256);
518        u_unescape(impTest[j], t2, 256);
519        doTest(coll, t1, t2, UCOL_LESS);
520      }
521    }
522  }
523  else {
524    log_err("Can't open collator");
525  }
526  ucol_close(coll);
527  */
528}
529
530static void TestFCDProblem(void) {
531  UChar t1[256] = {0};
532  UChar t2[256] = {0};
533
534  const char *s1 = "\\u0430\\u0306\\u0325";
535  const char *s2 = "\\u04D1\\u0325";
536
537  UErrorCode status = U_ZERO_ERROR;
538  UCollator *coll = ucol_open("", &status);
539  u_unescape(s1, t1, 256);
540  u_unescape(s2, t2, 256);
541
542  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
543  doTest(coll, t1, t2, UCOL_EQUAL);
544
545  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
546  doTest(coll, t1, t2, UCOL_EQUAL);
547
548  ucol_close(coll);
549}
550
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
#define NORM_BUFFER_TEST_LEN 18
/* One TestComposeDecompose case: a code point and the two strings compared for it. */
typedef struct {
  UChar32 u;                        /* code point under test */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* NFC form; TestComposeDecompose may overwrite it with the raw code point */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* NFD form */
} tester;
561
562static void TestComposeDecompose(void) {
563    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
564    static const UChar UNICODESET_STR[] = {
565        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
566        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
567        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
568    };
569    int32_t noOfLoc;
570    int32_t i = 0, j = 0;
571
572    UErrorCode status = U_ZERO_ERROR;
573    const char *locName = NULL;
574    uint32_t nfcSize;
575    uint32_t nfdSize;
576    tester **t;
577    uint32_t noCases = 0;
578    UCollator *coll = NULL;
579    UChar32 u = 0;
580    UChar comp[NORM_BUFFER_TEST_LEN];
581    uint32_t len = 0;
582    UCollationElements *iter;
583    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
584    int32_t charsToTestSize;
585
586    noOfLoc = uloc_countAvailable();
587
588    coll = ucol_open("", &status);
589    if (U_FAILURE(status)) {
590        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
591        return;
592    }
593    charsToTestSize = uset_size(charsToTest);
594    if (charsToTestSize <= 0) {
595        log_err("Set was zero. Missing data?\n");
596        return;
597    }
598    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
599    t[0] = (tester *)malloc(sizeof(tester));
600    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
601
602    for(u = 0; u < charsToTestSize; u++) {
603        UChar32 ch = uset_charAt(charsToTest, u);
604        len = 0;
605        U16_APPEND_UNSAFE(comp, len, ch);
606        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
607        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
608
609        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
610          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
611            t[noCases]->u = ch;
612            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
613                u_strncpy(t[noCases]->NFC, comp, len);
614                t[noCases]->NFC[len] = 0;
615            }
616            noCases++;
617            t[noCases] = (tester *)malloc(sizeof(tester));
618            uprv_memset(t[noCases], 0, sizeof(tester));
619        }
620    }
621    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
622    uset_close(charsToTest);
623    charsToTest = NULL;
624
625    for(u=0; u<(UChar32)noCases; u++) {
626        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
627            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
628            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
629        }
630    }
631    /*
632    for(u = 0; u < charsToTestSize; u++) {
633      if(!(u&0xFFFF)) {
634        log_verbose("%08X ", u);
635      }
636      uprv_memset(t[noCases], 0, sizeof(tester));
637      t[noCases]->u = u;
638      len = 0;
639      U16_APPEND_UNSAFE(comp, len, u);
640      comp[len] = 0;
641      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
642      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
643      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
644      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
645    }
646    */
647
648    ucol_close(coll);
649
650    log_verbose("Testing locales, number of cases = %i\n", noCases);
651    for(i = 0; i<noOfLoc; i++) {
652        status = U_ZERO_ERROR;
653        locName = uloc_getAvailable(i);
654        if(hasCollationElements(locName)) {
655            char cName[256];
656            UChar name[256];
657            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
658
659            for(j = 0; j<nameSize; j++) {
660                cName[j] = (char)name[j];
661            }
662            cName[nameSize] = 0;
663            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
664
665            coll = ucol_open(locName, &status);
666            ucol_setStrength(coll, UCOL_IDENTICAL);
667            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
668
669            for(u=0; u<(UChar32)noCases; u++) {
670                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
671                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
672                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
673                    log_verbose("Testing NFC\n");
674                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
675                    backAndForth(iter);
676                    log_verbose("Testing NFD\n");
677                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
678                    backAndForth(iter);
679                }
680            }
681            ucol_closeElements(iter);
682            ucol_close(coll);
683        }
684    }
685    for(u = 0; u <= (UChar32)noCases; u++) {
686        free(t[u]);
687    }
688    free(t);
689}
690
691static void TestEmptyRule(void) {
692  UErrorCode status = U_ZERO_ERROR;
693  UChar rulez[] = { 0 };
694  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
695
696  ucol_close(coll);
697}
698
699static void TestUCARules(void) {
700  UErrorCode status = U_ZERO_ERROR;
701  UChar b[256];
702  UChar *rules = b;
703  uint32_t ruleLen = 0;
704  UCollator *UCAfromRules = NULL;
705  UCollator *coll = ucol_open("", &status);
706  if(status == U_FILE_ACCESS_ERROR) {
707    log_data_err("Is your data around?\n");
708    return;
709  } else if(U_FAILURE(status)) {
710    log_err("Error opening collator\n");
711    return;
712  }
713  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
714
715  log_verbose("TestUCARules\n");
716  if(ruleLen > 256) {
717    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
718    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
719  }
720  log_verbose("Rules length is %d\n", ruleLen);
721  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
722  if(U_SUCCESS(status)) {
723    ucol_close(UCAfromRules);
724  } else {
725    log_verbose("Unable to create a collator from UCARules!\n");
726  }
727/*
728  u_unescape(blah, b, 256);
729  ucol_getSortKey(coll, b, 1, res, 256);
730*/
731  ucol_close(coll);
732  if(rules != b) {
733    free(rules);
734  }
735}
736
737
738/* Pinyin tonal order */
739/*
740    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
741          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
742    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
743    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
744    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
745    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
746      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
747.. (\u00fc)
748
749However, in testing we got the following order:
750    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
751          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
752    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
753.. (\u0113)
754    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
755    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
756    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
757.. (\u01d8)
758      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
759*/
760
/*
 * Checks a Pinyin-style tailoring built with "[before 1]": the four
 * tone-marked variants of each vowel are placed before the vowel's primary
 * position, and the u-diaeresis variants are placed after u.  `expected`
 * lists the strings in the order genericRulesStarter() must confirm.
 */
static void TestBefore(void) {
  static const char *const tailoring =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  static const char *expected[] = {
      /* a */ "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      /* e */ "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      /* i */ "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      /* o */ "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      /* u */ "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      /* after u */ "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };

  genericRulesStarter(tailoring, expected, UPRV_LENGTHOF(expected));
}
779
#if 0
/* superseded by TestBeforePinyin */
/* Disabled: checked the tone-mark ordering of the "zh" locale. */
static void TestJ784(void) {
  static const char *expected[] = {
      /* a */ "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      /* e */ "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      /* i */ "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      /* o */ "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      /* u */ "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      /* u-diaeresis */ "\\u00fc", "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", expected, UPRV_LENGTHOF(expected));
}
#endif
795
#if 0
/* superseded by the changes to the lv locale */
/* Disabled: checked that the "lv" locale orders I, i, Y, y in this sequence. */
static void TestJ831(void) {
  static const char *expected[] = { "I", "i", "Y", "y" };

  genericLocaleStarter("lv", expected, UPRV_LENGTHOF(expected));
}
#endif
808
809static void TestJ815(void) {
810  const static char *data[] = {
811    "aa",
812      "Aa",
813      "ab",
814      "Ab",
815      "ad",
816      "Ad",
817      "ae",
818      "Ae",
819      "\\u00e6",
820      "\\u00c6",
821      "af",
822      "Af",
823      "b",
824      "B"
825  };
826  genericLocaleStarter("fr", data, UPRV_LENGTHOF(data));
827  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, UPRV_LENGTHOF(data));
828}
829
830
831static void TestCase(void)
832{
833    const static UChar gRules[MAX_TOKEN_LEN] =
834    /*" & 0 < 1,\u2461<a,A"*/
835    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
836
837    const static UChar testCase[][MAX_TOKEN_LEN] =
838    {
839        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
840        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
841        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
842        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
843    };
844
845    const static UCollationResult caseTestResults[][9] =
846    {
847        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
848        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
849        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
850        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
851    };
852
853    const static UColAttributeValue caseTestAttributes[][2] =
854    {
855        { UCOL_LOWER_FIRST, UCOL_OFF},
856        { UCOL_UPPER_FIRST, UCOL_OFF},
857        { UCOL_LOWER_FIRST, UCOL_ON},
858        { UCOL_UPPER_FIRST, UCOL_ON}
859    };
860    int32_t i,j,k;
861    UErrorCode status = U_ZERO_ERROR;
862    UCollationElements *iter;
863    UCollator  *myCollation;
864    myCollation = ucol_open("en_US", &status);
865
866    if(U_FAILURE(status)){
867        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
868        return;
869    }
870    log_verbose("Testing different case settings\n");
871    ucol_setStrength(myCollation, UCOL_TERTIARY);
872
873    for(k = 0; k<4; k++) {
874      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
875      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
876      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
877      for (i = 0; i < 3 ; i++) {
878        for(j = i+1; j<4; j++) {
879          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
880        }
881      }
882    }
883    ucol_close(myCollation);
884
885    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
886    if(U_FAILURE(status)){
887        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
888        return;
889    }
890    log_verbose("Testing different case settings with custom rules\n");
891    ucol_setStrength(myCollation, UCOL_TERTIARY);
892
893    for(k = 0; k<4; k++) {
894      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
895      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
896      for (i = 0; i < 3 ; i++) {
897        for(j = i+1; j<4; j++) {
898          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
899          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
900          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
901          backAndForth(iter);
902          ucol_closeElements(iter);
903          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
904          backAndForth(iter);
905          ucol_closeElements(iter);
906        }
907      }
908    }
909    ucol_close(myCollation);
910    {
911      const static char *lowerFirst[] = {
912        "h",
913        "H",
914        "ch",
915        "Ch",
916        "CH",
917        "cha",
918        "chA",
919        "Cha",
920        "ChA",
921        "CHa",
922        "CHA",
923        "i",
924        "I"
925      };
926
927      const static char *upperFirst[] = {
928        "H",
929        "h",
930        "CH",
931        "Ch",
932        "ch",
933        "CHA",
934        "CHa",
935        "ChA",
936        "Cha",
937        "chA",
938        "cha",
939        "I",
940        "i"
941      };
942      log_verbose("mixed case test\n");
943      log_verbose("lower first, case level off\n");
944      genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
945      log_verbose("upper first, case level off\n");
946      genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
947      log_verbose("lower first, case level on\n");
948      genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, UPRV_LENGTHOF(lowerFirst));
949      log_verbose("upper first, case level on\n");
950      genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, UPRV_LENGTHOF(upperFirst));
951    }
952
953}
954
955static void TestIncrementalNormalize(void) {
956
957    /*UChar baseA     =0x61;*/
958    UChar baseA     =0x41;
959/*    UChar baseB     = 0x42;*/
960    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
961    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
962    /*
963        0x316 is combining grave accent below, cc=220
964        0x321 is combining palatalized hook below, cc=202
965        0x300 is combining grave accent, cc=230
966    */
967
968#define MAXSLEN 2000
969    /*int          maxSLen   = 64000;*/
970    int          sLen;
971    int          i;
972
973    UCollator        *coll;
974    UErrorCode       status = U_ZERO_ERROR;
975    UCollationResult result;
976
977    int32_t myQ = getTestOption(QUICK_OPTION);
978
979    if(getTestOption(QUICK_OPTION) < 0) {
980        setTestOption(QUICK_OPTION, 1);
981    }
982
983    {
984        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
985        /*          most buffers along the way.*/
986        UChar            strA[MAXSLEN+1];
987        UChar            strB[MAXSLEN+1];
988
989        coll = ucol_open("en_US", &status);
990        if(status == U_FILE_ACCESS_ERROR) {
991          log_data_err("Is your data around?\n");
992          return;
993        } else if(U_FAILURE(status)) {
994          log_err("Error opening collator\n");
995          return;
996        }
997        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
998
999        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1000        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1001        /*for (sLen = 1000; sLen<1001; sLen++) {*/
1002        for (sLen = 500; sLen<501; sLen++) {
1003        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1004            strA[0] = baseA;
1005            strB[0] = baseA;
1006            for (i=1; i<=sLen-1; i++) {
1007                strA[i] = ccMix[i % 3];
1008                strB[sLen-i] = ccMix[i % 3];
1009            }
1010            strA[sLen]   = 0;
1011            strB[sLen]   = 0;
1012
1013            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1014            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1015            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1016            doTest(coll, strA, strB, UCOL_EQUAL);
1017        }
1018    }
1019
1020    setTestOption(QUICK_OPTION, myQ);
1021
1022
1023    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1024    /*         of the string.  Checks a couple of edge cases.*/
1025
1026    {
1027        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1028        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1029        ucol_setStrength(coll, UCOL_TERTIARY);
1030        doTest(coll, strA, strB, UCOL_EQUAL);
1031    }
1032
1033    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1034
1035    {
      /* New UCA  3.1.1.
       * test below used a code point from Deseret, which sorts differently
       * than d800 dc00
       */
1040        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1041        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1042        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1043        ucol_setStrength(coll, UCOL_TERTIARY);
1044        doTest(coll, strA, strB, UCOL_GREATER);
1045    }
1046
1047    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1048
1049    {
1050        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1051        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1052        char  sortKeyA[50];
1053        char  sortKeyAz[50];
1054        char  sortKeyB[50];
1055        char  sortKeyBz[50];
1056        int   r;
1057
1058        /* there used to be -3 here. Hmmmm.... */
1059        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1060        result = ucol_strcoll(coll, strA, 3, strB, 3);
1061        if (result != UCOL_GREATER) {
1062            log_err("ERROR 1 in test 4\n");
1063        }
1064        result = ucol_strcoll(coll, strA, -1, strB, -1);
1065        if (result != UCOL_EQUAL) {
1066            log_err("ERROR 2 in test 4\n");
1067        }
1068
1069        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1070        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1071        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1072        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1073
1074        r = strcmp(sortKeyA, sortKeyAz);
1075        if (r <= 0) {
1076            log_err("Error 3 in test 4\n");
1077        }
1078        r = strcmp(sortKeyA, sortKeyB);
1079        if (r <= 0) {
1080            log_err("Error 4 in test 4\n");
1081        }
1082        r = strcmp(sortKeyAz, sortKeyBz);
1083        if (r != 0) {
1084            log_err("Error 5 in test 4\n");
1085        }
1086
1087        ucol_setStrength(coll, UCOL_IDENTICAL);
1088        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1089        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1090        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1091        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1092
1093        r = strcmp(sortKeyA, sortKeyAz);
1094        if (r <= 0) {
1095            log_err("Error 6 in test 4\n");
1096        }
1097        r = strcmp(sortKeyA, sortKeyB);
1098        if (r <= 0) {
1099            log_err("Error 7 in test 4\n");
1100        }
1101        r = strcmp(sortKeyAz, sortKeyBz);
1102        if (r != 0) {
1103            log_err("Error 8 in test 4\n");
1104        }
1105        ucol_setStrength(coll, UCOL_TERTIARY);
1106    }
1107
1108
1109    /*  Test 5:  Null characters in non-normal source strings.*/
1110
1111    {
1112        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1113        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1114        char  sortKeyA[50];
1115        char  sortKeyAz[50];
1116        char  sortKeyB[50];
1117        char  sortKeyBz[50];
1118        int   r;
1119
1120        result = ucol_strcoll(coll, strA, 6, strB, 6);
1121        if (result != UCOL_GREATER) {
1122            log_err("ERROR 1 in test 5\n");
1123        }
1124        result = ucol_strcoll(coll, strA, -1, strB, -1);
1125        if (result != UCOL_EQUAL) {
1126            log_err("ERROR 2 in test 5\n");
1127        }
1128
1129        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1130        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1131        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1132        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1133
1134        r = strcmp(sortKeyA, sortKeyAz);
1135        if (r <= 0) {
1136            log_err("Error 3 in test 5\n");
1137        }
1138        r = strcmp(sortKeyA, sortKeyB);
1139        if (r <= 0) {
1140            log_err("Error 4 in test 5\n");
1141        }
1142        r = strcmp(sortKeyAz, sortKeyBz);
1143        if (r != 0) {
1144            log_err("Error 5 in test 5\n");
1145        }
1146
1147        ucol_setStrength(coll, UCOL_IDENTICAL);
1148        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1149        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1150        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1151        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1152
1153        r = strcmp(sortKeyA, sortKeyAz);
1154        if (r <= 0) {
1155            log_err("Error 6 in test 5\n");
1156        }
1157        r = strcmp(sortKeyA, sortKeyB);
1158        if (r <= 0) {
1159            log_err("Error 7 in test 5\n");
1160        }
1161        r = strcmp(sortKeyAz, sortKeyBz);
1162        if (r != 0) {
1163            log_err("Error 8 in test 5\n");
1164        }
1165        ucol_setStrength(coll, UCOL_TERTIARY);
1166    }
1167
1168
1169    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1170
1171    {
1172        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1173        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1174
1175        result = ucol_strcoll(coll, strA, 5, strB, 5);
1176        if (result != UCOL_LESS) {
1177            log_err("Error 1 in test 6\n");
1178        }
1179        result = ucol_strcoll(coll, strA, -1, strB, -1);
1180        if (result != UCOL_EQUAL) {
1181            log_err("Error 2 in test 6\n");
1182        }
1183    }
1184
1185    ucol_close(coll);
1186}
1187
1188
1189
1190#if 0
1191static void TestGetCaseBit(void) {
1192  static const char *caseBitData[] = {
1193    "a", "A", "ch", "Ch", "CH",
1194      "\\uFF9E", "\\u0009"
1195  };
1196
1197  static const uint8_t results[] = {
1198    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
1199      UCOL_UPPER_CASE, UCOL_LOWER_CASE
1200  };
1201
1202  uint32_t i, blen = 0;
1203  UChar b[256] = {0};
1204  UErrorCode status = U_ZERO_ERROR;
1205  UCollator *UCA = ucol_open("", &status);
1206  uint8_t res = 0;
1207
1208  for(i = 0; i<UPRV_LENGTHOF(results); i++) {
1209    blen = u_unescape(caseBitData[i], b, 256);
1210    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
1211    if(results[i] != res) {
1212      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
1213    }
1214  }
1215}
1216#endif
1217
1218static void TestHangulTailoring(void) {
1219    static const char *koreanData[] = {
1220        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1221            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1222            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1223            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1224            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1225            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1226    };
1227
1228    const char *rules =
1229        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1230        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1231        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1232        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1233        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1234        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1235
1236
1237  UErrorCode status = U_ZERO_ERROR;
1238  UChar rlz[2048] = { 0 };
1239  uint32_t rlen = u_unescape(rules, rlz, 2048);
1240
1241  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1242  if(status == U_FILE_ACCESS_ERROR) {
1243    log_data_err("Is your data around?\n");
1244    return;
1245  } else if(U_FAILURE(status)) {
1246    log_err("Error opening collator\n");
1247    return;
1248  }
1249
1250  log_verbose("Using start of korean rules\n");
1251
1252  if(U_SUCCESS(status)) {
1253    genericOrderingTest(coll, koreanData, UPRV_LENGTHOF(koreanData));
1254  } else {
1255    log_err("Unable to open collator with rules %s\n", rules);
1256  }
1257
1258  ucol_close(coll);
1259
1260  log_verbose("Using ko__LOTUS locale\n");
1261  genericLocaleStarter("ko__LOTUS", koreanData, UPRV_LENGTHOF(koreanData));
1262}
1263
/*
 * Middle bytes of the secondary and tertiary sort key compression ranges,
 * as used by the current implementation (see class CollationKeys).
 * These values are subject to change whenever sort key compression changes.
 */
enum {
    /* secondary level: range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,

    /* tertiary level: range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};
1274
1275static void TestCompressOverlap(void) {
1276    UChar       secstr[150];
1277    UChar       tertstr[150];
1278    UErrorCode  status = U_ZERO_ERROR;
1279    UCollator  *coll;
1280    uint8_t     result[500];
1281    uint32_t    resultlen;
1282    int         count = 0;
1283    uint8_t    *tempptr;
1284
1285    coll = ucol_open("", &status);
1286
1287    if (U_FAILURE(status)) {
1288        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1289        return;
1290    }
1291    while (count < 149) {
1292        secstr[count] = 0x0020; /* [06, 05, 05] */
1293        tertstr[count] = 0x0020;
1294        count ++;
1295    }
1296
1297    /* top down compression ----------------------------------- */
1298    secstr[count] = 0x0332; /* [, 87, 05] */
1299    tertstr[count] = 0x3000; /* [06, 05, 07] */
1300
1301    /* no compression secstr should have 150 secondary bytes, tertstr should
1302    have 150 tertiary bytes.
1303    with correct compression, secstr should have 6 secondary
1304    bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1305    resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1306    (void)resultlen;    /* Suppress set but not used warning. */
1307    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1308    while (*(tempptr + 1) != 1) {
1309        /* the last secondary collation element is not checked since it is not
1310        part of the compression */
1311        if (*tempptr < SEC_COMMON_MIDDLE) {
1312            log_err("Secondary top down compression overlapped\n");
1313        }
1314        tempptr ++;
1315    }
1316
1317    /* tertiary top/bottom/common for en_US is similar to the secondary
1318    top/bottom/common */
1319    resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1320    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1321    while (*(tempptr + 1) != 0) {
1322        /* the last secondary collation element is not checked since it is not
1323        part of the compression */
1324        if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1325            log_err("Tertiary top down compression overlapped\n");
1326        }
1327        tempptr ++;
1328    }
1329
1330    /* bottom up compression ------------------------------------- */
1331    secstr[count] = 0;
1332    tertstr[count] = 0;
1333    resultlen = ucol_getSortKey(coll, secstr, 150, result, UPRV_LENGTHOF(result));
1334    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1335    while (*(tempptr + 1) != 1) {
1336        /* the last secondary collation element is not checked since it is not
1337        part of the compression */
1338        if (*tempptr > SEC_COMMON_MIDDLE) {
1339            log_err("Secondary bottom up compression overlapped\n");
1340        }
1341        tempptr ++;
1342    }
1343
1344    /* tertiary top/bottom/common for en_US is similar to the secondary
1345    top/bottom/common */
1346    resultlen = ucol_getSortKey(coll, tertstr, 150, result, UPRV_LENGTHOF(result));
1347    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1348    while (*(tempptr + 1) != 0) {
1349        /* the last secondary collation element is not checked since it is not
1350        part of the compression */
1351        if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1352            log_err("Tertiary bottom up compression overlapped\n");
1353        }
1354        tempptr ++;
1355    }
1356
1357    ucol_close(coll);
1358}
1359
/*
 * Runs the three-string Cyrillic ordering in test[] against each of the
 * tailorings that is still enabled.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /*
     * Russian overrides contractions, so the "ru" locale check
     * (genericLocaleStarter("ru", test, 3)) is not valid anymore.
     *
     * UCA 8.0 drops most of the Cyrillic contractions from the default order,
     * see CLDR ticket #7246 "root collation: remove Cyrillic contractions".
     * For that reason the following checks are disabled as well:
     *   genericLocaleStarter("root", test, 3);
     *   genericRulesStarter("&\\u0410 = \\u0410", test, 3);
     *   genericRulesStarter("&Z < \\u0410", test, 3);
     *   genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
     *   genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
     */
    static const char *const activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int32_t ruleIndex;

    for (ruleIndex = 0; ruleIndex < UPRV_LENGTHOF(activeRules); ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], test, UPRV_LENGTHOF(test));
    }
}
1382
/*
 * Checks two three-string orderings with a collator whose rules suppress the
 * contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *const suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, testNoCont, UPRV_LENGTHOF(testNoCont));
    genericRulesStarter(suppressRule, testNoCont2, UPRV_LENGTHOF(testNoCont2));
}
1399
1400static void TestContraction(void) {
1401    const static char *testrules[] = {
1402        "&A = AB / B",
1403        "&A = A\\u0306/\\u0306",
1404        "&c = ch / h"
1405    };
1406    const static UChar testdata[][2] = {
1407        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1408        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1409        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1410    };
1411    const static UChar testdata2[][2] = {
1412        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1413        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1414        {0x0063 /* 'c' */, 0x006C /* 'l' */}
1415    };
1416#if 0
1417    /*
1418     * These pairs of rule strings are not guaranteed to yield the very same mappings.
1419     * In fact, LDML 24 recommends an improved way of creating mappings
1420     * which always yields different mappings for such pairs. See
1421     * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1422     */
1423    const static char *testrules3[] = {
1424        "&z < xyz &xyzw << B",
1425        "&z < xyz &xyz << B / w",
1426        "&z < ch &achm << B",
1427        "&z < ch &a << B / chm",
1428        "&\\ud800\\udc00w << B",
1429        "&\\ud800\\udc00 << B / w",
1430        "&a\\ud800\\udc00m << B",
1431        "&a << B / \\ud800\\udc00m",
1432    };
1433#endif
1434
1435    UErrorCode  status   = U_ZERO_ERROR;
1436    UCollator  *coll;
1437    UChar       rule[256] = {0};
1438    uint32_t    rlen     = 0;
1439    int         i;
1440
1441    for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1442        UCollationElements *iter1;
1443        int j = 0;
1444        log_verbose("Rule %s for testing\n", testrules[i]);
1445        rlen = u_unescape(testrules[i], rule, 32);
1446        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1447        if (U_FAILURE(status)) {
1448            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1449            return;
1450        }
1451        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1452        if (U_FAILURE(status)) {
1453            log_err("Collation iterator creation failed\n");
1454            return;
1455        }
1456        while (j < 2) {
1457            UCollationElements *iter2 = ucol_openElements(coll,
1458                                                         &(testdata[i][j]),
1459                                                         1, &status);
1460            uint32_t ce;
1461            if (U_FAILURE(status)) {
1462                log_err("Collation iterator creation failed\n");
1463                return;
1464            }
1465            ce = ucol_next(iter2, &status);
1466            while (ce != UCOL_NULLORDER) {
1467                if ((uint32_t)ucol_next(iter1, &status) != ce) {
1468                    log_err("Collation elements in contraction split does not match\n");
1469                    return;
1470                }
1471                ce = ucol_next(iter2, &status);
1472            }
1473            j ++;
1474            ucol_closeElements(iter2);
1475        }
1476        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1477            log_err("Collation elements not exhausted\n");
1478            return;
1479        }
1480        ucol_closeElements(iter1);
1481        ucol_close(coll);
1482    }
1483
1484    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1485    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1486    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1487        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1488                testdata2[0][0], testdata2[0][1], testdata2[1][0],
1489                testdata2[1][1]);
1490        return;
1491    }
1492    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1493        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1494                testdata2[1][0], testdata2[1][1], testdata2[2][0],
1495                testdata2[2][1]);
1496        return;
1497    }
1498    ucol_close(coll);
1499#if 0  /* see above */
1500    for (i = 0; i < UPRV_LENGTHOF(testrules3); i += 2) {
1501        log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1502        UCollator          *coll1,
1503                           *coll2;
1504        UCollationElements *iter1,
1505                           *iter2;
1506        UChar               ch = 0x0042 /* 'B' */;
1507        uint32_t            ce;
1508        rlen = u_unescape(testrules3[i], rule, 32);
1509        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1510        rlen = u_unescape(testrules3[i + 1], rule, 32);
1511        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1512        if (U_FAILURE(status)) {
1513            log_err("Collator creation failed %s\n", testrules[i]);
1514            return;
1515        }
1516        iter1 = ucol_openElements(coll1, &ch, 1, &status);
1517        iter2 = ucol_openElements(coll2, &ch, 1, &status);
1518        if (U_FAILURE(status)) {
1519            log_err("Collation iterator creation failed\n");
1520            return;
1521        }
1522        ce = ucol_next(iter1, &status);
1523        if (U_FAILURE(status)) {
1524            log_err("Retrieving ces failed\n");
1525            return;
1526        }
1527        while (ce != UCOL_NULLORDER) {
1528            uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1529            if (ce == ce2) {
1530                log_verbose("CEs match: %08x\n", ce);
1531            } else {
1532                log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1533                return;
1534            }
1535            ce = ucol_next(iter1, &status);
1536            if (U_FAILURE(status)) {
1537                log_err("Retrieving ces failed\n");
1538                return;
1539            }
1540        }
1541        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1542            log_err("CEs not exhausted\n");
1543            return;
1544        }
1545        ucol_closeElements(iter1);
1546        ucol_closeElements(iter2);
1547        ucol_close(coll1);
1548        ucol_close(coll2);
1549    }
1550#endif
1551}
1552
1553static void TestExpansion(void) {
1554    const static char *testrules[] = {
1555#if 0
1556        /*
1557         * This seems to have tested that M was not mapped to an expansion.
1558         * I believe the old builder just did that because it computed the extension CEs
1559         * at the very end, which was a bug.
1560         * Among other problems, it violated the core tailoring principle
1561         * by making an earlier rule depend on a later one.
1562         * And, of course, if M did not get an expansion, then it was primary different from K,
1563         * unlike what the rule &K<<M says.
1564         */
1565        "&J << K / B & K << M",
1566#endif
1567        "&J << K / B << M"
1568    };
1569    const static UChar testdata[][3] = {
1570        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1571        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1572        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1573        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1574        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1575        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1576    };
1577
1578    UErrorCode  status   = U_ZERO_ERROR;
1579    UCollator  *coll;
1580    UChar       rule[256] = {0};
1581    uint32_t    rlen     = 0;
1582    int         i;
1583
1584    for (i = 0; i < UPRV_LENGTHOF(testrules); i ++) {
1585        int j = 0;
1586        log_verbose("Rule %s for testing\n", testrules[i]);
1587        rlen = u_unescape(testrules[i], rule, 32);
1588        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1589        if (U_FAILURE(status)) {
1590            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1591            return;
1592        }
1593
1594        for (j = 0; j < 5; j ++) {
1595            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1596        }
1597        ucol_close(coll);
1598    }
1599}
1600
1601#if 0
1602/* this test tests the current limitations of the engine */
1603/* it always fail, so it is disabled by default */
1604static void TestLimitations(void) {
1605  /* recursive expansions */
1606  {
1607    static const char *rule = "&a=b/c&d=c/e";
1608    static const char *tlimit01[] = {"add","b","adf"};
1609    static const char *tlimit02[] = {"aa","b","af"};
1610    log_verbose("recursive expansions\n");
1611    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
1612    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
1613  }
1614  /* contractions spanning expansions */
1615  {
1616    static const char *rule = "&a<<<c/e&g<<<eh";
1617    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
1618    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
1619    log_verbose("contractions spanning expansions\n");
1620    genericRulesStarter(rule, tlimit01, UPRV_LENGTHOF(tlimit01));
1621    genericRulesStarter(rule, tlimit02, UPRV_LENGTHOF(tlimit02));
1622  }
1623  /* normalization: nulls in contractions */
1624  {
1625    static const char *rule = "&a<<<\\u0000\\u0302";
1626    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
1627    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
1628    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
1629    static const UColAttributeValue valOn[] = { UCOL_ON };
1630    static const UColAttributeValue valOff[] = { UCOL_OFF };
1631
1632    log_verbose("NULL in contractions\n");
1633    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
1634    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
1635    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
1636    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
1637
1638  }
1639  /* normalization: contractions spanning normalization */
1640  {
1641    static const char *rule = "&a<<<\\u0000\\u0302";
1642    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
1643    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
1644    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
1645    static const UColAttributeValue valOn[] = { UCOL_ON };
1646    static const UColAttributeValue valOff[] = { UCOL_OFF };
1647
1648    log_verbose("contractions spanning normalization\n");
1649    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
1650    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
1651    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
1652    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
1653
1654  }
1655  /* variable top:  */
1656  {
1657    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
1658    static const char *rule = "&\\u2010<x<[variable top]=z";
1659    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
1660    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
1661    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
1662    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
1663    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
1664    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
1665    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
1666
1667    log_verbose("variable top\n");
1668    genericRulesStarterWithOptions(rule, tlimit03, UPRV_LENGTHOF(tlimit03), att, valOn, UPRV_LENGTHOF(att));
1669    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
1670    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
1671    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));
1672    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));
1673
1674  }
1675  /* case level */
1676  {
1677    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
1678    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
1679    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
1680    static const UColAttribute att[] = { UCOL_CASE_FIRST};
1681    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
1682    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
1683    log_verbose("case level\n");
1684    genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOn, UPRV_LENGTHOF(att));
1685    genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOn, UPRV_LENGTHOF(att));
1686    /*genericRulesStarterWithOptions(rule, tlimit01, UPRV_LENGTHOF(tlimit01), att, valOff, UPRV_LENGTHOF(att));*/
1687    /*genericRulesStarterWithOptions(rule, tlimit02, UPRV_LENGTHOF(tlimit02), att, valOff, UPRV_LENGTHOF(att));*/
1688  }
1689
1690}
1691#endif
1692
1693static void TestBocsuCoverage(void) {
1694  UErrorCode status = U_ZERO_ERROR;
1695  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1696  UChar       test[256] = {0};
1697  uint32_t    tlen     = u_unescape(testString, test, 32);
1698  uint8_t key[256]     = {0};
1699  uint32_t klen         = 0;
1700
1701  UCollator *coll = ucol_open("", &status);
1702  if(U_SUCCESS(status)) {
1703  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1704
1705  klen = ucol_getSortKey(coll, test, tlen, key, 256);
1706  (void)klen;    /* Suppress set but not used warning. */
1707
1708  ucol_close(coll);
1709  } else {
1710    log_data_err("Couldn't open UCA\n");
1711  }
1712}
1713
1714static void TestVariableTopSetting(void) {
1715  UErrorCode status = U_ZERO_ERROR;
1716  uint32_t varTopOriginal = 0, varTop1, varTop2;
1717  UCollator *coll = ucol_open("", &status);
1718  if(U_SUCCESS(status)) {
1719
1720  static const UChar nul = 0;
1721  static const UChar space = 0x20;
1722  static const UChar dot = 0x2e;  /* punctuation */
1723  static const UChar degree = 0xb0;  /* symbol */
1724  static const UChar dollar = 0x24;  /* currency symbol */
1725  static const UChar zero = 0x30;  /* digit */
1726
1727  varTopOriginal = ucol_getVariableTop(coll, &status);
1728  log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1729  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1730
1731  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1732  varTop2 = ucol_getVariableTop(coll, &status);
1733  log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1734  if(U_FAILURE(status) || varTop1 != varTop2 ||
1735      !ucol_equal(coll, &nul, 0, &space, 1) ||
1736      ucol_equal(coll, &nul, 0, &dot, 1) ||
1737      ucol_equal(coll, &nul, 0, &degree, 1) ||
1738      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1739      ucol_equal(coll, &nul, 0, &zero, 1) ||
1740      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1741    log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1742  }
1743
1744  varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1745  varTop2 = ucol_getVariableTop(coll, &status);
1746  log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1747  if(U_FAILURE(status) || varTop1 != varTop2 ||
1748      !ucol_equal(coll, &nul, 0, &space, 1) ||
1749      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1750      ucol_equal(coll, &nul, 0, &degree, 1) ||
1751      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1752      ucol_equal(coll, &nul, 0, &zero, 1) ||
1753      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1754    log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1755  }
1756
1757  varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1758  varTop2 = ucol_getVariableTop(coll, &status);
1759  log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1760  if(U_FAILURE(status) || varTop1 != varTop2 ||
1761      !ucol_equal(coll, &nul, 0, &space, 1) ||
1762      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1763      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1764      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1765      ucol_equal(coll, &nul, 0, &zero, 1) ||
1766      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1767    log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1768  }
1769
1770  varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1771  varTop2 = ucol_getVariableTop(coll, &status);
1772  log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1773  if(U_FAILURE(status) || varTop1 != varTop2 ||
1774      !ucol_equal(coll, &nul, 0, &space, 1) ||
1775      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1776      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1777      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1778      ucol_equal(coll, &nul, 0, &zero, 1) ||
1779      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1780    log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1781  }
1782
1783  log_verbose("Testing setting variable top to contractions\n");
1784  {
1785    UChar first[4] = { 0 };
1786    first[0] = 0x0040;
1787    first[1] = 0x0050;
1788    first[2] = 0x0000;
1789
1790    status = U_ZERO_ERROR;
1791    ucol_setVariableTop(coll, first, -1, &status);
1792
1793    if(U_SUCCESS(status)) {
1794      log_err("Invalid contraction succeded in setting variable top!\n");
1795    }
1796
1797  }
1798
1799  log_verbose("Test restoring variable top\n");
1800
1801  status = U_ZERO_ERROR;
1802  ucol_restoreVariableTop(coll, varTopOriginal, &status);
1803  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1804    log_err("Couldn't restore old variable top\n");
1805  }
1806
1807  log_verbose("Testing calling with error set\n");
1808
1809  status = U_INTERNAL_PROGRAM_ERROR;
1810  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1811  varTop2 = ucol_getVariableTop(coll, &status);
1812  ucol_restoreVariableTop(coll, varTop2, &status);
1813  varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1814  varTop2 = ucol_getVariableTop(NULL, &status);
1815  ucol_restoreVariableTop(NULL, varTop2, &status);
1816  if(status != U_INTERNAL_PROGRAM_ERROR) {
1817    log_err("Bad reaction to passed error!\n");
1818  }
1819  ucol_close(coll);
1820  } else {
1821    log_data_err("Couldn't open UCA collator\n");
1822  }
1823}
1824
1825static void TestMaxVariable() {
1826  UErrorCode status = U_ZERO_ERROR;
1827  UColReorderCode oldMax, max;
1828  UCollator *coll;
1829
1830  static const UChar nul = 0;
1831  static const UChar space = 0x20;
1832  static const UChar dot = 0x2e;  /* punctuation */
1833  static const UChar degree = 0xb0;  /* symbol */
1834  static const UChar dollar = 0x24;  /* currency symbol */
1835  static const UChar zero = 0x30;  /* digit */
1836
1837  coll = ucol_open("", &status);
1838  if(U_FAILURE(status)) {
1839    log_data_err("Couldn't open root collator\n");
1840    return;
1841  }
1842
1843  oldMax = ucol_getMaxVariable(coll);
1844  log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1845  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1846
1847  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1848  max = ucol_getMaxVariable(coll);
1849  log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1850  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1851      !ucol_equal(coll, &nul, 0, &space, 1) ||
1852      ucol_equal(coll, &nul, 0, &dot, 1) ||
1853      ucol_equal(coll, &nul, 0, &degree, 1) ||
1854      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1855      ucol_equal(coll, &nul, 0, &zero, 1) ||
1856      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1857    log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1858  }
1859
1860  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1861  max = ucol_getMaxVariable(coll);
1862  log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1863  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1864      !ucol_equal(coll, &nul, 0, &space, 1) ||
1865      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1866      ucol_equal(coll, &nul, 0, &degree, 1) ||
1867      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1868      ucol_equal(coll, &nul, 0, &zero, 1) ||
1869      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1870    log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1871  }
1872
1873  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1874  max = ucol_getMaxVariable(coll);
1875  log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1876  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1877      !ucol_equal(coll, &nul, 0, &space, 1) ||
1878      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1879      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1880      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1881      ucol_equal(coll, &nul, 0, &zero, 1) ||
1882      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1883    log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1884  }
1885
1886  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1887  max = ucol_getMaxVariable(coll);
1888  log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1889  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1890      !ucol_equal(coll, &nul, 0, &space, 1) ||
1891      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1892      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1893      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1894      ucol_equal(coll, &nul, 0, &zero, 1) ||
1895      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1896    log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1897  }
1898
1899  log_verbose("Test restoring maxVariable\n");
1900  status = U_ZERO_ERROR;
1901  ucol_setMaxVariable(coll, oldMax, &status);
1902  if(oldMax != ucol_getMaxVariable(coll)) {
1903    log_err("Couldn't restore old maxVariable\n");
1904  }
1905
1906  log_verbose("Testing calling with error set\n");
1907  status = U_INTERNAL_PROGRAM_ERROR;
1908  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1909  max = ucol_getMaxVariable(coll);
1910  if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1911    log_err("Bad reaction to passed error!\n");
1912  }
1913  ucol_close(coll);
1914}
1915
1916static void TestNonChars(void) {
1917  static const char *test[] = {
1918      "\\u0000",  /* ignorable */
1919      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1920      "\\uFDD0", "\\uFDEF",
1921      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1922      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1923      "\\U0003FFFE", "\\U0003FFFF",
1924      "\\U0004FFFE", "\\U0004FFFF",
1925      "\\U0005FFFE", "\\U0005FFFF",
1926      "\\U0006FFFE", "\\U0006FFFF",
1927      "\\U0007FFFE", "\\U0007FFFF",
1928      "\\U0008FFFE", "\\U0008FFFF",
1929      "\\U0009FFFE", "\\U0009FFFF",
1930      "\\U000AFFFE", "\\U000AFFFF",
1931      "\\U000BFFFE", "\\U000BFFFF",
1932      "\\U000CFFFE", "\\U000CFFFF",
1933      "\\U000DFFFE", "\\U000DFFFF",
1934      "\\U000EFFFE", "\\U000EFFFF",
1935      "\\U000FFFFE", "\\U000FFFFF",
1936      "\\U0010FFFE", "\\U0010FFFF",
1937      "\\uFFFF"  /* special character with maximum primary weight */
1938  };
1939  UErrorCode status = U_ZERO_ERROR;
1940  UCollator *coll = ucol_open("en_US", &status);
1941
1942  log_verbose("Test non characters\n");
1943
1944  if(U_SUCCESS(status)) {
1945    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1946  } else {
1947    log_err_status(status, "Unable to open collator\n");
1948  }
1949
1950  ucol_close(coll);
1951}
1952
1953static void TestExtremeCompression(void) {
1954  static char *test[4];
1955  int32_t j = 0, i = 0;
1956
1957  for(i = 0; i<4; i++) {
1958    test[i] = (char *)malloc(2048*sizeof(char));
1959  }
1960
1961  for(j = 20; j < 500; j++) {
1962    for(i = 0; i<4; i++) {
1963      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1964      test[i][j-1] = (char)('a'+i);
1965      test[i][j] = 0;
1966    }
1967    genericLocaleStarter("en_US", (const char **)test, 4);
1968  }
1969
1970
1971  for(i = 0; i<4; i++) {
1972    free(test[i]);
1973  }
1974}
1975
#if 0
/*
 * Disabled alternative version of TestExtremeCompression; it is compiled out
 * and kept for reference only.
 * NOTE(review): it would need fixing before being re-enabled - it passes
 * `status` instead of `&status` to ucol_open(), never uses or closes `coll`,
 * and calls genericLocaleStarter() only once, after the fill loop has finished.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2005
/*
 * Builds a collator from a tailoring whose entries are (or start with)
 * supplementary code points written as surrogate pairs, and hands it to
 * genericRulesStarter() together with the expected ordering.
 */
static void TestSurrogates(void) {
  /* Tailoring: everything is placed after 'z'. */
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  /* Strings listed in the order in which they are expected to sort. */
  static const char *expectedOrder[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
2026
2027/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
2028static void TestPrefix(void) {
2029  uint32_t i;
2030
2031  static const struct {
2032    const char *rules;
2033    const char *data[50];
2034    const uint32_t len;
2035  } tests[] = {
2036    { "&z <<< z|a",
2037      {"zz", "za"}, 2 },
2038
2039    { "&z <<< z|   a",
2040      {"zz", "za"}, 2 },
2041    { "[strength I]"
2042      "&a=\\ud900\\udc25"
2043      "&z<<<\\ud900\\udc25|a",
2044      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
2045  };
2046
2047
2048  for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2049    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2050  }
2051}
2052
2053/* This test uses data suplied by Masashiko Maedera to test the implementation */
2054/* JIS X 4061 collation order implementation                                   */
2055static void TestNewJapanese(void) {
2056
2057  static const char * const test1[] = {
2058      "\\u30b7\\u30e3\\u30fc\\u30ec",
2059      "\\u30b7\\u30e3\\u30a4",
2060      "\\u30b7\\u30e4\\u30a3",
2061      "\\u30b7\\u30e3\\u30ec",
2062      "\\u3061\\u3087\\u3053",
2063      "\\u3061\\u3088\\u3053",
2064      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2065      "\\u3066\\u30fc\\u305f",
2066      "\\u30c6\\u30fc\\u30bf",
2067      "\\u30c6\\u30a7\\u30bf",
2068      "\\u3066\\u3048\\u305f",
2069      "\\u3067\\u30fc\\u305f",
2070      "\\u30c7\\u30fc\\u30bf",
2071      "\\u30c7\\u30a7\\u30bf",
2072      "\\u3067\\u3048\\u305f",
2073      "\\u3066\\u30fc\\u305f\\u30fc",
2074      "\\u30c6\\u30fc\\u30bf\\u30a1",
2075      "\\u30c6\\u30a7\\u30bf\\u30fc",
2076      "\\u3066\\u3047\\u305f\\u3041",
2077      "\\u3066\\u3048\\u305f\\u30fc",
2078      "\\u3067\\u30fc\\u305f\\u30fc",
2079      "\\u30c7\\u30fc\\u30bf\\u30a1",
2080      "\\u3067\\u30a7\\u305f\\u30a1",
2081      "\\u30c7\\u3047\\u30bf\\u3041",
2082      "\\u30c7\\u30a8\\u30bf\\u30a2",
2083      "\\u3072\\u3086",
2084      "\\u3073\\u3085\\u3042",
2085      "\\u3074\\u3085\\u3042",
2086      "\\u3073\\u3085\\u3042\\u30fc",
2087      "\\u30d3\\u30e5\\u30a2\\u30fc",
2088      "\\u3074\\u3085\\u3042\\u30fc",
2089      "\\u30d4\\u30e5\\u30a2\\u30fc",
2090      "\\u30d2\\u30e5\\u30a6",
2091      "\\u30d2\\u30e6\\u30a6",
2092      "\\u30d4\\u30e5\\u30a6\\u30a2",
2093      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2094      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2095      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2096      "\\u3072\\u3085\\u3093",
2097      "\\u3074\\u3085\\u3093",
2098      "\\u3075\\u30fc\\u308a",
2099      "\\u30d5\\u30fc\\u30ea",
2100      "\\u3075\\u3045\\u308a",
2101      "\\u3075\\u30a5\\u308a",
2102      "\\u3075\\u30a5\\u30ea",
2103      "\\u30d5\\u30a6\\u30ea",
2104      "\\u3076\\u30fc\\u308a",
2105      "\\u30d6\\u30fc\\u30ea",
2106      "\\u3076\\u3045\\u308a",
2107      "\\u30d6\\u30a5\\u308a",
2108      "\\u3077\\u3046\\u308a",
2109      "\\u30d7\\u30a6\\u30ea",
2110      "\\u3075\\u30fc\\u308a\\u30fc",
2111      "\\u30d5\\u30a5\\u30ea\\u30fc",
2112      "\\u3075\\u30a5\\u308a\\u30a3",
2113      "\\u30d5\\u3045\\u308a\\u3043",
2114      "\\u30d5\\u30a6\\u30ea\\u30fc",
2115      "\\u3075\\u3046\\u308a\\u3043",
2116      "\\u30d6\\u30a6\\u30ea\\u30a4",
2117      "\\u3077\\u30fc\\u308a\\u30fc",
2118      "\\u3077\\u30a5\\u308a\\u30a4",
2119      "\\u3077\\u3046\\u308a\\u30fc",
2120      "\\u30d7\\u30a6\\u30ea\\u30a4",
2121      "\\u30d5\\u30fd",
2122      "\\u3075\\u309e",
2123      "\\u3076\\u309d",
2124      "\\u3076\\u3075",
2125      "\\u3076\\u30d5",
2126      "\\u30d6\\u3075",
2127      "\\u30d6\\u30d5",
2128      "\\u3076\\u309e",
2129      "\\u3076\\u3077",
2130      "\\u30d6\\u3077",
2131      "\\u3077\\u309d",
2132      "\\u30d7\\u30fd",
2133      "\\u3077\\u3075",
2134};
2135
2136  static const char *test2[] = {
2137    "\\u306f\\u309d", /* H\\u309d */
2138    "\\u30cf\\u30fd", /* K\\u30fd */
2139    "\\u306f\\u306f", /* HH */
2140    "\\u306f\\u30cf", /* HK */
2141    "\\u30cf\\u30cf", /* KK */
2142    "\\u306f\\u309e", /* H\\u309e */
2143    "\\u30cf\\u30fe", /* K\\u30fe */
2144    "\\u306f\\u3070", /* HH\\u309b */
2145    "\\u30cf\\u30d0", /* KK\\u309b */
2146    "\\u306f\\u3071", /* HH\\u309c */
2147    "\\u30cf\\u3071", /* KH\\u309c */
2148    "\\u30cf\\u30d1", /* KK\\u309c */
2149    "\\u3070\\u309d", /* H\\u309b\\u309d */
2150    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2151    "\\u3070\\u306f", /* H\\u309bH */
2152    "\\u30d0\\u30cf", /* K\\u309bK */
2153    "\\u3070\\u309e", /* H\\u309b\\u309e */
2154    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2155    "\\u3070\\u3070", /* H\\u309bH\\u309b */
2156    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2157    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2158    "\\u3070\\u3071", /* H\\u309bH\\u309c */
2159    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2160    "\\u3071\\u309d", /* H\\u309c\\u309d */
2161    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2162    "\\u3071\\u306f", /* H\\u309cH */
2163    "\\u30d1\\u30cf", /* K\\u309cK */
2164    "\\u3071\\u3070", /* H\\u309cH\\u309b */
2165    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2166    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2167    "\\u3071\\u3071", /* H\\u309cH\\u309c */
2168    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2169  };
2170  /*
2171  static const char *test3[] = {
2172    "\\u221er\\u221e",
2173    "\\u221eR#",
2174    "\\u221et\\u221e",
2175    "#r\\u221e",
2176    "#R#",
2177    "#t%",
2178    "#T%",
2179    "8t\\u221e",
2180    "8T\\u221e",
2181    "8t#",
2182    "8T#",
2183    "8t%",
2184    "8T%",
2185    "8t8",
2186    "8T8",
2187    "\\u03c9r\\u221e",
2188    "\\u03a9R%",
2189    "rr\\u221e",
2190    "rR\\u221e",
2191    "Rr\\u221e",
2192    "RR\\u221e",
2193    "RT%",
2194    "rt8",
2195    "tr\\u221e",
2196    "tr8",
2197    "TR8",
2198    "tt8",
2199    "\\u30b7\\u30e3\\u30fc\\u30ec",
2200  };
2201  */
2202  static const UColAttribute att[] = { UCOL_STRENGTH };
2203  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2204
2205  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2206  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2207
2208  genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), att, val, 1);
2209  genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), att, val, 1);
2210  /*genericLocaleStarter("ja", test3, UPRV_LENGTHOF(test3));*/
2211  genericLocaleStarterWithOptions("ja", test1, UPRV_LENGTHOF(test1), attShifted, valShifted, 2);
2212  genericLocaleStarterWithOptions("ja", test2, UPRV_LENGTHOF(test2), attShifted, valShifted, 2);
2213}
2214
2215static void TestStrCollIdenticalPrefix(void) {
2216  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2217  const char* test[] = {
2218    "ab\\ud9b0\\udc70",
2219    "ab\\ud9b0\\udc71"
2220  };
2221  genericRulesStarterWithResult(rule, test, UPRV_LENGTHOF(test), UCOL_EQUAL);
2222}
2223/* Contractions should have all their canonically equivalent */
2224/* strings included */
2225static void TestContractionClosure(void) {
2226  static const struct {
2227    const char *rules;
2228    const char *data[10];
2229    const uint32_t len;
2230  } tests[] = {
2231    {   "&b=\\u00e4\\u00e4",
2232      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2233    {   "&b=\\u00C5",
2234      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2235  };
2236  uint32_t i;
2237
2238
2239  for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2240    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2241  }
2242}
2243
2244/* This tests also fails*/
2245static void TestBeforePrefixFailure(void) {
2246  static const struct {
2247    const char *rules;
2248    const char *data[10];
2249    const uint32_t len;
2250  } tests[] = {
2251    { "&g <<< a"
2252      "&[before 3]\\uff41 <<< x",
2253      {"x", "\\uff41"}, 2 },
2254    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2255        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2256        "&[before 3]\\u30a7<<<\\u30a9",
2257      {"\\u30a9", "\\u30a7"}, 2 },
2258    {   "&[before 3]\\u30a7<<<\\u30a9"
2259        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2260        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
2261      {"\\u30a9", "\\u30a7"}, 2 },
2262  };
2263  uint32_t i;
2264
2265
2266  for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2267    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2268  }
2269
2270#if 0
2271  const char* rule1 =
2272        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2273        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
2274        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
2275  const char* rule2 =
2276        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
2277        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
2278        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
2279  const char* test[] = {
2280      "\\u30c6\\u30fc\\u30bf",
2281      "\\u30c6\\u30a7\\u30bf",
2282  };
2283  genericRulesStarter(rule1, test, UPRV_LENGTHOF(test));
2284  genericRulesStarter(rule2, test, UPRV_LENGTHOF(test));
2285/* this piece of code should be in some sort of verbose mode     */
2286/* it gets the collation elements for elements and prints them   */
2287/* This is useful when trying to see whether the problem is      */
2288  {
2289    UErrorCode status = U_ZERO_ERROR;
2290    uint32_t i = 0;
2291    UCollationElements *it = NULL;
2292    uint32_t CE;
2293    UChar string[256];
2294    uint32_t uStringLen;
2295    UCollator *coll = NULL;
2296
2297    uStringLen = u_unescape(rule1, string, 256);
2298
2299    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2300
2301    /*coll = ucol_open("ja_JP_JIS", &status);*/
2302    it = ucol_openElements(coll, string, 0, &status);
2303
2304    for(i = 0; i < UPRV_LENGTHOF(test); i++) {
2305      log_verbose("%s\n", test[i]);
2306      uStringLen = u_unescape(test[i], string, 256);
2307      ucol_setText(it, string, uStringLen, &status);
2308
2309      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
2310        log_verbose("%08X\n", CE);
2311      }
2312      log_verbose("\n");
2313
2314    }
2315
2316    ucol_closeElements(it);
2317    ucol_close(coll);
2318  }
2319#endif
2320}
2321
2322static void TestPrefixCompose(void) {
2323  const char* rule1 =
2324        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2325  /*
2326  const char* test[] = {
2327      "\\u30c6\\u30fc\\u30bf",
2328      "\\u30c6\\u30a7\\u30bf",
2329  };
2330  */
2331  {
2332    UErrorCode status = U_ZERO_ERROR;
2333    /*uint32_t i = 0;*/
2334    /*UCollationElements *it = NULL;*/
2335/*    uint32_t CE;*/
2336    UChar string[256];
2337    uint32_t uStringLen;
2338    UCollator *coll = NULL;
2339
2340    uStringLen = u_unescape(rule1, string, 256);
2341
2342    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2343    ucol_close(coll);
2344  }
2345
2346
2347}
2348
2349/*
2350[last variable] last variable value
2351[last primary ignorable] largest CE for primary ignorable
2352[last secondary ignorable] largest CE for secondary ignorable
2353[last tertiary ignorable] largest CE for tertiary ignorable
2354[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2355*/
2356
2357static void TestRuleOptions(void) {
2358  /* values here are hardcoded and are correct for the current UCA
2359   * when the UCA changes, one might be forced to change these
2360   * values.
2361   */
2362
2363  /*
2364   * These strings contain the last character before [variable top]
2365   * and the first and second characters (by primary weights) after it.
2366   * See FractionalUCA.txt. For example:
2367      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
2368      [variable top = 0C FE]
2369      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
2370     and
2371      00B4; [0D 0C, 05, 05]
2372   *
2373   * Note: Starting with UCA 6.0, the [variable top] collation element
2374   * is not the weight of any character or string,
2375   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
2376   */
2377#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
2378#define FIRST_REGULAR_CHAR_STRING "\\u0060"
2379#define SECOND_REGULAR_CHAR_STRING "\\u00B4"
2380
2381  /*
2382   * This string has to match the character that has the [last regular] weight
2383   * which changes with each UCA version.
2384   * See the bottom of FractionalUCA.txt which says something like
2385      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
2386   *
2387   * Note: Starting with UCA 6.0, the [last regular] collation element
2388   * is not the weight of any character or string,
2389   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
2390   */
2391#define LAST_REGULAR_CHAR_STRING "\\U0001342E"
2392
2393  static const struct {
2394    const char *rules;
2395    const char *data[10];
2396    const uint32_t len;
2397  } tests[] = {
2398#if 0
2399    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
2400    /* - all befores here amount to zero */
2401    { "&[before 3][first tertiary ignorable]<<<a",
2402        { "\\u0000", "a"}, 2
2403    }, /* you cannot go before first tertiary ignorable */
2404
2405    { "&[before 3][last tertiary ignorable]<<<a",
2406        { "\\u0000", "a"}, 2
2407    }, /* you cannot go before last tertiary ignorable */
2408#endif
2409    /*
2410     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
2411     * and it *is* possible to "go before" that.
2412     */
2413    { "&[before 3][first secondary ignorable]<<<a",
2414        { "\\u0000", "a"}, 2
2415    },
2416
2417    { "&[before 3][last secondary ignorable]<<<a",
2418        { "\\u0000", "a"}, 2
2419    },
2420
2421    /* 'normal' befores */
2422
2423    /*
2424     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
2425     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
2426     * because there is no tailoring space before that boundary.
2427     * Made the tests work by tailoring to a space instead.
2428     */
2429    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
2430        {  "c", "b", "\\u0332", "a" }, 4
2431    },
2432
2433    /* we don't have a code point that corresponds to
2434     * the last primary ignorable
2435     */
2436    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
2437        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
2438    },
2439
2440    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
2441        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
2442    },
2443
2444    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
2445        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
2446    },
2447
2448    { "&[first regular]<a"
2449      "&[before 1][first regular]<b",
2450      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
2451    },
2452
2453    { "&[before 1][last regular]<b"
2454      "&[last regular]<a",
2455        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
2456    },
2457
2458    { "&[before 1][first implicit]<b"
2459      "&[first implicit]<a",
2460        { "b", "\\u4e00", "a", "\\u4e01"}, 4
2461    },
2462#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
2463    { "&[before 1][last implicit]<b"
2464      "&[last implicit]<a",
2465        { "b", "\\U0010FFFD", "a" }, 3
2466    },
2467#endif
2468    { "&[last variable]<z"
2469      "&' '<x"  /* was &[last primary ignorable]<x, see above */
2470      "&[last secondary ignorable]<<y"
2471      "&[last tertiary ignorable]<<<w"
2472      "&[top]<u",
2473      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
2474    }
2475
2476  };
2477  uint32_t i;
2478
2479  for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2480    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2481  }
2482}
2483
2484
2485static void TestOptimize(void) {
2486  /* this is not really a test - just trying out
2487   * whether copying of UCA contents will fail
2488   * Cannot really test, since the functionality
2489   * remains the same.
2490   */
2491  static const struct {
2492    const char *rules;
2493    const char *data[10];
2494    const uint32_t len;
2495  } tests[] = {
2496    /* - all befores here amount to zero */
2497    { "[optimize [\\uAC00-\\uD7FF]]",
2498    { "a", "b"}, 2}
2499  };
2500  uint32_t i;
2501
2502  for(i = 0; i<UPRV_LENGTHOF(tests); i++) {
2503    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
2504  }
2505}
2506
2507/*
2508cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2509weiv    ucol_strcollIter?
2510cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2511weiv    these are the input strings?
2512cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2513weiv    will check - could be a problem with utf-8 iterator
2514cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2515weiv    hmmm
2516cycheng@ca.ibm.c... note that we have a standalone high surrogate
2517weiv    that doesn't sound right
2518cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2519weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2520cycheng@ca.ibm.c... yes
2521weiv    and then do the comparison
2522cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2523weiv    utf-16 strings look like a little endian ones in the example you sent me
2524weiv    It could be a bug - let me try to test it out
2525cycheng@ca.ibm.c... ok
2526cycheng@ca.ibm.c... we can wait till the conf. call
cycheng@ca.ibm.c... next week
2528weiv    that would be great
2529weiv    hmmm
2530weiv    I might be wrong
2531weiv    let me play with it some more
2532cycheng@ca.ibm.c... ok
2533cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2534cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2535cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2536weiv    ok
2537cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2538weiv    thanks
2539cycheng@ca.ibm.c... the 4 strings we sent are just samples
2540*/
#if 0
/*
 * Disabled test, compiled out and kept for reference.
 * It compares one pair of strings with ucol_strcollIter() twice - once through
 * UTF-16BE iterators and once through UTF-8 iterators over the byte sequences
 * below - with normalization turned on, and reports an error when the two
 * results differ.
 * NOTE(review): `status` is never checked, including after ucol_open().
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* Raw bytes of the two strings in UTF-16BE. */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* Raw bytes of the two strings for the UTF-8 iterators. */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2581
2582#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2583static void Alexis2(void) {
2584  UErrorCode status = U_ZERO_ERROR;
2585  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2586  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2587  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2588  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2589
2590  UConverter *conv = NULL;
2591
2592  UCharIterator U16BEItS, U16BEItT;
2593  UCharIterator U8ItS, U8ItT;
2594
2595  UCollationResult resU16, resU16BE, resU8;
2596
2597  static const char* const pairs[][2] = {
2598    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2599    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2600    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2601    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2602    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2603    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2604    { "\\u0020", "\\u0020\\u0000"}
2605/*
26065F20 (my result here)
26075F204E008E3F
26085F20 (your result here)
2609*/
2610  };
2611
2612  int32_t i = 0;
2613
2614  UCollator *coll = ucol_open("", &status);
2615  if(status == U_FILE_ACCESS_ERROR) {
2616    log_data_err("Is your data around?\n");
2617    return;
2618  } else if(U_FAILURE(status)) {
2619    log_err("Error opening collator\n");
2620    return;
2621  }
2622  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2623  conv = ucnv_open("UTF16BE", &status);
2624  for(i = 0; i < UPRV_LENGTHOF(pairs); i++) {
2625    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2626    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2627
2628    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2629
2630    log_verbose("Result of strcoll is %i\n", resU16);
2631
2632    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2633    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2634    (void)U16BELenS;    /* Suppress set but not used warnings. */
2635    (void)U16BELenT;
2636
2637    /* use the original sizes, as the result from converter is in bytes */
2638    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2639    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2640
2641    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2642
2643    log_verbose("Result of U16BE is %i\n", resU16BE);
2644
2645    if(resU16 != resU16BE) {
2646      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2647    }
2648
2649    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2650    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2651
2652    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2653    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2654
2655    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2656
2657    if(resU16 != resU8) {
2658      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2659    }
2660
2661  }
2662
2663  ucol_close(coll);
2664  ucnv_close(conv);
2665}
2666
2667static void TestHebrewUCA(void) {
2668  UErrorCode status = U_ZERO_ERROR;
2669  static const char *first[] = {
2670    "d790d6b8d79cd795d6bcd7a9",
2671    "d790d79cd79ed7a7d799d799d7a1",
2672    "d790d6b4d79ed795d6bcd7a9",
2673  };
2674
2675  char utf8String[3][256];
2676  UChar utf16String[3][256];
2677
2678  int32_t i = 0, j = 0;
2679  int32_t sizeUTF8[3];
2680  int32_t sizeUTF16[3];
2681
2682  UCollator *coll = ucol_open("", &status);
2683  if (U_FAILURE(status)) {
2684      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2685      return;
2686  }
2687  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2688
2689  for(i = 0; i < UPRV_LENGTHOF(first); i++) {
2690    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2691    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2692    log_verbose("%i: ");
2693    for(j = 0; j < sizeUTF16[i]; j++) {
2694      /*log_verbose("\\u%04X", utf16String[i][j]);*/
2695      log_verbose("%04X", utf16String[i][j]);
2696    }
2697    log_verbose("\n");
2698  }
2699  for(i = 0; i < UPRV_LENGTHOF(first)-1; i++) {
2700    for(j = i + 1; j < UPRV_LENGTHOF(first); j++) {
2701      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2702    }
2703  }
2704
2705  ucol_close(coll);
2706
2707}
2708
2709static void TestPartialSortKeyTermination(void) {
2710  static const char* cases[] = {
2711    "\\u1234\\u1234\\udc00",
2712    "\\udc00\\ud800\\ud800"
2713  };
2714
2715  int32_t i;
2716
2717  UErrorCode status = U_ZERO_ERROR;
2718
2719  UCollator *coll = ucol_open("", &status);
2720
2721  UCharIterator iter;
2722
2723  UChar currCase[256];
2724  int32_t length = 0;
2725  int32_t pKeyLen = 0;
2726
2727  uint8_t key[256];
2728
2729  for(i = 0; i < UPRV_LENGTHOF(cases); i++) {
2730    uint32_t state[2] = {0, 0};
2731    length = u_unescape(cases[i], currCase, 256);
2732    uiter_setString(&iter, currCase, length);
2733    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2734    (void)pKeyLen;   /* Suppress set but not used warning. */
2735
2736    log_verbose("Done\n");
2737
2738  }
2739  ucol_close(coll);
2740}
2741
2742static void TestSettings(void) {
2743  static const char* cases[] = {
2744    "apple",
2745      "Apple"
2746  };
2747
2748  static const char* locales[] = {
2749    "",
2750      "en"
2751  };
2752
2753  UErrorCode status = U_ZERO_ERROR;
2754
2755  int32_t i = 0, j = 0;
2756
2757  UChar source[256], target[256];
2758  int32_t sLen = 0, tLen = 0;
2759
2760  UCollator *collateObject = NULL;
2761  for(i = 0; i < UPRV_LENGTHOF(locales); i++) {
2762    collateObject = ucol_open(locales[i], &status);
2763    ucol_setStrength(collateObject, UCOL_PRIMARY);
2764    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2765    for(j = 1; j < UPRV_LENGTHOF(cases); j++) {
2766      sLen = u_unescape(cases[j-1], source, 256);
2767      source[sLen] = 0;
2768      tLen = u_unescape(cases[j], target, 256);
2769      source[tLen] = 0;
2770      doTest(collateObject, source, target, UCOL_EQUAL);
2771    }
2772    ucol_close(collateObject);
2773  }
2774}
2775
2776static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2777    UErrorCode status = U_ZERO_ERROR;
2778    int32_t errorNo = 0;
2779    const UChar *sourceRules = NULL;
2780    int32_t sourceRulesLen = 0;
2781    UParseError parseError;
2782    UColAttributeValue french = UCOL_OFF;
2783
2784    if(!ucol_equals(source, target)) {
2785        log_err("Same collators, different address not equal\n");
2786        errorNo++;
2787    }
2788    ucol_close(target);
2789    if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2790        target = ucol_safeClone(source, NULL, NULL, &status);
2791        if(U_FAILURE(status)) {
2792            log_err("Error creating clone\n");
2793            errorNo++;
2794            return errorNo;
2795        }
2796        if(!ucol_equals(source, target)) {
2797            log_err("Collator different from it's clone\n");
2798            errorNo++;
2799        }
2800        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2801        if(french == UCOL_ON) {
2802            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2803        } else {
2804            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2805        }
2806        if(U_FAILURE(status)) {
2807            log_err("Error setting attributes\n");
2808            errorNo++;
2809            return errorNo;
2810        }
2811        if(ucol_equals(source, target)) {
2812            log_err("Collators same even when options changed\n");
2813            errorNo++;
2814        }
2815        ucol_close(target);
2816
2817        sourceRules = ucol_getRules(source, &sourceRulesLen);
2818        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2819        if(U_FAILURE(status)) {
2820            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2821            errorNo++;
2822            return errorNo;
2823        }
2824        /* Note: The tailoring rule string is an optional data item. */
2825        if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2826            log_err("Collator different from collator that was created from the same rules\n");
2827            errorNo++;
2828        }
2829        ucol_close(target);
2830    }
2831    return errorNo;
2832}
2833
2834
2835static void TestEquals(void) {
2836    /* ucol_equals is not currently a public API. There is a chance that it will become
2837    * something like this.
2838    */
2839    /* test whether the two collators instantiated from the same locale are equal */
2840    UErrorCode status = U_ZERO_ERROR;
2841    UParseError parseError;
2842    int32_t noOfLoc = uloc_countAvailable();
2843    const char *locName = NULL;
2844    UCollator *source = NULL, *target = NULL;
2845    int32_t i = 0;
2846
2847    const char* rules[] = {
2848        "&l < lj <<< Lj <<< LJ",
2849        "&n < nj <<< Nj <<< NJ",
2850        "&ae <<< \\u00e4",
2851        "&AE <<< \\u00c4"
2852    };
2853    /*
2854    const char* badRules[] = {
2855    "&l <<< Lj",
2856    "&n < nj <<< nJ <<< NJ",
2857    "&a <<< \\u00e4",
2858    "&AE <<< \\u00c4 <<< x"
2859    };
2860    */
2861
2862    UChar sourceRules[1024], targetRules[1024];
2863    int32_t sourceRulesSize = 0, targetRulesSize = 0;
2864    int32_t rulesSize = UPRV_LENGTHOF(rules);
2865
2866    for(i = 0; i < rulesSize; i++) {
2867        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2868        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2869    }
2870
2871    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2872    if(status == U_FILE_ACCESS_ERROR) {
2873        log_data_err("Is your data around?\n");
2874        return;
2875    } else if(U_FAILURE(status)) {
2876        log_err("Error opening collator\n");
2877        return;
2878    }
2879    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2880    if(!ucol_equals(source, target)) {
2881        log_err("Equivalent collators not equal!\n");
2882    }
2883    ucol_close(source);
2884    ucol_close(target);
2885
2886    source = ucol_open("root", &status);
2887    target = ucol_open("root", &status);
2888    log_verbose("Testing root\n");
2889    if(!ucol_equals(source, source)) {
2890        log_err("Same collator not equal\n");
2891    }
2892    if(TestEqualsForCollator("root", source, target)) {
2893        log_err("Errors for root\n");
2894    }
2895    ucol_close(source);
2896
2897    for(i = 0; i<noOfLoc; i++) {
2898        status = U_ZERO_ERROR;
2899        locName = uloc_getAvailable(i);
2900        /*if(hasCollationElements(locName)) {*/
2901        log_verbose("Testing equality for locale %s\n", locName);
2902        source = ucol_open(locName, &status);
2903        target = ucol_open(locName, &status);
2904        if (U_FAILURE(status)) {
2905            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2906            continue;
2907        }
2908        if(TestEqualsForCollator(locName, source, target)) {
2909            log_err("Errors for locale %s\n", locName);
2910        }
2911        ucol_close(source);
2912        /*}*/
2913    }
2914}
2915
2916static void TestJ2726(void) {
2917    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2918    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2919    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2920    UErrorCode status = U_ZERO_ERROR;
2921    UCollator *coll = ucol_open("en", &status);
2922    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2923    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2924    doTest(coll, a, aSpace, UCOL_EQUAL);
2925    doTest(coll, aSpace, a, UCOL_EQUAL);
2926    doTest(coll, a, spaceA, UCOL_EQUAL);
2927    doTest(coll, spaceA, a, UCOL_EQUAL);
2928    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2929    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2930    ucol_close(coll);
2931}
2932
2933static void NullRule(void) {
2934    UChar r[3] = {0};
2935    UErrorCode status = U_ZERO_ERROR;
2936    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2937    if(U_SUCCESS(status)) {
2938        log_err("This should have been an error!\n");
2939        ucol_close(coll);
2940    } else {
2941        status = U_ZERO_ERROR;
2942    }
2943    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2944    if(U_FAILURE(status)) {
2945        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2946    } else {
2947        ucol_close(coll);
2948    }
2949}
2950
2951/**
2952 * Test for CollationElementIterator previous and next for the whole set of
2953 * unicode characters with normalization on.
2954 */
2955static void TestNumericCollation(void)
2956{
2957    UErrorCode status = U_ZERO_ERROR;
2958
2959    const static char *basicTestStrings[]={
2960    "hello1",
2961    "hello2",
2962    "hello2002",
2963    "hello2003",
2964    "hello123456",
2965    "hello1234567",
2966    "hello10000000",
2967    "hello100000000",
2968    "hello1000000000",
2969    "hello10000000000",
2970    };
2971
2972    const static char *preZeroTestStrings[]={
2973    "avery10000",
2974    "avery010000",
2975    "avery0010000",
2976    "avery00010000",
2977    "avery000010000",
2978    "avery0000010000",
2979    "avery00000010000",
2980    "avery000000010000",
2981    };
2982
2983    const static char *thirtyTwoBitNumericStrings[]={
2984    "avery42949672960",
2985    "avery42949672961",
2986    "avery42949672962",
2987    "avery429496729610"
2988    };
2989
2990     const static char *longNumericStrings[]={
2991     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2992        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2993        are treated as multiple collation elements. */
2994    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2995    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2996    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2997    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2998    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2999    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
3000    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
3001    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
3002    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3003    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3004    };
3005
3006    const static char *supplementaryDigits[] = {
3007      "\\uD835\\uDFCE", /* 0 */
3008      "\\uD835\\uDFCF", /* 1 */
3009      "\\uD835\\uDFD0", /* 2 */
3010      "\\uD835\\uDFD1", /* 3 */
3011      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3012      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3013      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3014      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3015      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3016      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3017    };
3018
3019    const static char *foreignDigits[] = {
3020      "\\u0661",
3021        "\\u0662",
3022        "\\u0663",
3023      "\\u0661\\u0660",
3024      "\\u0661\\u0662",
3025      "\\u0661\\u0663",
3026      "\\u0662\\u0660",
3027      "\\u0662\\u0662",
3028      "\\u0662\\u0663",
3029      "\\u0663\\u0660",
3030      "\\u0663\\u0662",
3031      "\\u0663\\u0663"
3032    };
3033
3034    const static char *evenZeroes[] = {
3035      "2000",
3036      "2001",
3037        "2002",
3038        "2003"
3039    };
3040
3041    UColAttribute att = UCOL_NUMERIC_COLLATION;
3042    UColAttributeValue val = UCOL_ON;
3043
3044    /* Open our collator. */
3045    UCollator* coll = ucol_open("root", &status);
3046    if (U_FAILURE(status)){
3047        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3048              myErrorName(status));
3049        return;
3050    }
3051    genericLocaleStarterWithOptions("root", basicTestStrings, UPRV_LENGTHOF(basicTestStrings), &att, &val, 1);
3052    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, UPRV_LENGTHOF(thirtyTwoBitNumericStrings), &att, &val, 1);
3053    genericLocaleStarterWithOptions("root", longNumericStrings, UPRV_LENGTHOF(longNumericStrings), &att, &val, 1);
3054    genericLocaleStarterWithOptions("en_US", foreignDigits, UPRV_LENGTHOF(foreignDigits), &att, &val, 1);
3055    genericLocaleStarterWithOptions("root", supplementaryDigits, UPRV_LENGTHOF(supplementaryDigits), &att, &val, 1);
3056    genericLocaleStarterWithOptions("root", evenZeroes, UPRV_LENGTHOF(evenZeroes), &att, &val, 1);
3057
3058    /* Setting up our collator to do digits. */
3059    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3060    if (U_FAILURE(status)){
3061        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3062              myErrorName(status));
3063        return;
3064    }
3065
3066    /*
3067       Testing that prepended zeroes still yield the correct collation behavior.
3068       We expect that every element in our strings array will be equal.
3069    */
3070    genericOrderingTestWithResult(coll, preZeroTestStrings, UPRV_LENGTHOF(preZeroTestStrings), UCOL_EQUAL);
3071
3072    ucol_close(coll);
3073}
3074
3075static void TestTibetanConformance(void)
3076{
3077    const char* test[] = {
3078        "\\u0FB2\\u0591\\u0F71\\u0061",
3079        "\\u0FB2\\u0F71\\u0061"
3080    };
3081
3082    UErrorCode status = U_ZERO_ERROR;
3083    UCollator *coll = ucol_open("", &status);
3084    UChar source[100];
3085    UChar target[100];
3086    int result;
3087    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3088    if (U_SUCCESS(status)) {
3089        u_unescape(test[0], source, 100);
3090        u_unescape(test[1], target, 100);
3091        doTest(coll, source, target, UCOL_EQUAL);
3092        result = ucol_strcoll(coll, source, -1,   target, -1);
3093        log_verbose("result %d\n", result);
3094        if (UCOL_EQUAL != result) {
3095            log_err("Tibetan comparison error\n");
3096        }
3097    }
3098    ucol_close(coll);
3099
3100    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3101}
3102
/**
 * Runs a two-string ordering check through genericLocaleStarter() for the
 * "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *pinyinPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", pinyinPair, UPRV_LENGTHOF(pinyinPair));
}
3107
3108/**
3109 * Iterate through the given iterator, checking to see that all the strings
3110 * in the expected array are present.
3111 * @param expected array of strings we expect to see, or NULL
3112 * @param expectedCount number of elements of expected, or 0
3113 */
3114static int32_t checkUEnumeration(const char* msg,
3115                                 UEnumeration* iter,
3116                                 const char** expected,
3117                                 int32_t expectedCount) {
3118    UErrorCode ec = U_ZERO_ERROR;
3119    int32_t i = 0, n, j, bit;
3120    int32_t seenMask = 0;
3121
3122    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3123    n = uenum_count(iter, &ec);
3124    if (!assertSuccess("count", &ec)) return -1;
3125    log_verbose("%s = [", msg);
3126    for (;; ++i) {
3127        const char* s = uenum_next(iter, NULL, &ec);
3128        if (!assertSuccess("snext", &ec) || s == NULL) break;
3129        if (i != 0) log_verbose(",");
3130        log_verbose("%s", s);
3131        /* check expected list */
3132        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3133            if ((seenMask&bit) == 0 &&
3134                uprv_strcmp(s, expected[j]) == 0) {
3135                seenMask |= bit;
3136                break;
3137            }
3138        }
3139    }
3140    log_verbose("] (%d)\n", i);
3141    assertTrue("count verified", i==n);
3142    /* did we see all expected strings? */
3143    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3144        if ((seenMask&bit)!=0) {
3145            log_verbose("Ok: \"%s\" seen\n", expected[j]);
3146        } else {
3147            log_err("FAIL: \"%s\" not seen\n", expected[j]);
3148        }
3149    }
3150    return n;
3151}
3152
3153/**
3154 * Test new API added for separate collation tree.
3155 */
3156static void TestSeparateTrees(void) {
3157    UErrorCode ec = U_ZERO_ERROR;
3158    UEnumeration *e = NULL;
3159    int32_t n = -1;
3160    UBool isAvailable;
3161    char loc[256];
3162
3163    static const char* AVAIL[] = { "en", "de" };
3164
3165    static const char* KW[] = { "collation" };
3166
3167    static const char* KWVAL[] = { "phonebook", "stroke" };
3168
3169#if !UCONFIG_NO_SERVICE
3170    e = ucol_openAvailableLocales(&ec);
3171    if (e != NULL) {
3172        assertSuccess("ucol_openAvailableLocales", &ec);
3173        assertTrue("ucol_openAvailableLocales!=0", e!=0);
3174        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, UPRV_LENGTHOF(AVAIL));
3175        (void)n;    /* Suppress set but not used warnings. */
3176        /* Don't need to check n because we check list */
3177        uenum_close(e);
3178    } else {
3179        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3180    }
3181#endif
3182
3183    e = ucol_getKeywords(&ec);
3184    if (e != NULL) {
3185        assertSuccess("ucol_getKeywords", &ec);
3186        assertTrue("ucol_getKeywords!=0", e!=0);
3187        n = checkUEnumeration("ucol_getKeywords", e, KW, UPRV_LENGTHOF(KW));
3188        /* Don't need to check n because we check list */
3189        uenum_close(e);
3190    } else {
3191        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3192    }
3193
3194    e = ucol_getKeywordValues(KW[0], &ec);
3195    if (e != NULL) {
3196        assertSuccess("ucol_getKeywordValues", &ec);
3197        assertTrue("ucol_getKeywordValues!=0", e!=0);
3198        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3199        /* Don't need to check n because we check list */
3200        uenum_close(e);
3201    } else {
3202        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3203    }
3204
3205    /* Try setting a warning before calling ucol_getKeywordValues */
3206    ec = U_USING_FALLBACK_WARNING;
3207    e = ucol_getKeywordValues(KW[0], &ec);
3208    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3209        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3210        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, UPRV_LENGTHOF(KWVAL));
3211        /* Don't need to check n because we check list */
3212        uenum_close(e);
3213    }
3214
3215    /*
3216U_DRAFT int32_t U_EXPORT2
3217ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3218                             const char* locale, UBool* isAvailable,
3219                             UErrorCode* status);
3220}
3221*/
3222    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3223                                     &isAvailable, &ec);
3224    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3225        assertEquals("getFunctionalEquivalent(de)", "root", loc);
3226        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3227                   isAvailable == TRUE);
3228    }
3229
3230    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3231                                     &isAvailable, &ec);
3232    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3233        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3234        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3235                   isAvailable == FALSE);
3236    }
3237}
3238
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through both a hand-written "[before 2]"
 * tailoring for accented vowels and the "zh" locale.
 */
static void TestBeforePinyin(void) {
    const static char pinyinRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables with and without a macron on the vowel. */
    const static char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Upper/lower case and accent variants of "xa", then of "xax". */
    const static char *caseVariants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(pinyinRules, syllables, UPRV_LENGTHOF(syllables));
    genericLocaleStarter("zh", syllables, UPRV_LENGTHOF(syllables));

    genericRulesStarter(pinyinRules, caseVariants, UPRV_LENGTHOF(caseVariants));
    genericLocaleStarter("zh", caseVariants, UPRV_LENGTHOF(caseVariants));
}
3299
3300static void TestBeforeTightening(void) {
3301    static const struct {
3302        const char *rules;
3303        UErrorCode expectedStatus;
3304    } tests[] = {
3305        { "&[before 1]a<x", U_ZERO_ERROR },
3306        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3307        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3308        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3309        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3310        { "&[before 2]a<<x",U_ZERO_ERROR },
3311        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3312        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3313        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3314        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3315        { "&[before 3]a<<<x",U_ZERO_ERROR },
3316        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3317        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3318    };
3319
3320    int32_t i = 0;
3321
3322    UErrorCode status = U_ZERO_ERROR;
3323    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3324    uint32_t rlen = 0;
3325
3326    UCollator *coll = NULL;
3327
3328
3329    for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
3330        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3331        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3332        if(status != tests[i].expectedStatus) {
3333            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3334                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3335        }
3336        ucol_close(coll);
3337        status = U_ZERO_ERROR;
3338    }
3339
3340}
3341
3342/*
3343&m < a
3344&[before 1] a < x <<< X << q <<< Q < z
3345assert: m <<< M < x <<< X << q <<< Q < z < a < n
3346
3347&m < a
3348&[before 2] a << x <<< X << q <<< Q < z
3349assert: m <<< M < x <<< X << q <<< Q << a < z < n
3350
3351&m < a
3352&[before 3] a <<< x <<< X << q <<< Q < z
3353assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3354
3355
3356&m << a
3357&[before 1] a < x <<< X << q <<< Q < z
3358assert: x <<< X << q <<< Q < z < m <<< M << a < n
3359
3360&m << a
3361&[before 2] a << x <<< X << q <<< Q < z
3362assert: m <<< M << x <<< X << q <<< Q << a < z < n
3363
3364&m << a
3365&[before 3] a <<< x <<< X << q <<< Q < z
3366assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3367
3368
3369&m <<< a
3370&[before 1] a < x <<< X << q <<< Q < z
3371assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3372
3373&m <<< a
3374&[before 2] a << x <<< X << q <<< Q < z
3375assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3376
3377&m <<< a
3378&[before 3] a <<< x <<< X << q <<< Q < z
3379assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3380
3381
3382&[before 1] s < x <<< X << q <<< Q < z
3383assert: r <<< R < x <<< X << q <<< Q < z < s < n
3384
3385&[before 2] s << x <<< X << q <<< Q < z
3386assert: r <<< R < x <<< X << q <<< Q << s < z < n
3387
3388&[before 3] s <<< x <<< X << q <<< Q < z
3389assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3390
3391
3392&[before 1] \u24DC < x <<< X << q <<< Q < z
3393assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3394
3395&[before 2] \u24DC << x <<< X << q <<< Q < z
3396assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3397
3398&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3399assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3400*/
3401
3402
3403#if 0
/* requires features not yet supported */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Each table entry holds a rule string that combines a reset with a
 * "[before N]" tailoring, the list of strings in the order they are expected
 * to sort, and the number of strings in that list.  Every entry is handed to
 * genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;      /* tailoring rules to open the collator with */
        const char* order[16];  /* strings listed in expected sort order */
        int32_t size;           /* number of used entries in order[] */
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every rule/order pair through the generic rules-based ordering check. */
    for(i = 0; i < UPRV_LENGTHOF(tests); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
3449#endif
3450
3451static void TestTailorNULL( void ) {
3452    const static char* rule = "&a <<< '\\u0000'";
3453    UErrorCode status = U_ZERO_ERROR;
3454    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3455    uint32_t rlen = 0;
3456    UChar a = 1, null = 0;
3457    UCollationResult res = UCOL_EQUAL;
3458
3459    UCollator *coll = NULL;
3460
3461
3462    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3463    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3464
3465    if(U_FAILURE(status)) {
3466        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3467    } else {
3468        res = ucol_strcoll(coll, &a, 1, &null, 1);
3469
3470        if(res != UCOL_LESS) {
3471            log_err("NULL was not tailored properly!\n");
3472        }
3473    }
3474
3475    ucol_close(coll);
3476}
3477
3478static void
3479TestUpperFirstQuaternary(void)
3480{
3481  const char* tests[] = { "B", "b", "Bb", "bB" };
3482  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3483  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3484  genericLocaleStarterWithOptions("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att));
3485}
3486
3487static void
3488TestJ4960(void)
3489{
3490  const char* tests[] = { "\\u00e2T", "aT" };
3491  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3492  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3493  const char* tests2[] = { "a", "A" };
3494  const char* rule = "&[first tertiary ignorable]=A=a";
3495  UColAttribute att2[] = { UCOL_CASE_LEVEL };
3496  UColAttributeValue attVals2[] = { UCOL_ON };
3497  /* Test whether we correctly ignore primary ignorables on case level when */
3498  /* we have only primary & case level */
3499  genericLocaleStarterWithOptionsAndResult("root", tests, UPRV_LENGTHOF(tests), att, attVals, UPRV_LENGTHOF(att), UCOL_EQUAL);
3500  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3501  /* and case level */
3502  genericLocaleStarterWithOptions("root", tests2, UPRV_LENGTHOF(tests2), att, attVals, UPRV_LENGTHOF(att));
3503  /* Test whether completely ignorable letters have case level info (they shouldn't) */
3504  genericRulesStarterWithOptionsAndResult(rule, tests2, UPRV_LENGTHOF(tests2), att2, attVals2, UPRV_LENGTHOF(att2), UCOL_EQUAL);
3505}
3506
3507static void
3508TestJ5223(void)
3509{
3510  static const char *test = "this is a test string";
3511  UChar ustr[256];
3512  int32_t ustr_length = u_unescape(test, ustr, 256);
3513  unsigned char sortkey[256];
3514  int32_t sortkey_length;
3515  UErrorCode status = U_ZERO_ERROR;
3516  static UCollator *coll = NULL;
3517  coll = ucol_open("root", &status);
3518  if(U_FAILURE(status)) {
3519    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3520    return;
3521  }
3522  ucol_setStrength(coll, UCOL_PRIMARY);
3523  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3524  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3525  if (U_FAILURE(status)) {
3526    log_err("Failed setting atributes\n");
3527    return;
3528  }
3529  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3530  if (sortkey_length > 256) return;
3531
3532  /* we mark the position where the null byte should be written in advance */
3533  sortkey[sortkey_length-1] = 0xAA;
3534
3535  /* we set the buffer size one byte higher than needed */
3536  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3537    sortkey_length+1);
3538
3539  /* no error occurs (for me) */
3540  if (sortkey[sortkey_length-1] == 0xAA) {
3541    log_err("Hit bug at first try\n");
3542  }
3543
3544  /* we mark the position where the null byte should be written again */
3545  sortkey[sortkey_length-1] = 0xAA;
3546
3547  /* this time we set the buffer size to the exact amount needed */
3548  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3549    sortkey_length);
3550
3551  /* now the trailing null byte is not written */
3552  if (sortkey[sortkey_length-1] == 0xAA) {
3553    log_err("Hit bug at second try\n");
3554  }
3555
3556  ucol_close(coll);
3557}
3558
/*
 * Regression test for Thai partial sort key problem.
 * Feeds two Thai strings that differ only in their seventh code unit
 * (U+0E47 vs U+0E48) to the generic "th" locale ordering test.
 */
static void
TestJ5232(void)
{
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, UPRV_LENGTHOF(thaiPair));
}
3570
/*
 * Runs the generic rules-based ordering test on "a", "y" with a tailoring
 * that places "a" after [first secondary ignorable] and Y after "Ny".
 */
static void
TestJ5367(void)
{
    static const char *expectedOrder[] = { "a", "y" };
    const char *tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, expectedOrder, UPRV_LENGTHOF(expectedOrder));
}
3578
3579static void
3580TestVI5913(void)
3581{
3582    UErrorCode status = U_ZERO_ERROR;
3583    int32_t i, j;
3584    UCollator *coll =NULL;
3585    uint8_t  resColl[100], expColl[100];
3586    int32_t  rLen, tLen, ruleLen, sLen, kLen;
3587    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3588    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3589    /*
3590     * Note: Just tailoring &z<ae^ does not work as expected:
3591     * The UCA spec requires for discontiguous contractions that they
3592     * extend an *existing match* by one combining mark at a time.
3593     * Therefore, ae must be a contraction so that the builder finds
3594     * discontiguous contractions for ae^, for example with an intervening underdot.
3595     * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3596     */
3597    UChar rule3[256]={
3598        0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3599        0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3600        0};
3601    static const UChar tData[][20]={
3602        {0x1EAC, 0},
3603        {0x0041, 0x0323, 0x0302, 0},
3604        {0x1EA0, 0x0302, 0},
3605        {0x00C2, 0x0323, 0},
3606        {0x1ED8, 0},  /* O with dot and circumflex */
3607        {0x1ECC, 0x0302, 0},
3608        {0x1EB7, 0},
3609        {0x1EA1, 0x0306, 0},
3610    };
3611    static const UChar tailorData[][20]={
3612        {0x1FA2, 0},  /* Omega with 3 combining marks */
3613        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3614        {0x1FF3, 0x0313, 0x0300, 0},
3615        {0x1F60, 0x0300, 0x0345, 0},
3616        {0x1F62, 0x0345, 0},
3617        {0x1FA0, 0x0300, 0},
3618    };
3619    static const UChar tailorData2[][20]={
3620        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3621        {0x0073, 0x0323, 0x030C, 0},
3622        {0x0073, 0x030C, 0x0323, 0},
3623    };
3624    static const UChar tailorData3[][20]={
3625        {0x007a, 0},  /*  z */
3626        {0x0061, 0x0065, 0},  /*  a + e */
3627        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3628        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3629        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3630        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3631        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3632        {0x00EA, 0},  /* e with circumflex  */
3633    };
3634
3635    /* Test Vietnamese sort. */
3636    coll = ucol_open("vi", &status);
3637    if(U_FAILURE(status)) {
3638        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3639        return;
3640    }
3641    log_verbose("\n\nVI collation:");
3642    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3643        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3644    }
3645    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3646        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3647    }
3648    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3649        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3650    }
3651    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3652        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3653    }
3654
3655    for (j=0; j<8; j++) {
3656        tLen = u_strlen(tData[j]);
3657        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3658        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3659        for(i = 0; i<rLen; i++) {
3660            log_verbose(" %02X", resColl[i]);
3661        }
3662    }
3663
3664    ucol_close(coll);
3665
3666    /* Test Romanian sort. */
3667    coll = ucol_open("ro", &status);
3668    log_verbose("\n\nRO collation:");
3669    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3670        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3671    }
3672    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3673        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3674    }
3675    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3676        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3677    }
3678
3679    for (j=4; j<8; j++) {
3680        tLen = u_strlen(tData[j]);
3681        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3682        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3683        for(i = 0; i<rLen; i++) {
3684            log_verbose(" %02X", resColl[i]);
3685        }
3686    }
3687    ucol_close(coll);
3688
3689    /* Test the precomposed Greek character with 3 combining marks. */
3690    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3691    ruleLen = u_strlen(rule);
3692    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3693    if (U_FAILURE(status)) {
3694        log_err("ucol_openRules failed with %s\n", u_errorName(status));
3695        return;
3696    }
3697    sLen = u_strlen(tailorData[0]);
3698    for (j=1; j<6; j++) {
3699        tLen = u_strlen(tailorData[j]);
3700        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3701            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3702        }
3703    }
3704    /* Test getSortKey. */
3705    tLen = u_strlen(tailorData[0]);
3706    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3707    for (j=0; j<6; j++) {
3708        tLen = u_strlen(tailorData[j]);
3709        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3710        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3711            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3712            for(i = 0; i<rLen; i++) {
3713                log_err(" %02X", resColl[i]);
3714            }
3715        }
3716    }
3717    ucol_close(coll);
3718
3719    log_verbose("\n\nTailoring test for s with caron:");
3720    ruleLen = u_strlen(rule2);
3721    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3722    tLen = u_strlen(tailorData2[0]);
3723    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3724    for (j=1; j<3; j++) {
3725        tLen = u_strlen(tailorData2[j]);
3726        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3727        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3728            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3729            for(i = 0; i<rLen; i++) {
3730                log_err(" %02X", resColl[i]);
3731            }
3732        }
3733    }
3734    ucol_close(coll);
3735
3736    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3737    ruleLen = u_strlen(rule3);
3738    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3739    tLen = u_strlen(tailorData3[3]);
3740    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3741    log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3742    for(i = 0; i<kLen; i++) {
3743        log_verbose(" %02X", expColl[i]);
3744    }
3745    for (j=4; j<6; j++) {
3746        tLen = u_strlen(tailorData3[j]);
3747        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3748
3749        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3750            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3751            for(i = 0; i<rLen; i++) {
3752                log_err(" %02X", resColl[i]);
3753            }
3754        }
3755
3756        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3757         for(i = 0; i<rLen; i++) {
3758             log_verbose(" %02X", resColl[i]);
3759         }
3760    }
3761    ucol_close(coll);
3762}
3763
3764static void
3765TestTailor6179(void)
3766{
3767    UErrorCode status = U_ZERO_ERROR;
3768    int32_t i;
3769    UCollator *coll =NULL;
3770    uint8_t  resColl[100];
3771    int32_t  rLen, tLen, ruleLen;
3772    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3773    static const UChar rule1[]={
3774            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3775            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3776            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3777            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3778    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3779    static const UChar rule2[]={
3780            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3781            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3782            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3783            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3784            0x3C,0x3C,0x20,0x62,0};
3785
3786    static const UChar tData1[][4]={
3787        {0x61, 0},
3788        {0x62, 0},
3789        { 0xFDD0,0x009E, 0}
3790    };
3791    static const UChar tData2[][4]={
3792        {0x61, 0},
3793        {0x62, 0},
3794        { 0xFDD0,0x009E, 0}
3795     };
3796
3797    /*
3798     * These values from FractionalUCA.txt will change,
3799     * and need to be updated here.
3800     * TODO: Make this not check for particular sort keys.
3801     * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3802     */
3803    static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3804    static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3805    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3806    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3807
3808    UParseError parseError;
3809
3810    /* Test [Last Primary ignorable] */
3811
3812    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3813    ruleLen = u_strlen(rule1);
3814    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3815    if (U_FAILURE(status)) {
3816        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3817        return;
3818    }
3819    tLen = u_strlen(tData1[0]);
3820    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3821    if (rLen != UPRV_LENGTHOF(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3822        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3823        for(i = 0; i<rLen; i++) {
3824            log_err(" %02X", resColl[i]);
3825        }
3826        log_err("\n");
3827    }
3828    tLen = u_strlen(tData1[1]);
3829    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3830    if (rLen != UPRV_LENGTHOF(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3831        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3832        for(i = 0; i<rLen; i++) {
3833            log_err(" %02X", resColl[i]);
3834        }
3835        log_err("\n");
3836    }
3837    ucol_close(coll);
3838
3839
3840    /* Test [Last Secondary ignorable] */
3841    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3842    ruleLen = u_strlen(rule2);
3843    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3844    if (U_FAILURE(status)) {
3845        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3846        log_info("  offset=%d  \"%s\" | \"%s\"\n",
3847                 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3848        return;
3849    }
3850    tLen = u_strlen(tData2[0]);
3851    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3852    if (rLen != UPRV_LENGTHOF(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3853        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3854        for(i = 0; i<rLen; i++) {
3855            log_err(" %02X", resColl[i]);
3856        }
3857        log_err("\n");
3858    }
3859    tLen = u_strlen(tData2[1]);
3860    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3861    if (rLen != UPRV_LENGTHOF(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3862      log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3863      for(i = 0; i<rLen; i++) {
3864        log_err(" %02X", resColl[i]);
3865      }
3866      log_err("\n");
3867    }
3868    ucol_close(coll);
3869}
3870
3871static void
3872TestUCAPrecontext(void)
3873{
3874    UErrorCode status = U_ZERO_ERROR;
3875    int32_t i, j;
3876    UCollator *coll =NULL;
3877    uint8_t  resColl[100], prevColl[100];
3878    int32_t  rLen, tLen, ruleLen;
3879    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3880    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3881    /* & l middle-dot << a  a is an expansion. */
3882
3883    UChar tData1[][20]={
3884            { 0xb7, 0},  /* standalone middle dot(0xb7) */
3885            { 0x387, 0}, /* standalone middle dot(0x387) */
3886            { 0x61, 0},  /* a */
3887            { 0x6C, 0},  /* l */
3888            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3889            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3890            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3891            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3892            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3893            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3894            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3895     };
3896
3897    log_verbose("\n\nEN collation:");
3898    coll = ucol_open("en", &status);
3899    if (U_FAILURE(status)) {
3900        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3901        return;
3902    }
3903    for (j=0; j<11; j++) {
3904        tLen = u_strlen(tData1[j]);
3905        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3906        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3907            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3908                    j, tData1[j]);
3909        }
3910        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3911        for(i = 0; i<rLen; i++) {
3912            log_verbose(" %02X", resColl[i]);
3913        }
3914        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3915     }
3916     ucol_close(coll);
3917
3918
3919     log_verbose("\n\nJA collation:");
3920     coll = ucol_open("ja", &status);
3921     if (U_FAILURE(status)) {
3922         log_err("Tailoring test: &z <<a|- failed!");
3923         return;
3924     }
3925     for (j=0; j<11; j++) {
3926         tLen = u_strlen(tData1[j]);
3927         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3928         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3929             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3930                     j, tData1[j]);
3931         }
3932         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3933         for(i = 0; i<rLen; i++) {
3934             log_verbose(" %02X", resColl[i]);
3935         }
3936         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3937      }
3938      ucol_close(coll);
3939
3940
3941      log_verbose("\n\nTailoring test: & middle dot < a ");
3942      ruleLen = u_strlen(rule1);
3943      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3944      if (U_FAILURE(status)) {
3945          log_err("Tailoring test: & middle dot < a failed!");
3946          return;
3947      }
3948      for (j=0; j<11; j++) {
3949          tLen = u_strlen(tData1[j]);
3950          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3951          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3952              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3953                      j, tData1[j]);
3954          }
3955          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3956          for(i = 0; i<rLen; i++) {
3957              log_verbose(" %02X", resColl[i]);
3958          }
3959          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3960       }
3961       ucol_close(coll);
3962
3963
3964       log_verbose("\n\nTailoring test: & l middle-dot << a ");
3965       ruleLen = u_strlen(rule2);
3966       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3967       if (U_FAILURE(status)) {
3968           log_err("Tailoring test: & l middle-dot << a failed!");
3969           return;
3970       }
3971       for (j=0; j<11; j++) {
3972           tLen = u_strlen(tData1[j]);
3973           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3974           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3975               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3976                       j, tData1[j]);
3977           }
3978           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3979               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3980                       j, tData1[j]);
3981           }
3982           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3983           for(i = 0; i<rLen; i++) {
3984               log_verbose(" %02X", resColl[i]);
3985           }
3986           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3987        }
3988        ucol_close(coll);
3989}
3990
3991static void
3992TestOutOfBuffer5468(void)
3993{
3994    static const char *test = "\\u4e00";
3995    UChar ustr[256];
3996    int32_t ustr_length = u_unescape(test, ustr, 256);
3997    unsigned char shortKeyBuf[1];
3998    int32_t sortkey_length;
3999    UErrorCode status = U_ZERO_ERROR;
4000    static UCollator *coll = NULL;
4001
4002    coll = ucol_open("root", &status);
4003    if(U_FAILURE(status)) {
4004      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4005      return;
4006    }
4007    ucol_setStrength(coll, UCOL_PRIMARY);
4008    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4009    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4010    if (U_FAILURE(status)) {
4011      log_err("Failed setting atributes\n");
4012      return;
4013    }
4014
4015    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4016    if (sortkey_length != 4) {
4017        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4018    }
4019    log_verbose("length of sortKey is %d", sortkey_length);
4020    ucol_close(coll);
4021}
4022
4023#define TSKC_DATA_SIZE 5
4024#define TSKC_BUF_SIZE  50
4025static void
4026TestSortKeyConsistency(void)
4027{
4028    UErrorCode icuRC = U_ZERO_ERROR;
4029    UCollator* ucol;
4030    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4031
4032    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4033    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4034    int32_t i, j, i2;
4035
4036    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4037    if (U_FAILURE(icuRC))
4038    {
4039        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4040        return;
4041    }
4042
4043    for (i = 0; i < TSKC_DATA_SIZE; i++)
4044    {
4045        UCharIterator uiter;
4046        uint32_t state[2] = { 0, 0 };
4047        int32_t dataLen = i+1;
4048        for (j=0; j<TSKC_BUF_SIZE; j++)
4049            bufFull[i][j] = bufPart[i][j] = 0;
4050
4051        /* Full sort key */
4052        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4053
4054        /* Partial sort key */
4055        uiter_setString(&uiter, data, dataLen);
4056        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4057        if (U_FAILURE(icuRC))
4058        {
4059            log_err("ucol_nextSortKeyPart failed\n");
4060            ucol_close(ucol);
4061            return;
4062        }
4063
4064        for (i2=0; i2<i; i2++)
4065        {
4066            UBool fullMatch = TRUE;
4067            UBool partMatch = TRUE;
4068            for (j=0; j<TSKC_BUF_SIZE; j++)
4069            {
4070                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4071                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4072            }
4073            if (fullMatch != partMatch) {
4074                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4075                                  : "partial key was consistent, but full key changed\n");
4076                ucol_close(ucol);
4077                return;
4078            }
4079        }
4080    }
4081
4082    /*=============================================*/
4083   ucol_close(ucol);
4084}
4085
4086/* ticket: 6101 */
4087static void TestCroatianSortKey(void) {
4088    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4089    UErrorCode status = U_ZERO_ERROR;
4090    UCollator *ucol;
4091    UCharIterator iter;
4092
4093    static const UChar text[] = { 0x0044, 0xD81A };
4094
4095    size_t length = UPRV_LENGTHOF(text);
4096
4097    uint8_t textSortKey[32];
4098    size_t lenSortKey = 32;
4099    size_t actualSortKeyLen;
4100    uint32_t uStateInfo[2] = { 0, 0 };
4101
4102    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4103    if (U_FAILURE(status)) {
4104        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4105        return;
4106    }
4107
4108    uiter_setString(&iter, text, length);
4109
4110    actualSortKeyLen = ucol_nextSortKeyPart(
4111        ucol, &iter, (uint32_t*)uStateInfo,
4112        textSortKey, lenSortKey, &status
4113        );
4114
4115    if (actualSortKeyLen == lenSortKey) {
4116        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4117    }
4118
4119    ucol_close(ucol);
4120}
4121
4122/* ticket: 6140 */
4123/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4124 * they are both Hiragana and Katakana
4125 */
4126#define SORTKEYLEN 50
4127static void TestHiragana(void) {
4128    UErrorCode status = U_ZERO_ERROR;
4129    UCollator* ucol;
4130    UCollationResult strcollresult;
4131    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4132    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4133    int32_t data1Len = UPRV_LENGTHOF(data1);
4134    int32_t data2Len = UPRV_LENGTHOF(data2);
4135    int32_t i, j;
4136    uint8_t sortKey1[SORTKEYLEN];
4137    uint8_t sortKey2[SORTKEYLEN];
4138
4139    UCharIterator uiter1;
4140    UCharIterator uiter2;
4141    uint32_t state1[2] = { 0, 0 };
4142    uint32_t state2[2] = { 0, 0 };
4143    int32_t keySize1;
4144    int32_t keySize2;
4145
4146    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4147            &status);
4148    if (U_FAILURE(status)) {
4149        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4150        return;
4151    }
4152
4153    /* Start of full sort keys */
4154    /* Full sort key1 */
4155    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4156    /* Full sort key2 */
4157    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4158    if (keySize1 == keySize2) {
4159        for (i = 0; i < keySize1; i++) {
4160            if (sortKey1[i] != sortKey2[i]) {
4161                log_err("Full sort keys are different. Should be equal.");
4162            }
4163        }
4164    } else {
4165        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4166    }
4167    /* End of full sort keys */
4168
4169    /* Start of partial sort keys */
4170    /* Partial sort key1 */
4171    uiter_setString(&uiter1, data1, data1Len);
4172    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4173    /* Partial sort key2 */
4174    uiter_setString(&uiter2, data2, data2Len);
4175    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4176    if (U_SUCCESS(status) && keySize1 == keySize2) {
4177        for (j = 0; j < keySize1; j++) {
4178            if (sortKey1[j] != sortKey2[j]) {
4179                log_err("Partial sort keys are different. Should be equal");
4180            }
4181        }
4182    } else {
4183        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4184    }
4185    /* End of partial sort keys */
4186
4187    /* Start of strcoll */
4188    /* Use ucol_strcoll() to determine ordering */
4189    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4190    if (strcollresult != UCOL_EQUAL) {
4191        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4192    }
4193
4194    ucol_close(ucol);
4195}
4196
4197/* Convenient struct for running collation tests */
4198typedef struct {
4199  const UChar source[MAX_TOKEN_LEN];  /* String on left */
4200  const UChar target[MAX_TOKEN_LEN];  /* String on right */
4201  UCollationResult result;            /* -1, 0 or +1, depending on collation */
4202} OneTestCase;
4203
4204/*
4205 * Utility function to test one collation test case.
4206 * @param testcases Array of test cases.
4207 * @param n_testcases Size of the array testcases.
4208 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4209 * @param n_rules Size of the array str_rules.
4210 */
4211static void doTestOneTestCase(const OneTestCase testcases[],
4212                              int n_testcases,
4213                              const char* str_rules[],
4214                              int n_rules)
4215{
4216  int rule_no, testcase_no;
4217  UChar rule[500];
4218  int32_t length = 0;
4219  UErrorCode status = U_ZERO_ERROR;
4220  UParseError parse_error;
4221  UCollator  *myCollation;
4222
4223  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4224
4225    length = u_unescape(str_rules[rule_no], rule, 500);
4226    if (length == 0) {
4227        log_err("ERROR: The rule cannot be unescaped: %s\n");
4228        return;
4229    }
4230    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4231    if(U_FAILURE(status)){
4232        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4233        log_info("  offset=%d  \"%s\" | \"%s\"\n",
4234                 parse_error.offset,
4235                 aescstrdup(parse_error.preContext, -1),
4236                 aescstrdup(parse_error.postContext, -1));
4237        return;
4238    }
4239    log_verbose("Testing the <<* syntax\n");
4240    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4241    ucol_setStrength(myCollation, UCOL_TERTIARY);
4242    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4243      doTest(myCollation,
4244             testcases[testcase_no].source,
4245             testcases[testcase_no].target,
4246             testcases[testcase_no].result
4247             );
4248    }
4249    ucol_close(myCollation);
4250  }
4251}
4252
/*
 * Test cases shared by TestSameStrengthList(), TestSameStrengthListQuoted()
 * and TestSameStrengthListRanges().
 * The trailing comments spell out the strings; '<', '<<', '<<<' and '='
 * follow the relation operators of the rule strings used by those tests.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" << "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" << "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = UPRV_LENGTHOF(rangeTestcases);
4280
/*
 * Test cases shared by TestSameStrengthListSupplemental() and
 * TestSameStrengthListSupplementalRanges().
 * Supplementary code points are written as UTF-16 surrogate pairs:
 * D800 DC00 = U+10000, D800 DC01 = U+10001, D800 DC02 = U+10002.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same data as the previous entry) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = UPRV_LENGTHOF(rangeTestcasesSupplemental);
4292
/*
 * Test cases shared by TestSameStrengthListQwerty() and
 * TestSameStrengthListQuotedQwerty().
 * The relation symbols in the trailing comments follow the tailoring
 * "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d" used by those tests.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" << "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" < "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" <<< "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" <<< "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" << "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = UPRV_LENGTHOF(rangeTestcasesQwerty);
4313
4314static void TestSameStrengthList(void)
4315{
4316  const char* strRules[] = {
4317    /* Normal */
4318    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4319
4320    /* Lists */
4321    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4322  };
4323  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4324}
4325
4326static void TestSameStrengthListQuoted(void)
4327{
4328  const char* strRules[] = {
4329    /* Lists with quoted characters */
4330    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4331    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4332
4333    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4334    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4335
4336    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4337    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4338  };
4339  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4340}
4341
4342static void TestSameStrengthListSupplemental(void)
4343{
4344  const char* strRules[] = {
4345    "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4346    "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4347    "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4348    "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4349  };
4350  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4351}
4352
4353static void TestSameStrengthListQwerty(void)
4354{
4355  const char* strRules[] = {
4356    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4357    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4358    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4359    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4360    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4361
4362    /* Quoted characters also will work if two quoted characters are not consecutive.  */
4363    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4364
4365    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4366    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4367
4368 };
4369  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4370}
4371
4372static void TestSameStrengthListQuotedQwerty(void)
4373{
4374  const char* strRules[] = {
4375    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4376    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4377    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4378
4379    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4380    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4381   };
4382  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, UPRV_LENGTHOF(strRules));
4383}
4384
4385static void TestSameStrengthListRanges(void)
4386{
4387  const char* strRules[] = {
4388    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4389  };
4390  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, UPRV_LENGTHOF(strRules));
4391}
4392
4393static void TestSameStrengthListSupplementalRanges(void)
4394{
4395  const char* strRules[] = {
4396    /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4397    "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4398  };
4399  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, UPRV_LENGTHOF(strRules));
4400}
4401
4402static void TestSpecialCharacters(void)
4403{
4404  const char* strRules[] = {
4405    /* Normal */
4406    "&';'<'+'<','<'-'<'&'<'*'",
4407
4408    /* List */
4409    "&';'<*'+,-&*'",
4410
4411    /* Range */
4412    "&';'<*'+'-'-&*'",
4413  };
4414
4415  const static OneTestCase specialCharacterStrings[] = {
4416    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4417    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4418    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4419    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4420  };
4421  doTestOneTestCase(specialCharacterStrings, UPRV_LENGTHOF(specialCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4422}
4423
4424static void TestPrivateUseCharacters(void)
4425{
4426  const char* strRules[] = {
4427    /* Normal */
4428    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4429    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4430  };
4431
4432  const static OneTestCase privateUseCharacterStrings[] = {
4433    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4434    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4435    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4436    { {0xe2da}, {0xe2db}, UCOL_LESS },
4437    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4438    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4439  };
4440  doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4441}
4442
4443static void TestPrivateUseCharactersInList(void)
4444{
4445  const char* strRules[] = {
4446    /* List */
4447    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4448    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4449    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4450  };
4451
4452  const static OneTestCase privateUseCharacterStrings[] = {
4453    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4454    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4455    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4456    { {0xe2da}, {0xe2db}, UCOL_LESS },
4457    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4458    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4459  };
4460  doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4461}
4462
4463static void TestPrivateUseCharactersInRange(void)
4464{
4465  const char* strRules[] = {
4466    /* Range */
4467    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4468    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4469    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4470  };
4471
4472  const static OneTestCase privateUseCharacterStrings[] = {
4473    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4474    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4475    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4476    { {0xe2da}, {0xe2db}, UCOL_LESS },
4477    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4478    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4479  };
4480  doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
4481}
4482
4483static void TestInvalidListsAndRanges(void)
4484{
4485  const char* invalidRules[] = {
4486    /* Range not in starred expression */
4487    "&\\ufffe<\\uffff-\\U00010002",
4488
4489    /* Range without start */
4490    "&a<*-c",
4491
4492    /* Range without end */
4493    "&a<*b-",
4494
4495    /* More than one hyphen */
4496    "&a<*b-g-l",
4497
4498    /* Range in the wrong order */
4499    "&a<*k-b",
4500
4501  };
4502
4503  UChar rule[500];
4504  UErrorCode status = U_ZERO_ERROR;
4505  UParseError parse_error;
4506  int n_rules = UPRV_LENGTHOF(invalidRules);
4507  int rule_no;
4508  int length;
4509  UCollator  *myCollation;
4510
4511  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4512
4513    length = u_unescape(invalidRules[rule_no], rule, 500);
4514    if (length == 0) {
4515        log_err("ERROR: The rule cannot be unescaped: %s\n");
4516        return;
4517    }
4518    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4519    (void)myCollation;      /* Suppress set but not used warning. */
4520    if(!U_FAILURE(status)){
4521      log_err("ERROR: Could not cause a failure as expected: \n");
4522    }
4523    status = U_ZERO_ERROR;
4524  }
4525}
4526
4527/*
4528 * This test ensures that characters placed before a character in a different script have the same lead byte
4529 * in their collation key before and after script reordering.
4530 */
4531static void TestBeforeRuleWithScriptReordering(void)
4532{
4533    UParseError error;
4534    UErrorCode status = U_ZERO_ERROR;
4535    UCollator  *myCollation;
4536    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4537    UChar rules[500];
4538    uint32_t rulesLength = 0;
4539    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4540    UCollationResult collResult;
4541
4542    uint8_t baseKey[256];
4543    uint32_t baseKeyLength;
4544    uint8_t beforeKey[256];
4545    uint32_t beforeKeyLength;
4546
4547    UChar base[] = { 0x03b1 }; /* base */
4548    int32_t baseLen = UPRV_LENGTHOF(base);
4549
4550    UChar before[] = { 0x0e01 }; /* ko kai */
4551    int32_t beforeLen = UPRV_LENGTHOF(before);
4552
4553    /*UChar *data[] = { before, base };
4554    genericRulesStarter(srules, data, 2);*/
4555
4556    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4557
4558    (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4559    (void)baseKeyLength;
4560
4561    /* build collator */
4562    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4563
4564    rulesLength = u_unescape(srules, rules, UPRV_LENGTHOF(rules));
4565    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4566    if(U_FAILURE(status)) {
4567        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4568        return;
4569    }
4570
4571    /* check collation results - before rule applied but not script reordering */
4572    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4573    if (collResult != UCOL_GREATER) {
4574        log_err("Collation result not correct before script reordering = %d\n", collResult);
4575    }
4576
4577    /* check the lead byte of the collation keys before script reordering */
4578    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4579    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4580    if (baseKey[0] != beforeKey[0]) {
4581      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4582   }
4583
4584    /* reorder the scripts */
4585    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4586    if(U_FAILURE(status)) {
4587        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4588        return;
4589    }
4590
4591    /* check collation results - before rule applied and after script reordering */
4592    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4593    if (collResult != UCOL_GREATER) {
4594        log_err("Collation result not correct after script reordering = %d\n", collResult);
4595    }
4596
4597    /* check the lead byte of the collation keys after script reordering */
4598    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4599    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4600    if (baseKey[0] != beforeKey[0]) {
4601        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4602    }
4603
4604    ucol_close(myCollation);
4605}
4606
4607/*
4608 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4609 */
4610static void TestNonLeadBytesDuringCollationReordering(void)
4611{
4612    UErrorCode status = U_ZERO_ERROR;
4613    UCollator  *myCollation;
4614    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4615
4616    uint8_t baseKey[256];
4617    uint32_t baseKeyLength;
4618    uint8_t reorderKey[256];
4619    uint32_t reorderKeyLength;
4620
4621    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4622
4623    uint32_t i;
4624
4625
4626    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4627
4628    /* build collator tertiary */
4629    myCollation = ucol_open("", &status);
4630    ucol_setStrength(myCollation, UCOL_TERTIARY);
4631    if(U_FAILURE(status)) {
4632        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4633        return;
4634    }
4635    baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4636
4637    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4638    if(U_FAILURE(status)) {
4639        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4640        return;
4641    }
4642    reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4643
4644    if (baseKeyLength != reorderKeyLength) {
4645        log_err("Key lengths not the same during reordering.\n");
4646        return;
4647    }
4648
4649    for (i = 1; i < baseKeyLength; i++) {
4650        if (baseKey[i] != reorderKey[i]) {
4651            log_err("Collation key bytes not the same at position %d.\n", i);
4652            return;
4653        }
4654    }
4655    ucol_close(myCollation);
4656
4657    /* build collator quaternary */
4658    myCollation = ucol_open("", &status);
4659    ucol_setStrength(myCollation, UCOL_QUATERNARY);
4660    if(U_FAILURE(status)) {
4661        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4662        return;
4663    }
4664    baseKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), baseKey, 256);
4665
4666    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4667    if(U_FAILURE(status)) {
4668        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4669        return;
4670    }
4671    reorderKeyLength = ucol_getSortKey(myCollation, testString, UPRV_LENGTHOF(testString), reorderKey, 256);
4672
4673    if (baseKeyLength != reorderKeyLength) {
4674        log_err("Key lengths not the same during reordering.\n");
4675        return;
4676    }
4677
4678    for (i = 1; i < baseKeyLength; i++) {
4679        if (baseKey[i] != reorderKey[i]) {
4680            log_err("Collation key bytes not the same at position %d.\n", i);
4681            return;
4682        }
4683    }
4684    ucol_close(myCollation);
4685}
4686
4687/*
4688 * Test reordering API.
4689 */
4690static void TestReorderingAPI(void)
4691{
4692    UErrorCode status = U_ZERO_ERROR;
4693    UCollator  *myCollation;
4694    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4695    int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_KATAKANA};
4696    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4697    int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;
4698    UCollationResult collResult;
4699    int32_t retrievedReorderCodesLength;
4700    int32_t retrievedReorderCodes[10];
4701    UChar greekString[] = { 0x03b1 };
4702    UChar punctuationString[] = { 0x203e };
4703    int loopIndex;
4704
4705    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4706
4707    /* build collator tertiary */
4708    myCollation = ucol_open("", &status);
4709    ucol_setStrength(myCollation, UCOL_TERTIARY);
4710    if(U_FAILURE(status)) {
4711        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4712        return;
4713    }
4714
4715    /* set the reorderding */
4716    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4717    if (U_FAILURE(status)) {
4718        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4719        return;
4720    }
4721
4722    /* get the reordering */
4723    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4724    if (status != U_BUFFER_OVERFLOW_ERROR) {
4725        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4726        return;
4727    }
4728    status = U_ZERO_ERROR;
4729    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4730        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4731        return;
4732    }
4733    /* now let's really get it */
4734    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4735    if (U_FAILURE(status)) {
4736        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4737        return;
4738    }
4739    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4740        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4741        return;
4742    }
4743    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4744        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4745            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4746            return;
4747        }
4748    }
4749    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4750    if (collResult != UCOL_LESS) {
4751        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4752        return;
4753    }
4754
4755    /* clear the reordering */
4756    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4757    if (U_FAILURE(status)) {
4758        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4759        return;
4760    }
4761
4762    /* get the reordering again */
4763    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4764    if (retrievedReorderCodesLength != 0) {
4765        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4766        return;
4767    }
4768
4769    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4770    if (collResult != UCOL_GREATER) {
4771        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4772        return;
4773    }
4774
4775    /* clear the reordering using [NONE] */
4776    ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);
4777    if (U_FAILURE(status)) {
4778        log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", myErrorName(status));
4779        return;
4780    }
4781
4782    /* get the reordering again */
4783    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4784    if (retrievedReorderCodesLength != 0) {
4785        log_err_status(status,
4786                       "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",
4787                       retrievedReorderCodesLength);
4788        return;
4789    }
4790
4791    /* test for error condition on duplicate reorder codes */
4792    ucol_setReorderCodes(myCollation, duplicateReorderCodes, UPRV_LENGTHOF(duplicateReorderCodes), &status);
4793    if (!U_FAILURE(status)) {
4794        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4795        return;
4796    }
4797
4798    status = U_ZERO_ERROR;
4799    /* test for reorder codes after a reset code */
4800    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, UPRV_LENGTHOF(reorderCodesStartingWithDefault), &status);
4801    if (!U_FAILURE(status)) {
4802        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4803        return;
4804    }
4805
4806    ucol_close(myCollation);
4807}
4808
4809/*
4810 * Test reordering API.
4811 */
4812static void TestReorderingAPIWithRuleCreatedCollator(void)
4813{
4814    UErrorCode status = U_ZERO_ERROR;
4815    UCollator  *myCollation;
4816    UChar rules[90];
4817    static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4818    static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4819    static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4820    UCollationResult collResult;
4821    int32_t retrievedReorderCodesLength;
4822    int32_t retrievedReorderCodes[10];
4823    static const UChar greekString[] = { 0x03b1 };
4824    static const UChar punctuationString[] = { 0x203e };
4825    static const UChar hanString[] = { 0x65E5, 0x672C };
4826    int loopIndex;
4827
4828    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4829
4830    /* build collator from rules */
4831    u_uastrcpy(rules, "[reorder Hani Grek]");
4832    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4833    if(U_FAILURE(status)) {
4834        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4835        return;
4836    }
4837
4838    /* get the reordering */
4839    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4840    if (U_FAILURE(status)) {
4841        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4842        return;
4843    }
4844    if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4845        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4846        return;
4847    }
4848    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4849        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4850            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4851            return;
4852        }
4853    }
4854    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), hanString, UPRV_LENGTHOF(hanString));
4855    if (collResult != UCOL_GREATER) {
4856        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4857        return;
4858    }
4859
4860    /* set the reordering */
4861    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
4862    if (U_FAILURE(status)) {
4863        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4864        return;
4865    }
4866
4867    /* get the reordering */
4868    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4869    if (status != U_BUFFER_OVERFLOW_ERROR) {
4870        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4871        return;
4872    }
4873    status = U_ZERO_ERROR;
4874    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4875        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4876        return;
4877    }
4878    /* now let's really get it */
4879    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4880    if (U_FAILURE(status)) {
4881        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4882        return;
4883    }
4884    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
4885        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
4886        return;
4887    }
4888    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4889        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4890            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4891            return;
4892        }
4893    }
4894    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4895    if (collResult != UCOL_LESS) {
4896        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4897        return;
4898    }
4899
4900    /* clear the reordering */
4901    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4902    if (U_FAILURE(status)) {
4903        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4904        return;
4905    }
4906
4907    /* get the reordering again */
4908    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4909    if (retrievedReorderCodesLength != 0) {
4910        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4911        return;
4912    }
4913
4914    collResult = ucol_strcoll(myCollation, greekString, UPRV_LENGTHOF(greekString), punctuationString, UPRV_LENGTHOF(punctuationString));
4915    if (collResult != UCOL_GREATER) {
4916        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4917        return;
4918    }
4919
4920    /* reset the reordering */
4921    ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4922    if (U_FAILURE(status)) {
4923        log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4924        return;
4925    }
4926    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
4927    if (U_FAILURE(status)) {
4928        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4929        return;
4930    }
4931    if (retrievedReorderCodesLength != UPRV_LENGTHOF(rulesReorderCodes)) {
4932        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(rulesReorderCodes));
4933        return;
4934    }
4935    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4936        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4937            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4938            return;
4939        }
4940    }
4941
4942    ucol_close(myCollation);
4943}
4944
4945static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int32_t expectedScript) {
4946    int32_t i;
4947    for (i = 0; i < length; ++i) {
4948        if (expectedScript == scripts[i]) { return TRUE; }
4949    }
4950    return FALSE;
4951}
4952
4953static void TestEquivalentReorderingScripts(void) {
4954    // Beginning with ICU 55, collation reordering moves single scripts
4955    // rather than groups of scripts,
4956    // except where scripts share a range and sort primary-equal.
4957    UErrorCode status = U_ZERO_ERROR;
4958    int32_t equivalentScripts[100];
4959    int32_t length;
4960    int i;
4961    int32_t prevScript;
4962    /* These scripts are expected to be equivalent. */
4963    static const int32_t expectedScripts[] = {
4964        USCRIPT_HIRAGANA,
4965        USCRIPT_KATAKANA,
4966        USCRIPT_KATAKANA_OR_HIRAGANA
4967    };
4968
4969    equivalentScripts[0] = 0;
4970    length = ucol_getEquivalentReorderCodes(
4971            USCRIPT_GOTHIC, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4972    if (U_FAILURE(status)) {
4973        log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4974        return;
4975    }
4976    if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) {
4977        log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "
4978                "length expected 1, was = %d; expected [%d] was [%d]\n",
4979                length, USCRIPT_GOTHIC, equivalentScripts[0]);
4980    }
4981
4982    length = ucol_getEquivalentReorderCodes(
4983            USCRIPT_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
4984    if (U_FAILURE(status)) {
4985        log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4986        return;
4987    }
4988    if (length != UPRV_LENGTHOF(expectedScripts)) {
4989        log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "
4990                "expected %d, was = %d\n",
4991                UPRV_LENGTHOF(expectedScripts), length);
4992    }
4993    prevScript = -1;
4994    for (i = 0; i < length; ++i) {
4995        int32_t script = equivalentScripts[i];
4996        if (script <= prevScript) {
4997            log_err("ERROR/Hiragana: equivalent scripts out of order at index %d\n", i);
4998        }
4999        prevScript = script;
5000    }
5001    for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5002        if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5003            log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",
5004                    expectedScripts[i]);
5005        }
5006    }
5007
5008    length = ucol_getEquivalentReorderCodes(
5009            USCRIPT_KATAKANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5010    if (U_FAILURE(status)) {
5011        log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder codes: %s\n", myErrorName(status));
5012        return;
5013    }
5014    if (length != UPRV_LENGTHOF(expectedScripts)) {
5015        log_err("ERROR/Katakana: retrieved equivalent script length wrong: "
5016                "expected %d, was = %d\n",
5017                UPRV_LENGTHOF(expectedScripts), length);
5018    }
5019    for (i = 0; i < UPRV_LENGTHOF(expectedScripts); i++) {
5020        if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i])) {
5021            log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",
5022                    expectedScripts[i]);
5023        }
5024    }
5025
5026    length = ucol_getEquivalentReorderCodes(
5027            USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5028    if (U_FAILURE(status) || length != UPRV_LENGTHOF(expectedScripts)) {
5029        log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "
5030                "expected %d, was = %d\n",
5031                UPRV_LENGTHOF(expectedScripts), length);
5032    }
5033
5034    length = ucol_getEquivalentReorderCodes(
5035            USCRIPT_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5036    if (U_FAILURE(status) || length != 3) {
5037        log_err("ERROR/Hani: retrieved equivalent script length wrong: "
5038                "expected 3, was = %d\n", length);
5039    }
5040    length = ucol_getEquivalentReorderCodes(
5041            USCRIPT_SIMPLIFIED_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5042    if (U_FAILURE(status) || length != 3) {
5043        log_err("ERROR/Hans: retrieved equivalent script length wrong: "
5044                "expected 3, was = %d\n", length);
5045    }
5046    length = ucol_getEquivalentReorderCodes(
5047            USCRIPT_TRADITIONAL_HAN, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5048    if (U_FAILURE(status) || length != 3) {
5049        log_err("ERROR/Hant: retrieved equivalent script length wrong: "
5050                "expected 3, was = %d\n", length);
5051    }
5052
5053    length = ucol_getEquivalentReorderCodes(
5054            USCRIPT_MEROITIC_CURSIVE, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5055    if (U_FAILURE(status) || length != 2) {
5056        log_err("ERROR/Merc: retrieved equivalent script length wrong: "
5057                "expected 2, was = %d\n", length);
5058    }
5059    length = ucol_getEquivalentReorderCodes(
5060            USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, UPRV_LENGTHOF(equivalentScripts), &status);
5061    if (U_FAILURE(status) || length != 2) {
5062        log_err("ERROR/Mero: retrieved equivalent script length wrong: "
5063                "expected 2, was = %d\n", length);
5064    }
5065}
5066
5067static void TestReorderingAcrossCloning(void)
5068{
5069    UErrorCode status = U_ZERO_ERROR;
5070    UCollator  *myCollation;
5071    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5072    UCollator *clonedCollation;
5073    int32_t retrievedReorderCodesLength;
5074    int32_t retrievedReorderCodes[10];
5075    int loopIndex;
5076
5077    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5078
5079    /* build collator tertiary */
5080    myCollation = ucol_open("", &status);
5081    ucol_setStrength(myCollation, UCOL_TERTIARY);
5082    if(U_FAILURE(status)) {
5083        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5084        return;
5085    }
5086
5087    /* set the reorderding */
5088    ucol_setReorderCodes(myCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5089    if (U_FAILURE(status)) {
5090        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5091        return;
5092    }
5093
5094    /* clone the collator */
5095    clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5096    if (U_FAILURE(status)) {
5097        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5098        return;
5099    }
5100
5101    /* get the reordering */
5102    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, UPRV_LENGTHOF(retrievedReorderCodes), &status);
5103    if (U_FAILURE(status)) {
5104        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5105        return;
5106    }
5107    if (retrievedReorderCodesLength != UPRV_LENGTHOF(reorderCodes)) {
5108        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, UPRV_LENGTHOF(reorderCodes));
5109        return;
5110    }
5111    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5112        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5113            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5114            return;
5115        }
5116    }
5117
5118    /*uprv_free(buffer);*/
5119    ucol_close(myCollation);
5120    ucol_close(clonedCollation);
5121}
5122
5123/*
5124 * Utility function to test one collation reordering test case set.
5125 * @param testcases Array of test cases.
5126 * @param n_testcases Size of the array testcases.
5127 * @param reorderTokens Array of reordering codes.
5128 * @param reorderTokensLen Size of the array reorderTokens.
5129 */
5130static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5131{
5132    uint32_t testCaseNum;
5133    UErrorCode status = U_ZERO_ERROR;
5134    UCollator  *myCollation;
5135
5136    myCollation = ucol_open("", &status);
5137    if (U_FAILURE(status)) {
5138        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5139        return;
5140    }
5141    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5142    if(U_FAILURE(status)) {
5143        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5144        return;
5145    }
5146
5147    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5148        doTest(myCollation,
5149            testCases[testCaseNum].source,
5150            testCases[testCaseNum].target,
5151            testCases[testCaseNum].result
5152        );
5153    }
5154    ucol_close(myCollation);
5155}
5156
5157static void TestGreekFirstReorder(void)
5158{
5159    const char* strRules[] = {
5160        "[reorder Grek]"
5161    };
5162
5163    const int32_t apiRules[] = {
5164        USCRIPT_GREEK
5165    };
5166
5167    const static OneTestCase privateUseCharacterStrings[] = {
5168        { {0x0391}, {0x0391}, UCOL_EQUAL },
5169        { {0x0041}, {0x0391}, UCOL_GREATER },
5170        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5171        { {0x0060}, {0x0391}, UCOL_LESS },
5172        { {0x0391}, {0xe2dc}, UCOL_LESS },
5173        { {0x0391}, {0x0060}, UCOL_GREATER },
5174    };
5175
5176    /* Test rules creation */
5177    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5178
5179    /* Test collation reordering API */
5180    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5181}
5182
5183static void TestGreekLastReorder(void)
5184{
5185    const char* strRules[] = {
5186        "[reorder Zzzz Grek]"
5187    };
5188
5189    const int32_t apiRules[] = {
5190        USCRIPT_UNKNOWN, USCRIPT_GREEK
5191    };
5192
5193    const static OneTestCase privateUseCharacterStrings[] = {
5194        { {0x0391}, {0x0391}, UCOL_EQUAL },
5195        { {0x0041}, {0x0391}, UCOL_LESS },
5196        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5197        { {0x0060}, {0x0391}, UCOL_LESS },
5198        { {0x0391}, {0xe2dc}, UCOL_GREATER },
5199    };
5200
5201    /* Test rules creation */
5202    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5203
5204    /* Test collation reordering API */
5205    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5206}
5207
5208static void TestNonScriptReorder(void)
5209{
5210    const char* strRules[] = {
5211        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5212    };
5213
5214    const int32_t apiRules[] = {
5215        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5216        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5217        UCOL_REORDER_CODE_CURRENCY
5218    };
5219
5220    const static OneTestCase privateUseCharacterStrings[] = {
5221        { {0x0391}, {0x0041}, UCOL_LESS },
5222        { {0x0041}, {0x0391}, UCOL_GREATER },
5223        { {0x0060}, {0x0041}, UCOL_LESS },
5224        { {0x0060}, {0x0391}, UCOL_GREATER },
5225        { {0x0024}, {0x0041}, UCOL_GREATER },
5226    };
5227
5228    /* Test rules creation */
5229    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5230
5231    /* Test collation reordering API */
5232    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5233}
5234
5235static void TestHaniReorder(void)
5236{
5237    const char* strRules[] = {
5238        "[reorder Hani]"
5239    };
5240    const int32_t apiRules[] = {
5241        USCRIPT_HAN
5242    };
5243
5244    const static OneTestCase privateUseCharacterStrings[] = {
5245        { {0x4e00}, {0x0041}, UCOL_LESS },
5246        { {0x4e00}, {0x0060}, UCOL_GREATER },
5247        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5248        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5249        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5250        { {0xfa27}, {0x0041}, UCOL_LESS },
5251        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5252    };
5253
5254    /* Test rules creation */
5255    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5256
5257    /* Test collation reordering API */
5258    doTestOneReorderingAPITestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), apiRules, UPRV_LENGTHOF(apiRules));
5259}
5260
5261static void TestHaniReorderWithOtherRules(void)
5262{
5263    const char* strRules[] = {
5264        "[reorder Hani] &b<a"
5265    };
5266    /*const int32_t apiRules[] = {
5267        USCRIPT_HAN
5268    };*/
5269
5270    const static OneTestCase privateUseCharacterStrings[] = {
5271        { {0x4e00}, {0x0041}, UCOL_LESS },
5272        { {0x4e00}, {0x0060}, UCOL_GREATER },
5273        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5274        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5275        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5276        { {0xfa27}, {0x0041}, UCOL_LESS },
5277        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5278        { {0x0062}, {0x0061}, UCOL_LESS },
5279    };
5280
5281    /* Test rules creation */
5282    doTestOneTestCase(privateUseCharacterStrings, UPRV_LENGTHOF(privateUseCharacterStrings), strRules, UPRV_LENGTHOF(strRules));
5283}
5284
5285static void TestMultipleReorder(void)
5286{
5287    const char* strRules[] = {
5288        "[reorder Grek Zzzz DIGIT Latn Hani]"
5289    };
5290
5291    const int32_t apiRules[] = {
5292        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5293    };
5294
5295    const static OneTestCase collationTestCases[] = {
5296        { {0x0391}, {0x0041}, UCOL_LESS},
5297        { {0x0031}, {0x0041}, UCOL_LESS},
5298        { {0x0041}, {0x4e00}, UCOL_LESS},
5299    };
5300
5301    /* Test rules creation */
5302    doTestOneTestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), strRules, UPRV_LENGTHOF(strRules));
5303
5304    /* Test collation reordering API */
5305    doTestOneReorderingAPITestCase(collationTestCases, UPRV_LENGTHOF(collationTestCases), apiRules, UPRV_LENGTHOF(apiRules));
5306}
5307
5308/*
5309 * Test that covers issue reported in ticket 8814
5310 */
5311static void TestReorderWithNumericCollation(void)
5312{
5313    UErrorCode status = U_ZERO_ERROR;
5314    UCollator  *myCollation;
5315    UCollator  *myReorderCollation;
5316    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5317    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5318    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5319    UChar fortyS[] = { 0x0053 };
5320    UChar fortyThreeP[] = { 0x0050 };
5321    uint8_t fortyS_sortKey[128];
5322    int32_t fortyS_sortKey_Length;
5323    uint8_t fortyThreeP_sortKey[128];
5324    int32_t fortyThreeP_sortKey_Length;
5325    uint8_t fortyS_sortKey_reorder[128];
5326    int32_t fortyS_sortKey_reorder_Length;
5327    uint8_t fortyThreeP_sortKey_reorder[128];
5328    int32_t fortyThreeP_sortKey_reorder_Length;
5329    UCollationResult collResult;
5330    UCollationResult collResultReorder;
5331
5332    log_verbose("Testing reordering with and without numeric collation\n");
5333
5334    /* build collator tertiary with numeric */
5335    myCollation = ucol_open("", &status);
5336    /*
5337    ucol_setStrength(myCollation, UCOL_TERTIARY);
5338    */
5339    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5340    if(U_FAILURE(status)) {
5341        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5342        return;
5343    }
5344
5345    /* build collator tertiary with numeric and reordering */
5346    myReorderCollation = ucol_open("", &status);
5347    /*
5348    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5349    */
5350    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5351    ucol_setReorderCodes(myReorderCollation, reorderCodes, UPRV_LENGTHOF(reorderCodes), &status);
5352    if(U_FAILURE(status)) {
5353        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5354        return;
5355    }
5356
5357    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey, 128);
5358    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey, 128);
5359    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyS_sortKey_reorder, 128);
5360    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, UPRV_LENGTHOF(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5361
5362    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5363        log_err_status(status, "ERROR: couldn't generate sort keys\n");
5364        return;
5365    }
5366    collResult = ucol_strcoll(myCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5367    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, UPRV_LENGTHOF(fortyS), fortyThreeP, UPRV_LENGTHOF(fortyThreeP));
5368    /*
5369    fprintf(stderr, "\tcollResult = %x\n", collResult);
5370    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5371    fprintf(stderr, "\nfortyS\n");
5372    for (i = 0; i < fortyS_sortKey_Length; i++) {
5373        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5374    }
5375    fprintf(stderr, "\nfortyThreeP\n");
5376    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5377        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5378    }
5379    */
5380    if (collResult != collResultReorder) {
5381        log_err_status(status, "ERROR: collation results should have been the same.\n");
5382        return;
5383    }
5384
5385    ucol_close(myCollation);
5386    ucol_close(myReorderCollation);
5387}
5388
/*
 * Compares two zero-terminated byte sequences as unsigned values, in the
 * manner of strcmp(): walks both in lockstep until the bytes differ or a
 * shared terminating 0 is reached.
 * Returns 0 if the sequences are identical up to and including the
 * terminator, -1 if the first differing byte of a is smaller, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  while (*a == *b) {
    if (*a == 0) {
      /* Both sequences ended at the same position. */
      return 0;
    }
    a++;
    b++;
  }
  if (*a < *b) {
    return -1;
  }
  return 1;
}
5398
5399static void TestImportRulesDeWithPhonebook(void)
5400{
5401  const char* normalRules[] = {
5402    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5403    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5404    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5405  };
5406  const OneTestCase normalTests[] = {
5407    { {0x00e6}, {0x00c6}, UCOL_LESS},
5408    { {0x00fc}, {0x00dc}, UCOL_GREATER},
5409  };
5410
5411  const char* importRules[] = {
5412    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5413    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5414    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5415  };
5416  const OneTestCase importTests[] = {
5417    { {0x00e6}, {0x00c6}, UCOL_LESS},
5418    { {0x00fc}, {0x00dc}, UCOL_LESS},
5419  };
5420
5421  doTestOneTestCase(normalTests, UPRV_LENGTHOF(normalTests), normalRules, UPRV_LENGTHOF(normalRules));
5422  doTestOneTestCase(importTests, UPRV_LENGTHOF(importTests), importRules, UPRV_LENGTHOF(importRules));
5423}
5424
#if 0
/*
 * Currently compiled out (#if 0).
 * Compares a dummy-rule baseline against collators built from "[import ...]"
 * rules for the European Ordering Rules (root-u-co-eor), the Finnish standard
 * tailoring (fi-u-co-standard), and the two combined.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET. */
  const char* baselineRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };
  const OneTestCase baselineCases[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* eorOnlyRules[] = {
    "[import root-u-co-eor]",
  };
  const OneTestCase eorOnlyCases[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard tailoring on its own. */
  const char* fiStandardRules[] = {
    "[import fi-u-co-standard]",
  };
  const OneTestCase fiStandardCases[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorPlusFiStandardRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* Essentially the same as the combination above once fi.txt is updated with the import. */
  const char* fiEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Expectations shared by the combined rules and (eventually) fi-u-co-eor. */
  const OneTestCase fiEorCases[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(baselineCases, UPRV_LENGTHOF(baselineCases), baselineRules, UPRV_LENGTHOF(baselineRules));
  doTestOneTestCase(eorOnlyCases, UPRV_LENGTHOF(eorOnlyCases), eorOnlyRules, UPRV_LENGTHOF(eorOnlyRules));
  doTestOneTestCase(fiStandardCases, UPRV_LENGTHOF(fiStandardCases), fiStandardRules, UPRV_LENGTHOF(fiStandardRules));
  doTestOneTestCase(fiEorCases, UPRV_LENGTHOF(fiEorCases), eorPlusFiStandardRules, UPRV_LENGTHOF(eorPlusFiStandardRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorCases, UPRV_LENGTHOF(fiEorCases), fiEorRules, UPRV_LENGTHOF(fiEorRules)); */

}
#endif
5494
#if 0
/*
 * Currently compiled out (#if 0).
 * This test case tests import of the unihan rules, which cannot be enabled
 * unless the resource files are built with the -includeUnihanColl option.
 * TODO: Enable this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET. */
  const char* baselineRules[] = {
    "&a<b",                                    /* Dummy rule. */
  };
  const OneTestCase baselineCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Rules pulled in via import of the Korean unihan collation. */
  const char* unihanImportRules[] = {
    "[import ko-u-co-unihan]",
  };
  /* Same pair of ideographs; the expected order is the reverse of the baseline. */
  const OneTestCase unihanImportCases[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(baselineCases, UPRV_LENGTHOF(baselineCases), baselineRules, UPRV_LENGTHOF(baselineRules));
  doTestOneTestCase(unihanImportCases, UPRV_LENGTHOF(unihanImportCases), unihanImportRules, UPRV_LENGTHOF(unihanImportRules));

}
#endif
5526
5527static void TestImport(void)
5528{
5529    UCollator* vicoll;
5530    UCollator* escoll;
5531    UCollator* viescoll;
5532    UCollator* importviescoll;
5533    UParseError error;
5534    UErrorCode status = U_ZERO_ERROR;
5535    UChar* virules;
5536    int32_t viruleslength;
5537    UChar* esrules;
5538    int32_t esruleslength;
5539    UChar* viesrules;
5540    int32_t viesruleslength;
5541    char srules[500] = "[import vi][import es]";
5542    UChar rules[500];
5543    uint32_t length = 0;
5544    int32_t itemCount;
5545    int32_t i, k;
5546    UChar32 start;
5547    UChar32 end;
5548    UChar str[500];
5549    int32_t strLength;
5550
5551    uint8_t sk1[500];
5552    uint8_t sk2[500];
5553
5554    UBool b;
5555    USet* tailoredSet;
5556    USet* importTailoredSet;
5557
5558
5559    vicoll = ucol_open("vi", &status);
5560    if(U_FAILURE(status)){
5561        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5562        return;
5563    }
5564
5565    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5566    if(viruleslength == 0) {
5567        log_data_err("missing vi tailoring rule string\n");
5568        ucol_close(vicoll);
5569        return;
5570    }
5571    escoll = ucol_open("es", &status);
5572    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5573    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5574    viesrules[0] = 0;
5575    u_strcat(viesrules, virules);
5576    u_strcat(viesrules, esrules);
5577    viesruleslength = viruleslength + esruleslength;
5578    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5579
5580    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5581    length = u_unescape(srules, rules, 500);
5582    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5583    if(U_FAILURE(status)){
5584        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585        return;
5586    }
5587
5588    tailoredSet = ucol_getTailoredSet(viescoll, &status);
5589    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5590
5591    if(!uset_equals(tailoredSet, importTailoredSet)){
5592        log_err("Tailored sets not equal");
5593    }
5594
5595    uset_close(importTailoredSet);
5596
5597    itemCount = uset_getItemCount(tailoredSet);
5598
5599    for( i = 0; i < itemCount; i++){
5600        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5601        if(strLength < 2){
5602            for (; start <= end; start++){
5603                k = 0;
5604                U16_APPEND(str, k, 500, start, b);
5605                (void)b;    /* Suppress set but not used warning. */
5606                ucol_getSortKey(viescoll, str, 1, sk1, 500);
5607                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5608                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5609                    log_err("Sort key for %s not equal\n", str);
5610                    break;
5611                }
5612            }
5613        }else{
5614            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5615            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5616            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5617                log_err("ZZSort key for %s not equal\n", str);
5618                break;
5619            }
5620
5621        }
5622    }
5623
5624    uset_close(tailoredSet);
5625
5626    uprv_free(viesrules);
5627
5628    ucol_close(vicoll);
5629    ucol_close(escoll);
5630    ucol_close(viescoll);
5631    ucol_close(importviescoll);
5632}
5633
5634static void TestImportWithType(void)
5635{
5636    UCollator* vicoll;
5637    UCollator* decoll;
5638    UCollator* videcoll;
5639    UCollator* importvidecoll;
5640    UParseError error;
5641    UErrorCode status = U_ZERO_ERROR;
5642    const UChar* virules;
5643    int32_t viruleslength;
5644    const UChar* derules;
5645    int32_t deruleslength;
5646    UChar* viderules;
5647    int32_t videruleslength;
5648    const char srules[500] = "[import vi][import de-u-co-phonebk]";
5649    UChar rules[500];
5650    uint32_t length = 0;
5651    int32_t itemCount;
5652    int32_t i, k;
5653    UChar32 start;
5654    UChar32 end;
5655    UChar str[500];
5656    int32_t strLength;
5657
5658    uint8_t sk1[500];
5659    uint8_t sk2[500];
5660
5661    USet* tailoredSet;
5662    USet* importTailoredSet;
5663
5664    vicoll = ucol_open("vi", &status);
5665    if(U_FAILURE(status)){
5666        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5667        return;
5668    }
5669    virules = ucol_getRules(vicoll, &viruleslength);
5670    if(viruleslength == 0) {
5671        log_data_err("missing vi tailoring rule string\n");
5672        ucol_close(vicoll);
5673        return;
5674    }
5675    /* decoll = ucol_open("de@collation=phonebook", &status); */
5676    decoll = ucol_open("de-u-co-phonebk", &status);
5677    if(U_FAILURE(status)){
5678        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5679        return;
5680    }
5681
5682
5683    derules = ucol_getRules(decoll, &deruleslength);
5684    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5685    viderules[0] = 0;
5686    u_strcat(viderules, virules);
5687    u_strcat(viderules, derules);
5688    videruleslength = viruleslength + deruleslength;
5689    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5690
5691    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5692    length = u_unescape(srules, rules, 500);
5693    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5694    if(U_FAILURE(status)){
5695        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5696        return;
5697    }
5698
5699    tailoredSet = ucol_getTailoredSet(videcoll, &status);
5700    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5701
5702    if(!uset_equals(tailoredSet, importTailoredSet)){
5703        log_err("Tailored sets not equal");
5704    }
5705
5706    uset_close(importTailoredSet);
5707
5708    itemCount = uset_getItemCount(tailoredSet);
5709
5710    for( i = 0; i < itemCount; i++){
5711        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5712        if(strLength < 2){
5713            for (; start <= end; start++){
5714                k = 0;
5715                U16_APPEND_UNSAFE(str, k, start);
5716                ucol_getSortKey(videcoll, str, 1, sk1, 500);
5717                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5718                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5719                    log_err("Sort key for %s not equal\n", str);
5720                    break;
5721                }
5722            }
5723        }else{
5724            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5725            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5726            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5727                log_err("Sort key for %s not equal\n", str);
5728                break;
5729            }
5730
5731        }
5732    }
5733
5734    uset_close(tailoredSet);
5735
5736    uprv_free(viderules);
5737
5738    ucol_close(videcoll);
5739    ucol_close(importvidecoll);
5740    ucol_close(vicoll);
5741    ucol_close(decoll);
5742}
5743
/*
 * Test string #1, listed in longUpperStrItems (which TestCaseLevelBufferOverflow iterates).
 * Stored as numeric UTF-16 code units, without a terminating NUL; the length is
 * taken from the array size where the table entry is built.
 * Text:
 * 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia'
 */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
5757
/*
 * Test string #2, listed in longUpperStrItems (which TestCaseLevelBufferOverflow iterates).
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times.
 * Each row below is one 25-code-unit repetition; there is no terminating NUL.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
5766
/*
 * Test string #3, listed in longUpperStrItems (which TestCaseLevelBufferOverflow iterates).
 * 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times.
 * Each row below is one 27-code-unit repetition; there is no terminating NUL.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
5782
/* One entry of the longUpperStrItems table: a UTF-16 string given as pointer plus explicit length. */
typedef struct {
    const UChar * longUpperStrPtr; /* start of the string; NULL marks the end of the table */
    int32_t       longUpperStrLen; /* number of UChar code units at longUpperStrPtr */
} LongUpperStrItem;
5787
/*
 * Input table for TestCaseLevelBufferOverflow, terminated by a { NULL, 0 } entry.
 * String pointers must be in reverse collation order of the corresponding strings:
 * the test reports an error unless each entry's sort key compares strictly less
 * than the sort key of the entry before it.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, UPRV_LENGTHOF(longUpperStr1) },
    { longUpperStr2, UPRV_LENGTHOF(longUpperStr2) },
    { longUpperStr3, UPRV_LENGTHOF(longUpperStr3) },
    { NULL,          0                           }
};
5795
/*
 * Capacity, in bytes, of the sort key buffers used by TestCaseLevelBufferOverflow.
 * A sort key length above this value is reported by the test as an error.
 */
enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5797
5798/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5799static void TestCaseLevelBufferOverflow(void)
5800{
5801    UErrorCode status = U_ZERO_ERROR;
5802    UCollator * ucol = ucol_open("root", &status);
5803    if ( U_SUCCESS(status) ) {
5804        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5805        if ( U_SUCCESS(status) ) {
5806            const LongUpperStrItem * itemPtr;
5807            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5808            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5809                int32_t sortKeyLen;
5810                if (itemPtr > longUpperStrItems) {
5811                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5812                }
5813                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5814                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5815                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5816                    break;
5817                }
5818                if ( itemPtr > longUpperStrItems ) {
5819                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5820                    if (compareResult >= 0) {
5821                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5822                    }
5823                }
5824            }
5825        } else {
5826            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5827        }
5828        ucol_close(ucol);
5829    } else {
5830        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5831    }
5832}
5833
/* Test for #10595 */
/* Input text for TestNextSortKeyPartJaIdentical: five UTF-16 code units followed by a terminating 0. */
static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
/* Size in bytes of the buffer that TestNextSortKeyPartJaIdentical passes to ucol_nextSortKeyPart. */
#define KEY_PART_SIZE 16
5837
5838static void TestNextSortKeyPartJaIdentical(void)
5839{
5840    UErrorCode status = U_ZERO_ERROR;
5841    UCollator *coll;
5842    uint8_t keyPart[KEY_PART_SIZE];
5843    UCharIterator iter;
5844    uint32_t state[2] = {0, 0};
5845    int32_t keyPartLen;
5846
5847    coll = ucol_open("ja", &status);
5848    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5849    if (U_FAILURE(status)) {
5850        log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5851        return;
5852    }
5853
5854    uiter_setString(&iter, testJapaneseName, 5);
5855    keyPartLen = KEY_PART_SIZE;
5856    while (keyPartLen == KEY_PART_SIZE) {
5857        keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5858        if (U_FAILURE(status)) {
5859            log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5860            break;
5861        }
5862    }
5863
5864    ucol_close(coll);
5865}
5866
/*
 * Registers test function x by calling addTest() with the path "tscoll/cmsccoll/<x>",
 * where <x> is the stringified function name. Expects a variable named `root`
 * to be in scope at the point of use.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5868
/*
 * Registers the miscellaneous collation tests of this file.
 *
 * Each TEST(x) line passes `root`, the address of test function x, and the path
 * "tscoll/cmsccoll/x" to addTest(). Entries wrapped in comments are tests that
 * are currently not registered; the reason is noted next to some of them.
 *
 * @param root  test tree handle forwarded unchanged to addTest()
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestMaxVariable);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    /* Rule-syntax tests: same-strength lists, ranges, special and private-use characters, imports */
    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Reordering tests */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    /* Regression tests defined at the end of this file */
    TEST(TestCaseLevelBufferOverflow);
    TEST(TestNextSortKeyPartJaIdentical);
}
5973
5974#endif /* #if !UCONFIG_NO_COLLATION */
5975