1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2014, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "cmemory.h"
33#include "cstring.h"
34#include "uassert.h"
35#include "unicode/parseerr.h"
36#include "unicode/ucnv.h"
37#include "unicode/ures.h"
38#include "unicode/uscript.h"
39#include "unicode/utf16.h"
40#include "uparse.h"
41#include "putilimp.h"
42
43
/* Number of elements in a true array `a`; not valid for pointers or array parameters. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))
45
46#define MAX_TOKEN_LEN 16
47
48typedef UCollationResult tst_strcoll(void *collator, const int object,
49                        const UChar *source, const int sLen,
50                        const UChar *target, const int tLen);
51
52
53
/*
 * Inputs for IncompleteCntTest, first rule set.  The test compares every
 * entry against every later entry and expects UCOL_LESS each time.
 */
static const char cnt1[][10] = {
  "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/* Inputs for IncompleteCntTest, second rule set; same pairwise expectation. */
static const char cnt2[][10] = {
  "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
80
81static void IncompleteCntTest(void)
82{
83  UErrorCode status = U_ZERO_ERROR;
84  UChar temp[90];
85  UChar t1[90];
86  UChar t2[90];
87
88  UCollator *coll =  NULL;
89  uint32_t i = 0, j = 0;
90  uint32_t size = 0;
91
92  u_uastrcpy(temp, " & Z < ABC < Q < B");
93
94  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95
96  if(U_SUCCESS(status)) {
97    size = sizeof(cnt1)/sizeof(cnt1[0]);
98    for(i = 0; i < size-1; i++) {
99      for(j = i+1; j < size; j++) {
100        UCollationElements *iter;
101        u_uastrcpy(t1, cnt1[i]);
102        u_uastrcpy(t2, cnt1[j]);
103        doTest(coll, t1, t2, UCOL_LESS);
104        /* synwee : added collation element iterator test */
105        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106        if (U_FAILURE(status)) {
107          log_err("Creation of iterator failed\n");
108          break;
109        }
110        backAndForth(iter);
111        ucol_closeElements(iter);
112      }
113    }
114  }
115
116  ucol_close(coll);
117
118
119  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121
122  if(U_SUCCESS(status)) {
123    size = sizeof(cnt2)/sizeof(cnt2[0]);
124    for(i = 0; i < size-1; i++) {
125      for(j = i+1; j < size; j++) {
126        UCollationElements *iter;
127        u_uastrcpy(t1, cnt2[i]);
128        u_uastrcpy(t2, cnt2[j]);
129        doTest(coll, t1, t2, UCOL_LESS);
130
131        /* synwee : added collation element iterator test */
132        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133        if (U_FAILURE(status)) {
134          log_err("Creation of iterator failed\n");
135          break;
136        }
137        backAndForth(iter);
138        ucol_closeElements(iter);
139      }
140    }
141  }
142
143  ucol_close(coll);
144
145
146}
147
/*
 * Inputs for the UCOL_SHIFTED passes of BlackBirdTest.  At quaternary
 * strength every entry is expected to compare UCOL_LESS than each later one.
 */
static const char shifted[][20] = {
  "black bird",  "black-bird",  "blackbird",
  "black Bird",  "black-Bird",  "blackBird",
  "black birds", "black-birds", "blackbirds"
};
159
160const static UCollationResult shiftedTert[] = {
161  UCOL_EQUAL,
162  UCOL_EQUAL,
163  UCOL_EQUAL,
164  UCOL_LESS,
165  UCOL_EQUAL,
166  UCOL_EQUAL,
167  UCOL_LESS,
168  UCOL_EQUAL,
169  UCOL_EQUAL
170};
171
/*
 * Inputs for the UCOL_NON_IGNORABLE pass of BlackBirdTest; every entry is
 * expected to compare UCOL_LESS than each later one.
 */
static const char nonignorable[][20] = {
  "black bird", "black Bird", "black birds",
  "black-bird", "black-Bird", "black-birds",
  "blackbird",  "blackBird",  "blackbirds"
};
183
184static void BlackBirdTest(void) {
185  UErrorCode status = U_ZERO_ERROR;
186  UChar t1[90];
187  UChar t2[90];
188
189  uint32_t i = 0, j = 0;
190  uint32_t size = 0;
191  UCollator *coll = ucol_open("en_US", &status);
192
193  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195
196  if(U_SUCCESS(status)) {
197    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198    for(i = 0; i < size-1; i++) {
199      for(j = i+1; j < size; j++) {
200        u_uastrcpy(t1, nonignorable[i]);
201        u_uastrcpy(t2, nonignorable[j]);
202        doTest(coll, t1, t2, UCOL_LESS);
203      }
204    }
205  }
206
207  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209
210  if(U_SUCCESS(status)) {
211    size = sizeof(shifted)/sizeof(shifted[0]);
212    for(i = 0; i < size-1; i++) {
213      for(j = i+1; j < size; j++) {
214        u_uastrcpy(t1, shifted[i]);
215        u_uastrcpy(t2, shifted[j]);
216        doTest(coll, t1, t2, UCOL_LESS);
217      }
218    }
219  }
220
221  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222  if(U_SUCCESS(status)) {
223    size = sizeof(shifted)/sizeof(shifted[0]);
224    for(i = 1; i < size; i++) {
225      u_uastrcpy(t1, shifted[i-1]);
226      u_uastrcpy(t2, shifted[i]);
227      doTest(coll, t1, t2, shiftedTert[i]);
228    }
229  }
230
231  ucol_close(coll);
232}
233
234const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
235    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237    {0x0041/*'A'*/, 0x0300, 0x0000},
238    {0x00C0, 0x0301, 0x0000},
239    /* this would work with forced normalization */
240    {0x00C0, 0x0316, 0x0000}
241};
242
243const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
244    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246    {0x00C0, 0},
247    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248    /* this would work with forced normalization */
249    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
250};
251
252const static UCollationResult results[] = {
253    UCOL_GREATER,
254    UCOL_EQUAL,
255    UCOL_EQUAL,
256    UCOL_GREATER,
257    UCOL_EQUAL
258};
259
260static void FunkyATest(void)
261{
262
263    int32_t i;
264    UErrorCode status = U_ZERO_ERROR;
265    UCollator  *myCollation;
266    myCollation = ucol_open("en_US", &status);
267    if(U_FAILURE(status)){
268        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269        return;
270    }
271    log_verbose("Testing some A letters, for some reason\n");
272    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273    ucol_setStrength(myCollation, UCOL_TERTIARY);
274    for (i = 0; i < 4 ; i++)
275    {
276        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277    }
278    ucol_close(myCollation);
279}
280
281UColAttributeValue caseFirst[] = {
282    UCOL_OFF,
283    UCOL_LOWER_FIRST,
284    UCOL_UPPER_FIRST
285};
286
287
288UColAttributeValue alternateHandling[] = {
289    UCOL_NON_IGNORABLE,
290    UCOL_SHIFTED
291};
292
293UColAttributeValue caseLevel[] = {
294    UCOL_OFF,
295    UCOL_ON
296};
297
298UColAttributeValue strengths[] = {
299    UCOL_PRIMARY,
300    UCOL_SECONDARY,
301    UCOL_TERTIARY,
302    UCOL_QUATERNARY,
303    UCOL_IDENTICAL
304};
305
306#if 0
/* Printable names, index-parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY", "UCOL_SECONDARY", "UCOL_TERTIARY", "UCOL_QUATERNARY", "UCOL_IDENTICAL"
};

/* Printable names, index-parallel to caseFirst[]. */
static const char * caseFirstC[] = { "UCOL_OFF", "UCOL_LOWER_FIRST", "UCOL_UPPER_FIRST" };

/* Printable names, index-parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = { "UCOL_NON_IGNORABLE", "UCOL_SHIFTED" };

/* Printable names, index-parallel to caseLevel[]. */
static const char * caseLevelC[] = { "UCOL_OFF", "UCOL_ON" };
331
332/* not used currently - does not test only prints */
333static void PrintMarkDavis(void)
334{
335  UErrorCode status = U_ZERO_ERROR;
336  UChar m[256];
337  uint8_t sortkey[256];
338  UCollator *coll = ucol_open("en_US", &status);
339  uint32_t h,i,j,k, sortkeysize;
340  uint32_t sizem = 0;
341  char buffer[512];
342  uint32_t len = 512;
343
344  log_verbose("PrintMarkDavis");
345
346  u_uastrcpy(m, "Mark Davis");
347  sizem = u_strlen(m);
348
349
350  m[1] = 0xe4;
351
352  for(i = 0; i<sizem; i++) {
353    fprintf(stderr, "\\u%04X ", m[i]);
354  }
355  fprintf(stderr, "\n");
356
357  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
358    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
359    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
360
361    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
362      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
363      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
364
365      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
366        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
367        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
368
369        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
370          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
371          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
372          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
373          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
374        }
375
376      }
377
378    }
379
380  }
381}
382#endif
383
384static void BillFairmanTest(void) {
385/*
386** check for actual locale via ICU resource bundles
387**
388** lp points to the original locale ("fr_FR_....")
389*/
390
391    UResourceBundle *lr,*cr;
392    UErrorCode              lec = U_ZERO_ERROR;
393    const char *lp = "fr_FR_you_ll_never_find_this_locale";
394
395    log_verbose("BillFairmanTest\n");
396
397    lr = ures_open(NULL,lp,&lec);
398    if (lr) {
399        cr = ures_getByKey(lr,"collations",0,&lec);
400        if (cr) {
401            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402            if (lp) {
403                if (U_SUCCESS(lec)) {
404                    if(strcmp(lp, "fr") != 0) {
405                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406                    }
407                }
408            }
409            ures_close(cr);
410        }
411        ures_close(lr);
412    }
413}
414
/*
 * Escaped input strings for TestChMove, which runs them through
 * u_unescape() and expects each entry to compare UCOL_LESS than every
 * later one under the "cs" collator.
 */
static const char chTest[][20] = {
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
434
435static void TestChMove(void) {
436    UChar t1[256] = {0};
437    UChar t2[256] = {0};
438
439    uint32_t i = 0, j = 0;
440    uint32_t size = 0;
441    UErrorCode status = U_ZERO_ERROR;
442
443    UCollator *coll = ucol_open("cs", &status);
444
445    if(U_SUCCESS(status)) {
446        size = sizeof(chTest)/sizeof(chTest[0]);
447        for(i = 0; i < size-1; i++) {
448            for(j = i+1; j < size; j++) {
449                u_unescape(chTest[i], t1, 256);
450                u_unescape(chTest[j], t2, 256);
451                doTest(coll, t1, t2, UCOL_LESS);
452            }
453        }
454    }
455    else {
456        log_data_err("Can't open collator");
457    }
458    ucol_close(coll);
459}
460
461
462
463
464/*
465const static char impTest[][20] = {
466  "\\u4e00",
467    "a",
468    "A",
469    "b",
470    "B",
471    "\\u4e01"
472};
473*/
474
475
/*
 * Table-driven check of rules that tailor relative to U+4E00 / U+4E01.
 * Each case supplies a rule string and the strings in their expected
 * order; genericRulesStarter() does the actual verification.
 *
 * An older hand-written variant of the second case (built around the
 * commented-out impTest table) is covered by the table below.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      {
        /* Tailor b and c before U+4E00. */
        "&[before 1]\\u4e00 < b < c "
        /* Now, before U+4E00 is c; put d and e after that. */
        "&[before 1]\\u4e00 < d < e",
        { "b", "c", "d", "e", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };
  const size_t testCount = sizeof(tests)/sizeof(tests[0]);
  size_t which;

  for(which = 0; which < testCount; ++which) {
      genericRulesStarter(tests[which].rules, tests[which].data, tests[which].len);
  }
}
530
531static void TestFCDProblem(void) {
532  UChar t1[256] = {0};
533  UChar t2[256] = {0};
534
535  const char *s1 = "\\u0430\\u0306\\u0325";
536  const char *s2 = "\\u04D1\\u0325";
537
538  UErrorCode status = U_ZERO_ERROR;
539  UCollator *coll = ucol_open("", &status);
540  u_unescape(s1, t1, 256);
541  u_unescape(s2, t2, 256);
542
543  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
544  doTest(coll, t1, t2, UCOL_EQUAL);
545
546  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
547  doTest(coll, t1, t2, UCOL_EQUAL);
548
549  ucol_close(coll);
550}
551
552/*
553The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
554We're only using NFC/NFD in this test.
555*/
556#define NORM_BUFFER_TEST_LEN 18
557typedef struct {
558  UChar32 u;
559  UChar NFC[NORM_BUFFER_TEST_LEN];
560  UChar NFD[NORM_BUFFER_TEST_LEN];
561} tester;
562
563static void TestComposeDecompose(void) {
564    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
565    static const UChar UNICODESET_STR[] = {
566        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
567        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
568        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
569    };
570    int32_t noOfLoc;
571    int32_t i = 0, j = 0;
572
573    UErrorCode status = U_ZERO_ERROR;
574    const char *locName = NULL;
575    uint32_t nfcSize;
576    uint32_t nfdSize;
577    tester **t;
578    uint32_t noCases = 0;
579    UCollator *coll = NULL;
580    UChar32 u = 0;
581    UChar comp[NORM_BUFFER_TEST_LEN];
582    uint32_t len = 0;
583    UCollationElements *iter;
584    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
585    int32_t charsToTestSize;
586
587    noOfLoc = uloc_countAvailable();
588
589    coll = ucol_open("", &status);
590    if (U_FAILURE(status)) {
591        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
592        return;
593    }
594    charsToTestSize = uset_size(charsToTest);
595    if (charsToTestSize <= 0) {
596        log_err("Set was zero. Missing data?\n");
597        return;
598    }
599    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
600    t[0] = (tester *)malloc(sizeof(tester));
601    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
602
603    for(u = 0; u < charsToTestSize; u++) {
604        UChar32 ch = uset_charAt(charsToTest, u);
605        len = 0;
606        U16_APPEND_UNSAFE(comp, len, ch);
607        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
608        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
609
610        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
611          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
612            t[noCases]->u = ch;
613            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
614                u_strncpy(t[noCases]->NFC, comp, len);
615                t[noCases]->NFC[len] = 0;
616            }
617            noCases++;
618            t[noCases] = (tester *)malloc(sizeof(tester));
619            uprv_memset(t[noCases], 0, sizeof(tester));
620        }
621    }
622    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
623    uset_close(charsToTest);
624    charsToTest = NULL;
625
626    for(u=0; u<(UChar32)noCases; u++) {
627        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
628            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
629            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
630        }
631    }
632    /*
633    for(u = 0; u < charsToTestSize; u++) {
634      if(!(u&0xFFFF)) {
635        log_verbose("%08X ", u);
636      }
637      uprv_memset(t[noCases], 0, sizeof(tester));
638      t[noCases]->u = u;
639      len = 0;
640      U16_APPEND_UNSAFE(comp, len, u);
641      comp[len] = 0;
642      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
643      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
644      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
645      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
646    }
647    */
648
649    ucol_close(coll);
650
651    log_verbose("Testing locales, number of cases = %i\n", noCases);
652    for(i = 0; i<noOfLoc; i++) {
653        status = U_ZERO_ERROR;
654        locName = uloc_getAvailable(i);
655        if(hasCollationElements(locName)) {
656            char cName[256];
657            UChar name[256];
658            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
659
660            for(j = 0; j<nameSize; j++) {
661                cName[j] = (char)name[j];
662            }
663            cName[nameSize] = 0;
664            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
665
666            coll = ucol_open(locName, &status);
667            ucol_setStrength(coll, UCOL_IDENTICAL);
668            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
669
670            for(u=0; u<(UChar32)noCases; u++) {
671                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
672                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
673                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
674                    log_verbose("Testing NFC\n");
675                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
676                    backAndForth(iter);
677                    log_verbose("Testing NFD\n");
678                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
679                    backAndForth(iter);
680                }
681            }
682            ucol_closeElements(iter);
683            ucol_close(coll);
684        }
685    }
686    for(u = 0; u <= (UChar32)noCases; u++) {
687        free(t[u]);
688    }
689    free(t);
690}
691
692static void TestEmptyRule(void) {
693  UErrorCode status = U_ZERO_ERROR;
694  UChar rulez[] = { 0 };
695  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
696
697  ucol_close(coll);
698}
699
700static void TestUCARules(void) {
701  UErrorCode status = U_ZERO_ERROR;
702  UChar b[256];
703  UChar *rules = b;
704  uint32_t ruleLen = 0;
705  UCollator *UCAfromRules = NULL;
706  UCollator *coll = ucol_open("", &status);
707  if(status == U_FILE_ACCESS_ERROR) {
708    log_data_err("Is your data around?\n");
709    return;
710  } else if(U_FAILURE(status)) {
711    log_err("Error opening collator\n");
712    return;
713  }
714  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
715
716  log_verbose("TestUCARules\n");
717  if(ruleLen > 256) {
718    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
719    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
720  }
721  log_verbose("Rules length is %d\n", ruleLen);
722  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
723  if(U_SUCCESS(status)) {
724    ucol_close(UCAfromRules);
725  } else {
726    log_verbose("Unable to create a collator from UCARules!\n");
727  }
728/*
729  u_unescape(blah, b, 256);
730  ucol_getSortKey(coll, b, 1, res, 256);
731*/
732  ucol_close(coll);
733  if(rules != b) {
734    free(rules);
735  }
736}
737
738
739/* Pinyin tonal order */
740/*
741    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
742          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
743    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
744    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
745    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
746    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
747      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
748.. (\u00fc)
749
750However, in testing we got the following order:
751    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
752          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
753    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
754.. (\u0113)
755    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
756    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
757    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
758.. (\u01d8)
759      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
760*/
761
/*
 * Checks [before 1] tailorings that place four accented forms of each of
 * a, e, i, o, u ahead of the base letter, plus a chain tailored after u.
 * `expected` lists the strings in the order genericRulesStarter() is asked
 * to verify.
 */
static void TestBefore(void) {
  static const char *expected[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  static const char tailoring[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tailoring, expected, sizeof(expected)/sizeof(expected[0]));
}
780
781#if 0
/* superseded by TestBeforePinyin */
/* Expected "zh" ordering of the tone-marked vowels, checked via genericLocaleStarter(). */
static void TestJ784(void) {
  static const char *expected[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", expected, sizeof(expected)/sizeof(expected[0]));
}
795#endif
796
797#if 0
/* superseded by the changes to the lv locale */
/* Expected "lv" ordering of I/i/Y/y, checked via genericLocaleStarter(). */
static void TestJ831(void) {
  static const char *expected[] = { "I", "i", "Y", "y" };
  genericLocaleStarter("lv", expected, sizeof(expected)/sizeof(expected[0]));
}
808#endif
809
/*
 * Checks one expected ordering of a/ae/af strings (including U+00E6 and
 * U+00C6) twice: against the "fr" locale, and against an explicit rule
 * string with backwards secondary ordering.
 */
static void TestJ815(void) {
  static const char *expected[] = {
    "aa", "Aa", "ab", "Ab", "ad", "Ad", "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af", "b", "B"
  };
  const uint32_t count = sizeof(expected)/sizeof(expected[0]);

  genericLocaleStarter("fr", expected, count);
  genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", expected, count);
}
830
831
832static void TestCase(void)
833{
834    const static UChar gRules[MAX_TOKEN_LEN] =
835    /*" & 0 < 1,\u2461<a,A"*/
836    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
837
838    const static UChar testCase[][MAX_TOKEN_LEN] =
839    {
840        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
841        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
842        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
843        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
844    };
845
846    const static UCollationResult caseTestResults[][9] =
847    {
848        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
849        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
850        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
851        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
852    };
853
854    const static UColAttributeValue caseTestAttributes[][2] =
855    {
856        { UCOL_LOWER_FIRST, UCOL_OFF},
857        { UCOL_UPPER_FIRST, UCOL_OFF},
858        { UCOL_LOWER_FIRST, UCOL_ON},
859        { UCOL_UPPER_FIRST, UCOL_ON}
860    };
861    int32_t i,j,k;
862    UErrorCode status = U_ZERO_ERROR;
863    UCollationElements *iter;
864    UCollator  *myCollation;
865    myCollation = ucol_open("en_US", &status);
866
867    if(U_FAILURE(status)){
868        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
869        return;
870    }
871    log_verbose("Testing different case settings\n");
872    ucol_setStrength(myCollation, UCOL_TERTIARY);
873
874    for(k = 0; k<4; k++) {
875      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
876      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
877      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
878      for (i = 0; i < 3 ; i++) {
879        for(j = i+1; j<4; j++) {
880          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
881        }
882      }
883    }
884    ucol_close(myCollation);
885
886    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
887    if(U_FAILURE(status)){
888        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
889        return;
890    }
891    log_verbose("Testing different case settings with custom rules\n");
892    ucol_setStrength(myCollation, UCOL_TERTIARY);
893
894    for(k = 0; k<4; k++) {
895      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
896      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
897      for (i = 0; i < 3 ; i++) {
898        for(j = i+1; j<4; j++) {
899          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
900          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
901          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
902          backAndForth(iter);
903          ucol_closeElements(iter);
904          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
905          backAndForth(iter);
906          ucol_closeElements(iter);
907        }
908      }
909    }
910    ucol_close(myCollation);
911    {
912      const static char *lowerFirst[] = {
913        "h",
914        "H",
915        "ch",
916        "Ch",
917        "CH",
918        "cha",
919        "chA",
920        "Cha",
921        "ChA",
922        "CHa",
923        "CHA",
924        "i",
925        "I"
926      };
927
928      const static char *upperFirst[] = {
929        "H",
930        "h",
931        "CH",
932        "Ch",
933        "ch",
934        "CHA",
935        "CHa",
936        "ChA",
937        "Cha",
938        "chA",
939        "cha",
940        "I",
941        "i"
942      };
943      log_verbose("mixed case test\n");
944      log_verbose("lower first, case level off\n");
945      genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
946      log_verbose("upper first, case level off\n");
947      genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
948      log_verbose("lower first, case level on\n");
949      genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
950      log_verbose("upper first, case level on\n");
951      genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
952    }
953
954}
955
956static void TestIncrementalNormalize(void) {
957
958    /*UChar baseA     =0x61;*/
959    UChar baseA     =0x41;
960/*    UChar baseB     = 0x42;*/
961    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
962    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
963    /*
964        0x316 is combining grave accent below, cc=220
965        0x321 is combining palatalized hook below, cc=202
966        0x300 is combining grave accent, cc=230
967    */
968
969#define MAXSLEN 2000
970    /*int          maxSLen   = 64000;*/
971    int          sLen;
972    int          i;
973
974    UCollator        *coll;
975    UErrorCode       status = U_ZERO_ERROR;
976    UCollationResult result;
977
978    int32_t myQ = getTestOption(QUICK_OPTION);
979
980    if(getTestOption(QUICK_OPTION) < 0) {
981        setTestOption(QUICK_OPTION, 1);
982    }
983
984    {
985        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
986        /*          most buffers along the way.*/
987        UChar            strA[MAXSLEN+1];
988        UChar            strB[MAXSLEN+1];
989
990        coll = ucol_open("en_US", &status);
991        if(status == U_FILE_ACCESS_ERROR) {
992          log_data_err("Is your data around?\n");
993          return;
994        } else if(U_FAILURE(status)) {
995          log_err("Error opening collator\n");
996          return;
997        }
998        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
999
1000        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
1001        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
1002        /*for (sLen = 1000; sLen<1001; sLen++) {*/
1003        for (sLen = 500; sLen<501; sLen++) {
1004        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
1005            strA[0] = baseA;
1006            strB[0] = baseA;
1007            for (i=1; i<=sLen-1; i++) {
1008                strA[i] = ccMix[i % 3];
1009                strB[sLen-i] = ccMix[i % 3];
1010            }
1011            strA[sLen]   = 0;
1012            strB[sLen]   = 0;
1013
1014            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
1015            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
1016            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
1017            doTest(coll, strA, strB, UCOL_EQUAL);
1018        }
1019    }
1020
1021    setTestOption(QUICK_OPTION, myQ);
1022
1023
1024    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
1025    /*         of the string.  Checks a couple of edge cases.*/
1026
1027    {
1028        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
1029        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
1030        ucol_setStrength(coll, UCOL_TERTIARY);
1031        doTest(coll, strA, strB, UCOL_EQUAL);
1032    }
1033
1034    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
1035
1036    {
1037      /* New UCA  3.1.1.
1038       * test below used a code point from Desseret, which sorts differently
1039       * than d800 dc00
1040       */
1041        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
1042        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
1043        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
1044        ucol_setStrength(coll, UCOL_TERTIARY);
1045        doTest(coll, strA, strB, UCOL_GREATER);
1046    }
1047
1048    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
1049
1050    {
1051        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
1052        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
1053        char  sortKeyA[50];
1054        char  sortKeyAz[50];
1055        char  sortKeyB[50];
1056        char  sortKeyBz[50];
1057        int   r;
1058
1059        /* there used to be -3 here. Hmmmm.... */
1060        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
1061        result = ucol_strcoll(coll, strA, 3, strB, 3);
1062        if (result != UCOL_GREATER) {
1063            log_err("ERROR 1 in test 4\n");
1064        }
1065        result = ucol_strcoll(coll, strA, -1, strB, -1);
1066        if (result != UCOL_EQUAL) {
1067            log_err("ERROR 2 in test 4\n");
1068        }
1069
1070        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1071        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1072        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1073        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1074
1075        r = strcmp(sortKeyA, sortKeyAz);
1076        if (r <= 0) {
1077            log_err("Error 3 in test 4\n");
1078        }
1079        r = strcmp(sortKeyA, sortKeyB);
1080        if (r <= 0) {
1081            log_err("Error 4 in test 4\n");
1082        }
1083        r = strcmp(sortKeyAz, sortKeyBz);
1084        if (r != 0) {
1085            log_err("Error 5 in test 4\n");
1086        }
1087
1088        ucol_setStrength(coll, UCOL_IDENTICAL);
1089        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1090        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1091        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1092        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1093
1094        r = strcmp(sortKeyA, sortKeyAz);
1095        if (r <= 0) {
1096            log_err("Error 6 in test 4\n");
1097        }
1098        r = strcmp(sortKeyA, sortKeyB);
1099        if (r <= 0) {
1100            log_err("Error 7 in test 4\n");
1101        }
1102        r = strcmp(sortKeyAz, sortKeyBz);
1103        if (r != 0) {
1104            log_err("Error 8 in test 4\n");
1105        }
1106        ucol_setStrength(coll, UCOL_TERTIARY);
1107    }
1108
1109
1110    /*  Test 5:  Null characters in non-normal source strings.*/
1111
1112    {
1113        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
1114        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
1115        char  sortKeyA[50];
1116        char  sortKeyAz[50];
1117        char  sortKeyB[50];
1118        char  sortKeyBz[50];
1119        int   r;
1120
1121        result = ucol_strcoll(coll, strA, 6, strB, 6);
1122        if (result != UCOL_GREATER) {
1123            log_err("ERROR 1 in test 5\n");
1124        }
1125        result = ucol_strcoll(coll, strA, -1, strB, -1);
1126        if (result != UCOL_EQUAL) {
1127            log_err("ERROR 2 in test 5\n");
1128        }
1129
1130        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1131        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1132        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1133        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1134
1135        r = strcmp(sortKeyA, sortKeyAz);
1136        if (r <= 0) {
1137            log_err("Error 3 in test 5\n");
1138        }
1139        r = strcmp(sortKeyA, sortKeyB);
1140        if (r <= 0) {
1141            log_err("Error 4 in test 5\n");
1142        }
1143        r = strcmp(sortKeyAz, sortKeyBz);
1144        if (r != 0) {
1145            log_err("Error 5 in test 5\n");
1146        }
1147
1148        ucol_setStrength(coll, UCOL_IDENTICAL);
1149        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
1150        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
1151        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
1152        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
1153
1154        r = strcmp(sortKeyA, sortKeyAz);
1155        if (r <= 0) {
1156            log_err("Error 6 in test 5\n");
1157        }
1158        r = strcmp(sortKeyA, sortKeyB);
1159        if (r <= 0) {
1160            log_err("Error 7 in test 5\n");
1161        }
1162        r = strcmp(sortKeyAz, sortKeyBz);
1163        if (r != 0) {
1164            log_err("Error 8 in test 5\n");
1165        }
1166        ucol_setStrength(coll, UCOL_TERTIARY);
1167    }
1168
1169
1170    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
1171
1172    {
1173        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
1174        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
1175
1176        result = ucol_strcoll(coll, strA, 5, strB, 5);
1177        if (result != UCOL_LESS) {
1178            log_err("Error 1 in test 6\n");
1179        }
1180        result = ucol_strcoll(coll, strA, -1, strB, -1);
1181        if (result != UCOL_EQUAL) {
1182            log_err("Error 2 in test 6\n");
1183        }
1184    }
1185
1186    ucol_close(coll);
1187}
1188
1189
1190
#if 0
/*
 * Disabled test: compares the case bits reported by ucol_uprv_getCaseBits()
 * for a few strings against a table of expected values.
 *
 * caseBitData[i] is unescaped and passed to ucol_uprv_getCaseBits(); the
 * result must equal results[i]. A mismatch is reported with log_err().
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  /* Without a collator there is nothing to test; do not hand NULL to the implementation. */
  if(U_FAILURE(status)) {
    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<LEN(results); i++) {
    blen = u_unescape(caseBitData[i], b, LEN(b));
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* The collator opened above is owned by this test. */
  ucol_close(UCA);
}
#endif
1218
1219static void TestHangulTailoring(void) {
1220    static const char *koreanData[] = {
1221        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
1222            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
1223            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
1224            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
1225            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
1226            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
1227    };
1228
1229    const char *rules =
1230        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
1231        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
1232        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
1233        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
1234        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
1235        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
1236
1237
1238  UErrorCode status = U_ZERO_ERROR;
1239  UChar rlz[2048] = { 0 };
1240  uint32_t rlen = u_unescape(rules, rlz, 2048);
1241
1242  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
1243  if(status == U_FILE_ACCESS_ERROR) {
1244    log_data_err("Is your data around?\n");
1245    return;
1246  } else if(U_FAILURE(status)) {
1247    log_err("Error opening collator\n");
1248    return;
1249  }
1250
1251  log_verbose("Using start of korean rules\n");
1252
1253  if(U_SUCCESS(status)) {
1254    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1255  } else {
1256    log_err("Unable to open collator with rules %s\n", rules);
1257  }
1258
1259  ucol_close(coll);
1260
1261  log_verbose("Using ko__LOTUS locale\n");
1262  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
1263}
1264
1265/*
1266 * The secondary/tertiary compression middle byte
1267 * as used by the current implementation.
1268 * Subject to change as the sort key compression changes.
1269 * See class CollationKeys.
1270 */
1271enum {
1272    SEC_COMMON_MIDDLE = 0x25,  /* range 05..45 */
1273    TER_ONLY_COMMON_MIDDLE = 0x65  /* range 05..C5 */
1274};
1275
1276static void TestCompressOverlap(void) {
1277    UChar       secstr[150];
1278    UChar       tertstr[150];
1279    UErrorCode  status = U_ZERO_ERROR;
1280    UCollator  *coll;
1281    uint8_t     result[500];
1282    uint32_t    resultlen;
1283    int         count = 0;
1284    uint8_t    *tempptr;
1285
1286    coll = ucol_open("", &status);
1287
1288    if (U_FAILURE(status)) {
1289        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
1290        return;
1291    }
1292    while (count < 149) {
1293        secstr[count] = 0x0020; /* [06, 05, 05] */
1294        tertstr[count] = 0x0020;
1295        count ++;
1296    }
1297
1298    /* top down compression ----------------------------------- */
1299    secstr[count] = 0x0332; /* [, 87, 05] */
1300    tertstr[count] = 0x3000; /* [06, 05, 07] */
1301
1302    /* no compression secstr should have 150 secondary bytes, tertstr should
1303    have 150 tertiary bytes.
1304    with correct compression, secstr should have 6 secondary
1305    bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes */
1306    resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1307    (void)resultlen;    /* Suppress set but not used warning. */
1308    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1309    while (*(tempptr + 1) != 1) {
1310        /* the last secondary collation element is not checked since it is not
1311        part of the compression */
1312        if (*tempptr < SEC_COMMON_MIDDLE) {
1313            log_err("Secondary top down compression overlapped\n");
1314        }
1315        tempptr ++;
1316    }
1317
1318    /* tertiary top/bottom/common for en_US is similar to the secondary
1319    top/bottom/common */
1320    resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1321    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1322    while (*(tempptr + 1) != 0) {
1323        /* the last secondary collation element is not checked since it is not
1324        part of the compression */
1325        if (*tempptr < TER_ONLY_COMMON_MIDDLE) {
1326            log_err("Tertiary top down compression overlapped\n");
1327        }
1328        tempptr ++;
1329    }
1330
1331    /* bottom up compression ------------------------------------- */
1332    secstr[count] = 0;
1333    tertstr[count] = 0;
1334    resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));
1335    tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1;
1336    while (*(tempptr + 1) != 1) {
1337        /* the last secondary collation element is not checked since it is not
1338        part of the compression */
1339        if (*tempptr > SEC_COMMON_MIDDLE) {
1340            log_err("Secondary bottom up compression overlapped\n");
1341        }
1342        tempptr ++;
1343    }
1344
1345    /* tertiary top/bottom/common for en_US is similar to the secondary
1346    top/bottom/common */
1347    resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));
1348    tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1;
1349    while (*(tempptr + 1) != 0) {
1350        /* the last secondary collation element is not checked since it is not
1351        part of the compression */
1352        if (*tempptr > TER_ONLY_COMMON_MIDDLE) {
1353            log_err("Tertiary bottom up compression overlapped\n");
1354        }
1355        tempptr ++;
1356    }
1357
1358    ucol_close(coll);
1359}
1360
/*
 * Runs the same three-string ordering check against the root locale and
 * against six small tailorings that touch U+0410.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings under which the strings above must keep their order. */
    static const char *const tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };
    size_t which;

    /* Russian overrides contractions, so testing with the "ru" locale
       is not valid anymore; only root is checked. */
    genericLocaleStarter("root", test, 3);

    for (which = 0; which < LEN(tailorings); ++which) {
        genericRulesStarter(tailorings[which], test, 3);
    }
}
1379
/*
 * Checks two three-string orderings under a tailoring that suppresses
 * contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char suppressCyrillic[] = "[suppressContractions [\\u0400-\\u047f]]";

    /* A-like letter followed by U+0410, with and without a combining mark. */
    static const char *latinThenCyrillic[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* U+0410, optionally with a combining mark, followed by a Latin letter. */
    static const char *cyrillicThenLatin[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressCyrillic, latinThenCyrillic, 3);
    genericRulesStarter(suppressCyrillic, cyrillicThenLatin, 3);
}
1396
1397static void TestContraction(void) {
1398    const static char *testrules[] = {
1399        "&A = AB / B",
1400        "&A = A\\u0306/\\u0306",
1401        "&c = ch / h"
1402    };
1403    const static UChar testdata[][2] = {
1404        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
1405        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
1406        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
1407    };
1408    const static UChar testdata2[][2] = {
1409        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
1410        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
1411        {0x0063 /* 'c' */, 0x006C /* 'l' */}
1412    };
1413#if 0
1414    /*
1415     * These pairs of rule strings are not guaranteed to yield the very same mappings.
1416     * In fact, LDML 24 recommends an improved way of creating mappings
1417     * which always yields different mappings for such pairs. See
1418     * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings
1419     */
1420    const static char *testrules3[] = {
1421        "&z < xyz &xyzw << B",
1422        "&z < xyz &xyz << B / w",
1423        "&z < ch &achm << B",
1424        "&z < ch &a << B / chm",
1425        "&\\ud800\\udc00w << B",
1426        "&\\ud800\\udc00 << B / w",
1427        "&a\\ud800\\udc00m << B",
1428        "&a << B / \\ud800\\udc00m",
1429    };
1430#endif
1431
1432    UErrorCode  status   = U_ZERO_ERROR;
1433    UCollator  *coll;
1434    UChar       rule[256] = {0};
1435    uint32_t    rlen     = 0;
1436    int         i;
1437
1438    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1439        UCollationElements *iter1;
1440        int j = 0;
1441        log_verbose("Rule %s for testing\n", testrules[i]);
1442        rlen = u_unescape(testrules[i], rule, 32);
1443        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1444        if (U_FAILURE(status)) {
1445            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1446            return;
1447        }
1448        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
1449        if (U_FAILURE(status)) {
1450            log_err("Collation iterator creation failed\n");
1451            return;
1452        }
1453        while (j < 2) {
1454            UCollationElements *iter2 = ucol_openElements(coll,
1455                                                         &(testdata[i][j]),
1456                                                         1, &status);
1457            uint32_t ce;
1458            if (U_FAILURE(status)) {
1459                log_err("Collation iterator creation failed\n");
1460                return;
1461            }
1462            ce = ucol_next(iter2, &status);
1463            while (ce != UCOL_NULLORDER) {
1464                if ((uint32_t)ucol_next(iter1, &status) != ce) {
1465                    log_err("Collation elements in contraction split does not match\n");
1466                    return;
1467                }
1468                ce = ucol_next(iter2, &status);
1469            }
1470            j ++;
1471            ucol_closeElements(iter2);
1472        }
1473        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
1474            log_err("Collation elements not exhausted\n");
1475            return;
1476        }
1477        ucol_closeElements(iter1);
1478        ucol_close(coll);
1479    }
1480
1481    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
1482    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1483    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
1484        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1485                testdata2[0][0], testdata2[0][1], testdata2[1][0],
1486                testdata2[1][1]);
1487        return;
1488    }
1489    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
1490        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
1491                testdata2[1][0], testdata2[1][1], testdata2[2][0],
1492                testdata2[2][1]);
1493        return;
1494    }
1495    ucol_close(coll);
1496#if 0  /* see above */
1497    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
1498        log_verbose("testrules3 i==%d  \"%s\" vs. \"%s\"\n", i, testrules3[i], testrules3[i + 1]);
1499        UCollator          *coll1,
1500                           *coll2;
1501        UCollationElements *iter1,
1502                           *iter2;
1503        UChar               ch = 0x0042 /* 'B' */;
1504        uint32_t            ce;
1505        rlen = u_unescape(testrules3[i], rule, 32);
1506        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1507        rlen = u_unescape(testrules3[i + 1], rule, 32);
1508        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1509        if (U_FAILURE(status)) {
1510            log_err("Collator creation failed %s\n", testrules[i]);
1511            return;
1512        }
1513        iter1 = ucol_openElements(coll1, &ch, 1, &status);
1514        iter2 = ucol_openElements(coll2, &ch, 1, &status);
1515        if (U_FAILURE(status)) {
1516            log_err("Collation iterator creation failed\n");
1517            return;
1518        }
1519        ce = ucol_next(iter1, &status);
1520        if (U_FAILURE(status)) {
1521            log_err("Retrieving ces failed\n");
1522            return;
1523        }
1524        while (ce != UCOL_NULLORDER) {
1525            uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);
1526            if (ce == ce2) {
1527                log_verbose("CEs match: %08x\n", ce);
1528            } else {
1529                log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);
1530                return;
1531            }
1532            ce = ucol_next(iter1, &status);
1533            if (U_FAILURE(status)) {
1534                log_err("Retrieving ces failed\n");
1535                return;
1536            }
1537        }
1538        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
1539            log_err("CEs not exhausted\n");
1540            return;
1541        }
1542        ucol_closeElements(iter1);
1543        ucol_closeElements(iter2);
1544        ucol_close(coll1);
1545        ucol_close(coll2);
1546    }
1547#endif
1548}
1549
1550static void TestExpansion(void) {
1551    const static char *testrules[] = {
1552#if 0
1553        /*
1554         * This seems to have tested that M was not mapped to an expansion.
1555         * I believe the old builder just did that because it computed the extension CEs
1556         * at the very end, which was a bug.
1557         * Among other problems, it violated the core tailoring principle
1558         * by making an earlier rule depend on a later one.
1559         * And, of course, if M did not get an expansion, then it was primary different from K,
1560         * unlike what the rule &K<<M says.
1561         */
1562        "&J << K / B & K << M",
1563#endif
1564        "&J << K / B << M"
1565    };
1566    const static UChar testdata[][3] = {
1567        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
1568        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
1569        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
1570        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
1571        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
1572        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
1573    };
1574
1575    UErrorCode  status   = U_ZERO_ERROR;
1576    UCollator  *coll;
1577    UChar       rule[256] = {0};
1578    uint32_t    rlen     = 0;
1579    int         i;
1580
1581    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
1582        int j = 0;
1583        log_verbose("Rule %s for testing\n", testrules[i]);
1584        rlen = u_unescape(testrules[i], rule, 32);
1585        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
1586        if (U_FAILURE(status)) {
1587            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
1588            return;
1589        }
1590
1591        for (j = 0; j < 5; j ++) {
1592            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
1593        }
1594        ucol_close(coll);
1595    }
1596}
1597
#if 0
/*
 * Disabled by default: this test exercises the current limitations of the
 * engine and always fails. Each section below feeds one rule string and one
 * or more expected orderings to the generic rule-based test drivers.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rules = "&a=b/c&d=c/e";
    static const char *orderA[] = {"add","b","adf"};
    static const char *orderB[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(rules, orderA, LEN(orderA));
    genericRulesStarter(rules, orderB, LEN(orderB));
  }

  /* contractions spanning expansions */
  {
    static const char *rules = "&a<<<c/e&g<<<eh";
    static const char *orderA[] = {"ad","c","af","f","ch","h"};
    static const char *orderB[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rules, orderA, LEN(orderA));
    genericRulesStarter(rules, orderB, LEN(orderB));
  }

  /* normalization: nulls in contractions */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attributes[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normalizationOn[] = { UCOL_ON };
    static const UColAttributeValue normalizationOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    genericRulesStarterWithOptions(rules, orderA, 2, attributes, normalizationOn, 1);
    genericRulesStarterWithOptions(rules, orderB, 2, attributes, normalizationOn, 1);
    genericRulesStarterWithOptions(rules, orderA, 2, attributes, normalizationOff, 1);
    genericRulesStarterWithOptions(rules, orderB, 2, attributes, normalizationOff, 1);
  }

  /* normalization: contractions spanning normalization */
  {
    static const char *rules = "&a<<<\\u0000\\u0302";
    static const char *orderA[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *orderB[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute attributes[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue normalizationOn[] = { UCOL_ON };
    static const UColAttributeValue normalizationOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rules, orderA, 2, attributes, normalizationOn, 1);
    genericRulesStarterWithOptions(rules, orderB, 2, attributes, normalizationOn, 1);
    genericRulesStarterWithOptions(rules, orderA, 2, attributes, normalizationOff, 1);
    genericRulesStarterWithOptions(rules, orderB, 2, attributes, normalizationOff, 1);
  }

  /* variable top:  */
  {
    /* Alternative rule strings that were tried here:
         "&\\u2010<x=[variable top]<z"
         "&' '<x<[variable top]=z" */
    static const char *rules = "&\\u2010<x<[variable top]=z";
    static const char *orderA[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *orderB[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *orderC[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute attributes[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue shifted[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue nonIgnorable[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rules, orderC, LEN(orderC), attributes, shifted, LEN(attributes));
    genericRulesStarterWithOptions(rules, orderA, LEN(orderA), attributes, shifted, LEN(attributes));
    genericRulesStarterWithOptions(rules, orderB, LEN(orderB), attributes, shifted, LEN(attributes));
    genericRulesStarterWithOptions(rules, orderA, LEN(orderA), attributes, nonIgnorable, LEN(attributes));
    genericRulesStarterWithOptions(rules, orderB, LEN(orderB), attributes, nonIgnorable, LEN(attributes));
  }

  /* case level */
  {
    static const char *rules = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *orderA[] = {"c","CH","Ch","cH","ch"};
    static const char *orderB[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute attributes[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue upperFirst[] = { UCOL_UPPER_FIRST};

    /* Only the upper-first setting is exercised; the runs with the
       attribute set to UCOL_OFF are not performed. */
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rules, orderA, LEN(orderA), attributes, upperFirst, LEN(attributes));
    genericRulesStarterWithOptions(rules, orderB, LEN(orderB), attributes, upperFirst, LEN(attributes));
  }

}
#endif
1689
1690static void TestBocsuCoverage(void) {
1691  UErrorCode status = U_ZERO_ERROR;
1692  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
1693  UChar       test[256] = {0};
1694  uint32_t    tlen     = u_unescape(testString, test, 32);
1695  uint8_t key[256]     = {0};
1696  uint32_t klen         = 0;
1697
1698  UCollator *coll = ucol_open("", &status);
1699  if(U_SUCCESS(status)) {
1700  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
1701
1702  klen = ucol_getSortKey(coll, test, tlen, key, 256);
1703  (void)klen;    /* Suppress set but not used warning. */
1704
1705  ucol_close(coll);
1706  } else {
1707    log_data_err("Couldn't open UCA\n");
1708  }
1709}
1710
1711static void TestVariableTopSetting(void) {
1712  UErrorCode status = U_ZERO_ERROR;
1713  uint32_t varTopOriginal = 0, varTop1, varTop2;
1714  UCollator *coll = ucol_open("", &status);
1715  if(U_SUCCESS(status)) {
1716
1717  static const UChar nul = 0;
1718  static const UChar space = 0x20;
1719  static const UChar dot = 0x2e;  /* punctuation */
1720  static const UChar degree = 0xb0;  /* symbol */
1721  static const UChar dollar = 0x24;  /* currency symbol */
1722  static const UChar zero = 0x30;  /* digit */
1723
1724  varTopOriginal = ucol_getVariableTop(coll, &status);
1725  log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);
1726  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1727
1728  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1729  varTop2 = ucol_getVariableTop(coll, &status);
1730  log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);
1731  if(U_FAILURE(status) || varTop1 != varTop2 ||
1732      !ucol_equal(coll, &nul, 0, &space, 1) ||
1733      ucol_equal(coll, &nul, 0, &dot, 1) ||
1734      ucol_equal(coll, &nul, 0, &degree, 1) ||
1735      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1736      ucol_equal(coll, &nul, 0, &zero, 1) ||
1737      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1738    log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status));
1739  }
1740
1741  varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);
1742  varTop2 = ucol_getVariableTop(coll, &status);
1743  log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);
1744  if(U_FAILURE(status) || varTop1 != varTop2 ||
1745      !ucol_equal(coll, &nul, 0, &space, 1) ||
1746      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1747      ucol_equal(coll, &nul, 0, &degree, 1) ||
1748      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1749      ucol_equal(coll, &nul, 0, &zero, 1) ||
1750      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1751    log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status));
1752  }
1753
1754  varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);
1755  varTop2 = ucol_getVariableTop(coll, &status);
1756  log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);
1757  if(U_FAILURE(status) || varTop1 != varTop2 ||
1758      !ucol_equal(coll, &nul, 0, &space, 1) ||
1759      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1760      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1761      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1762      ucol_equal(coll, &nul, 0, &zero, 1) ||
1763      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1764    log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(status));
1765  }
1766
1767  varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);
1768  varTop2 = ucol_getVariableTop(coll, &status);
1769  log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);
1770  if(U_FAILURE(status) || varTop1 != varTop2 ||
1771      !ucol_equal(coll, &nul, 0, &space, 1) ||
1772      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1773      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1774      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1775      ucol_equal(coll, &nul, 0, &zero, 1) ||
1776      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1777    log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(status));
1778  }
1779
1780  log_verbose("Testing setting variable top to contractions\n");
1781  {
1782    UChar first[4] = { 0 };
1783    first[0] = 0x0040;
1784    first[1] = 0x0050;
1785    first[2] = 0x0000;
1786
1787    status = U_ZERO_ERROR;
1788    ucol_setVariableTop(coll, first, -1, &status);
1789
1790    if(U_SUCCESS(status)) {
1791      log_err("Invalid contraction succeded in setting variable top!\n");
1792    }
1793
1794  }
1795
1796  log_verbose("Test restoring variable top\n");
1797
1798  status = U_ZERO_ERROR;
1799  ucol_restoreVariableTop(coll, varTopOriginal, &status);
1800  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
1801    log_err("Couldn't restore old variable top\n");
1802  }
1803
1804  log_verbose("Testing calling with error set\n");
1805
1806  status = U_INTERNAL_PROGRAM_ERROR;
1807  varTop1 = ucol_setVariableTop(coll, &space, 1, &status);
1808  varTop2 = ucol_getVariableTop(coll, &status);
1809  ucol_restoreVariableTop(coll, varTop2, &status);
1810  varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);
1811  varTop2 = ucol_getVariableTop(NULL, &status);
1812  ucol_restoreVariableTop(NULL, varTop2, &status);
1813  if(status != U_INTERNAL_PROGRAM_ERROR) {
1814    log_err("Bad reaction to passed error!\n");
1815  }
1816  ucol_close(coll);
1817  } else {
1818    log_data_err("Couldn't open UCA collator\n");
1819  }
1820}
1821
1822static void TestMaxVariable() {
1823  UErrorCode status = U_ZERO_ERROR;
1824  UColReorderCode oldMax, max;
1825  UCollator *coll;
1826
1827  static const UChar nul = 0;
1828  static const UChar space = 0x20;
1829  static const UChar dot = 0x2e;  /* punctuation */
1830  static const UChar degree = 0xb0;  /* symbol */
1831  static const UChar dollar = 0x24;  /* currency symbol */
1832  static const UChar zero = 0x30;  /* digit */
1833
1834  coll = ucol_open("", &status);
1835  if(U_FAILURE(status)) {
1836    log_data_err("Couldn't open root collator\n");
1837    return;
1838  }
1839
1840  oldMax = ucol_getMaxVariable(coll);
1841  log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);
1842  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1843
1844  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1845  max = ucol_getMaxVariable(coll);
1846  log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);
1847  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE ||
1848      !ucol_equal(coll, &nul, 0, &space, 1) ||
1849      ucol_equal(coll, &nul, 0, &dot, 1) ||
1850      ucol_equal(coll, &nul, 0, &degree, 1) ||
1851      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1852      ucol_equal(coll, &nul, 0, &zero, 1) ||
1853      ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {
1854    log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status));
1855  }
1856
1857  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);
1858  max = ucol_getMaxVariable(coll);
1859  log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);
1860  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION ||
1861      !ucol_equal(coll, &nul, 0, &space, 1) ||
1862      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1863      ucol_equal(coll, &nul, 0, &degree, 1) ||
1864      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1865      ucol_equal(coll, &nul, 0, &zero, 1) ||
1866      ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {
1867    log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(status));
1868  }
1869
1870  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);
1871  max = ucol_getMaxVariable(coll);
1872  log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);
1873  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL ||
1874      !ucol_equal(coll, &nul, 0, &space, 1) ||
1875      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1876      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1877      ucol_equal(coll, &nul, 0, &dollar, 1) ||
1878      ucol_equal(coll, &nul, 0, &zero, 1) ||
1879      ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {
1880    log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(status));
1881  }
1882
1883  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);
1884  max = ucol_getMaxVariable(coll);
1885  log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);
1886  if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY ||
1887      !ucol_equal(coll, &nul, 0, &space, 1) ||
1888      !ucol_equal(coll, &nul, 0, &dot, 1) ||
1889      !ucol_equal(coll, &nul, 0, &degree, 1) ||
1890      !ucol_equal(coll, &nul, 0, &dollar, 1) ||
1891      ucol_equal(coll, &nul, 0, &zero, 1) ||
1892      ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {
1893    log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(status));
1894  }
1895
1896  log_verbose("Test restoring maxVariable\n");
1897  status = U_ZERO_ERROR;
1898  ucol_setMaxVariable(coll, oldMax, &status);
1899  if(oldMax != ucol_getMaxVariable(coll)) {
1900    log_err("Couldn't restore old maxVariable\n");
1901  }
1902
1903  log_verbose("Testing calling with error set\n");
1904  status = U_INTERNAL_PROGRAM_ERROR;
1905  ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);
1906  max = ucol_getMaxVariable(coll);
1907  if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) {
1908    log_err("Bad reaction to passed error!\n");
1909  }
1910  ucol_close(coll);
1911}
1912
1913static void TestNonChars(void) {
1914  static const char *test[] = {
1915      "\\u0000",  /* ignorable */
1916      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
1917      "\\uFDD0", "\\uFDEF",
1918      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
1919      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
1920      "\\U0003FFFE", "\\U0003FFFF",
1921      "\\U0004FFFE", "\\U0004FFFF",
1922      "\\U0005FFFE", "\\U0005FFFF",
1923      "\\U0006FFFE", "\\U0006FFFF",
1924      "\\U0007FFFE", "\\U0007FFFF",
1925      "\\U0008FFFE", "\\U0008FFFF",
1926      "\\U0009FFFE", "\\U0009FFFF",
1927      "\\U000AFFFE", "\\U000AFFFF",
1928      "\\U000BFFFE", "\\U000BFFFF",
1929      "\\U000CFFFE", "\\U000CFFFF",
1930      "\\U000DFFFE", "\\U000DFFFF",
1931      "\\U000EFFFE", "\\U000EFFFF",
1932      "\\U000FFFFE", "\\U000FFFFF",
1933      "\\U0010FFFE", "\\U0010FFFF",
1934      "\\uFFFF"  /* special character with maximum primary weight */
1935  };
1936  UErrorCode status = U_ZERO_ERROR;
1937  UCollator *coll = ucol_open("en_US", &status);
1938
1939  log_verbose("Test non characters\n");
1940
1941  if(U_SUCCESS(status)) {
1942    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
1943  } else {
1944    log_err_status(status, "Unable to open collator\n");
1945  }
1946
1947  ucol_close(coll);
1948}
1949
/*
 * For every length j from 20 to 499, builds four strings of j characters that
 * consist of j-1 times 'a' followed by 'a', 'b', 'c' and 'd' respectively,
 * and passes them to genericLocaleStarter() for the en_US locale.
 */
static void TestExtremeCompression(void) {
  enum { kStringCount = 4, kMinLength = 20, kMaxLength = 500 };
  /*
   * Fixed-size automatic storage instead of malloc(): there is no allocation
   * that could fail and be dereferenced, and nothing to free.
   * The highest index written is j == kMaxLength-1 (the terminating NUL).
   */
  char buffers[kStringCount][kMaxLength];
  const char *strings[kStringCount];
  int32_t i = 0, j = 0;

  for(i = 0; i < kStringCount; i++) {
    strings[i] = buffers[i];
  }

  for(j = kMinLength; j < kMaxLength; j++) {
    for(i = 0; i < kStringCount; i++) {
      uprv_memset(buffers[i], 'a', (j-1)*sizeof(char));
      buffers[i][j-1] = (char)('a'+i);
      buffers[i][j] = 0;
    }
    genericLocaleStarter("en_US", strings, kStringCount);
  }
}
1972
1973#if 0
1974static void TestExtremeCompression(void) {
1975  static char *test[4];
1976  int32_t j = 0, i = 0;
1977  UErrorCode status = U_ZERO_ERROR;
1978  UCollator *coll = ucol_open("en_US", status);
1979  for(i = 0; i<4; i++) {
1980    test[i] = (char *)malloc(2048*sizeof(char));
1981  }
1982  for(j = 10; j < 2048; j++) {
1983    for(i = 0; i<4; i++) {
1984      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
1985      test[i][j-1] = (char)('a'+i);
1986      test[i][j] = 0;
1987    }
1988  }
1989  genericLocaleStarter("en_US", (const char **)test, 4);
1990
1991  for(j = 10; j < 2048; j++) {
1992    for(i = 0; i<1; i++) {
1993      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
1994      test[i][j] = 0;
1995    }
1996  }
1997  for(i = 0; i<4; i++) {
1998    free(test[i]);
1999  }
2000}
2001#endif
2002
/*
 * Passes a tailoring that places surrogate pairs (and strings starting with
 * them) after 'z', together with a list of 14 strings, to
 * genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
    "< \\ud800\\udc00y  < \\ud800\\udc00r"
    "< \\ud800\\udc00f  << \\ud800\\udc00"
    "< \\ud800\\udc00fa << \\ud800\\udc00fb"
    "< \\ud800\\udc00a  < c < b";

  static const char *ordered[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };

  /* all 14 entries of the list are handed over */
  genericRulesStarter(tailoring, ordered, sizeof(ordered)/sizeof(ordered[0]));
}
2023
/*
 * Test for the prefix implementation, which is used by the JIS X 4061
 * collation rules. Every case pairs a rule string with the strings (and the
 * number of them) that are handed to genericRulesStarter().
 */
static void TestPrefix(void) {
  static const struct {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } cases[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },

    /*
     * NOTE(review): five strings are listed here but len is 4, so "zz" lies
     * outside the range that is passed on - confirm that this is intended.
     */
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const uint32_t caseCount = sizeof(cases)/sizeof(cases[0]);
  uint32_t n = 0;

  while(n < caseCount) {
    genericRulesStarter(cases[n].rules, cases[n].data, cases[n].len);
    n++;
  }
}
2049
2050/* This test uses data suplied by Masashiko Maedera to test the implementation */
2051/* JIS X 4061 collation order implementation                                   */
2052static void TestNewJapanese(void) {
2053
2054  static const char * const test1[] = {
2055      "\\u30b7\\u30e3\\u30fc\\u30ec",
2056      "\\u30b7\\u30e3\\u30a4",
2057      "\\u30b7\\u30e4\\u30a3",
2058      "\\u30b7\\u30e3\\u30ec",
2059      "\\u3061\\u3087\\u3053",
2060      "\\u3061\\u3088\\u3053",
2061      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
2062      "\\u3066\\u30fc\\u305f",
2063      "\\u30c6\\u30fc\\u30bf",
2064      "\\u30c6\\u30a7\\u30bf",
2065      "\\u3066\\u3048\\u305f",
2066      "\\u3067\\u30fc\\u305f",
2067      "\\u30c7\\u30fc\\u30bf",
2068      "\\u30c7\\u30a7\\u30bf",
2069      "\\u3067\\u3048\\u305f",
2070      "\\u3066\\u30fc\\u305f\\u30fc",
2071      "\\u30c6\\u30fc\\u30bf\\u30a1",
2072      "\\u30c6\\u30a7\\u30bf\\u30fc",
2073      "\\u3066\\u3047\\u305f\\u3041",
2074      "\\u3066\\u3048\\u305f\\u30fc",
2075      "\\u3067\\u30fc\\u305f\\u30fc",
2076      "\\u30c7\\u30fc\\u30bf\\u30a1",
2077      "\\u3067\\u30a7\\u305f\\u30a1",
2078      "\\u30c7\\u3047\\u30bf\\u3041",
2079      "\\u30c7\\u30a8\\u30bf\\u30a2",
2080      "\\u3072\\u3086",
2081      "\\u3073\\u3085\\u3042",
2082      "\\u3074\\u3085\\u3042",
2083      "\\u3073\\u3085\\u3042\\u30fc",
2084      "\\u30d3\\u30e5\\u30a2\\u30fc",
2085      "\\u3074\\u3085\\u3042\\u30fc",
2086      "\\u30d4\\u30e5\\u30a2\\u30fc",
2087      "\\u30d2\\u30e5\\u30a6",
2088      "\\u30d2\\u30e6\\u30a6",
2089      "\\u30d4\\u30e5\\u30a6\\u30a2",
2090      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
2091      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
2092      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
2093      "\\u3072\\u3085\\u3093",
2094      "\\u3074\\u3085\\u3093",
2095      "\\u3075\\u30fc\\u308a",
2096      "\\u30d5\\u30fc\\u30ea",
2097      "\\u3075\\u3045\\u308a",
2098      "\\u3075\\u30a5\\u308a",
2099      "\\u3075\\u30a5\\u30ea",
2100      "\\u30d5\\u30a6\\u30ea",
2101      "\\u3076\\u30fc\\u308a",
2102      "\\u30d6\\u30fc\\u30ea",
2103      "\\u3076\\u3045\\u308a",
2104      "\\u30d6\\u30a5\\u308a",
2105      "\\u3077\\u3046\\u308a",
2106      "\\u30d7\\u30a6\\u30ea",
2107      "\\u3075\\u30fc\\u308a\\u30fc",
2108      "\\u30d5\\u30a5\\u30ea\\u30fc",
2109      "\\u3075\\u30a5\\u308a\\u30a3",
2110      "\\u30d5\\u3045\\u308a\\u3043",
2111      "\\u30d5\\u30a6\\u30ea\\u30fc",
2112      "\\u3075\\u3046\\u308a\\u3043",
2113      "\\u30d6\\u30a6\\u30ea\\u30a4",
2114      "\\u3077\\u30fc\\u308a\\u30fc",
2115      "\\u3077\\u30a5\\u308a\\u30a4",
2116      "\\u3077\\u3046\\u308a\\u30fc",
2117      "\\u30d7\\u30a6\\u30ea\\u30a4",
2118      "\\u30d5\\u30fd",
2119      "\\u3075\\u309e",
2120      "\\u3076\\u309d",
2121      "\\u3076\\u3075",
2122      "\\u3076\\u30d5",
2123      "\\u30d6\\u3075",
2124      "\\u30d6\\u30d5",
2125      "\\u3076\\u309e",
2126      "\\u3076\\u3077",
2127      "\\u30d6\\u3077",
2128      "\\u3077\\u309d",
2129      "\\u30d7\\u30fd",
2130      "\\u3077\\u3075",
2131};
2132
2133  static const char *test2[] = {
2134    "\\u306f\\u309d", /* H\\u309d */
2135    "\\u30cf\\u30fd", /* K\\u30fd */
2136    "\\u306f\\u306f", /* HH */
2137    "\\u306f\\u30cf", /* HK */
2138    "\\u30cf\\u30cf", /* KK */
2139    "\\u306f\\u309e", /* H\\u309e */
2140    "\\u30cf\\u30fe", /* K\\u30fe */
2141    "\\u306f\\u3070", /* HH\\u309b */
2142    "\\u30cf\\u30d0", /* KK\\u309b */
2143    "\\u306f\\u3071", /* HH\\u309c */
2144    "\\u30cf\\u3071", /* KH\\u309c */
2145    "\\u30cf\\u30d1", /* KK\\u309c */
2146    "\\u3070\\u309d", /* H\\u309b\\u309d */
2147    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
2148    "\\u3070\\u306f", /* H\\u309bH */
2149    "\\u30d0\\u30cf", /* K\\u309bK */
2150    "\\u3070\\u309e", /* H\\u309b\\u309e */
2151    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
2152    "\\u3070\\u3070", /* H\\u309bH\\u309b */
2153    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
2154    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
2155    "\\u3070\\u3071", /* H\\u309bH\\u309c */
2156    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
2157    "\\u3071\\u309d", /* H\\u309c\\u309d */
2158    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
2159    "\\u3071\\u306f", /* H\\u309cH */
2160    "\\u30d1\\u30cf", /* K\\u309cK */
2161    "\\u3071\\u3070", /* H\\u309cH\\u309b */
2162    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
2163    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
2164    "\\u3071\\u3071", /* H\\u309cH\\u309c */
2165    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
2166  };
2167  /*
2168  static const char *test3[] = {
2169    "\\u221er\\u221e",
2170    "\\u221eR#",
2171    "\\u221et\\u221e",
2172    "#r\\u221e",
2173    "#R#",
2174    "#t%",
2175    "#T%",
2176    "8t\\u221e",
2177    "8T\\u221e",
2178    "8t#",
2179    "8T#",
2180    "8t%",
2181    "8T%",
2182    "8t8",
2183    "8T8",
2184    "\\u03c9r\\u221e",
2185    "\\u03a9R%",
2186    "rr\\u221e",
2187    "rR\\u221e",
2188    "Rr\\u221e",
2189    "RR\\u221e",
2190    "RT%",
2191    "rt8",
2192    "tr\\u221e",
2193    "tr8",
2194    "TR8",
2195    "tt8",
2196    "\\u30b7\\u30e3\\u30fc\\u30ec",
2197  };
2198  */
2199  static const UColAttribute att[] = { UCOL_STRENGTH };
2200  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
2201
2202  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
2203  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
2204
2205  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
2206  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
2207  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
2208  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
2209  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
2210}
2211
2212static void TestStrCollIdenticalPrefix(void) {
2213  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
2214  const char* test[] = {
2215    "ab\\ud9b0\\udc70",
2216    "ab\\ud9b0\\udc71"
2217  };
2218  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
2219}
2220/* Contractions should have all their canonically equivalent */
2221/* strings included */
2222static void TestContractionClosure(void) {
2223  static const struct {
2224    const char *rules;
2225    const char *data[10];
2226    const uint32_t len;
2227  } tests[] = {
2228    {   "&b=\\u00e4\\u00e4",
2229      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
2230    {   "&b=\\u00C5",
2231      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
2232  };
2233  uint32_t i;
2234
2235
2236  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
2237    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
2238  }
2239}
2240
/*
 * Rules that combine [before 3] with other tailorings of the same characters.
 * (The original note on this test reads: "this test also fails".)
 * Each case is passed to genericRulesStarter(); the section under "#if 0"
 * is disabled debugging code.
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const uint32_t caseCount = sizeof(cases)/sizeof(cases[0]);
  uint32_t n = 0;

  while(n < caseCount) {
    genericRulesStarter(cases[n].rules, cases[n].data, cases[n].len);
    n++;
  }

#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
/* this piece of code should be in some sort of verbose mode     */
/* it gets the collation elements for elements and prints them   */
/* This is useful when trying to see whether the problem is      */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
2318
2319static void TestPrefixCompose(void) {
2320  const char* rule1 =
2321        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
2322  /*
2323  const char* test[] = {
2324      "\\u30c6\\u30fc\\u30bf",
2325      "\\u30c6\\u30a7\\u30bf",
2326  };
2327  */
2328  {
2329    UErrorCode status = U_ZERO_ERROR;
2330    /*uint32_t i = 0;*/
2331    /*UCollationElements *it = NULL;*/
2332/*    uint32_t CE;*/
2333    UChar string[256];
2334    uint32_t uStringLen;
2335    UCollator *coll = NULL;
2336
2337    uStringLen = u_unescape(rule1, string, 256);
2338
2339    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2340    ucol_close(coll);
2341  }
2342
2343
2344}
2345
2346/*
2347[last variable] last variable value
2348[last primary ignorable] largest CE for primary ignorable
2349[last secondary ignorable] largest CE for secondary ignorable
2350[last tertiary ignorable] largest CE for tertiary ignorable
2351[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
2352*/
2353
/*
 * Runs rules that use the special reset positions ([first variable],
 * [last regular], [top], ...) through genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /* values here are hardcoded and are correct for the current UCA
   * when the UCA changes, one might be forced to change these
   * values.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
#if 0
    /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */
#endif
    /*
     * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
     * and it *is* possible to "go before" that.
     */
    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    },

    /* 'normal' befores */

    /*
     * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
     * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
     * because there is no tailoring space before that boundary.
     * Made the tests work by tailoring to a space instead.
     */
    { "&[before 3][first primary ignorable]<<<c<<<b &' '<a",  /* was &[first primary ignorable]<a */
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &' '<a",  /* was &[last primary ignorable]<a */
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },
#endif
    /*
     * NOTE(review): eight strings are listed below but len is 7, so the final
     * "u" lies outside the range that is passed on - confirm that this is intended.
     */
    { "&[last variable]<z"
      "&' '<x"  /* was &[last primary ignorable]<x, see above */
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  const uint32_t caseCount = sizeof(cases)/sizeof(cases[0]);
  uint32_t n = 0;

  while(n < caseCount) {
    genericRulesStarter(cases[n].rules, cases[n].data, cases[n].len);
    n++;
  }
}
2480
2481
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * (requested through an [optimize ...] rule) fails. The functionality itself
 * remains the same, so there is nothing further that could be checked.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } cases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
      { "a", "b"}, 2}
  };
  const uint32_t caseCount = sizeof(cases)/sizeof(cases[0]);
  uint32_t n = 0;

  while(n < caseCount) {
    genericRulesStarter(cases[n].rules, cases[n].data, cases[n].len);
    n++;
  }
}
2503
2504/*
2505cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
2506weiv    ucol_strcollIter?
2507cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
2508weiv    these are the input strings?
2509cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
2510weiv    will check - could be a problem with utf-8 iterator
2511cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
2512weiv    hmmm
2513cycheng@ca.ibm.c... note that we have a standalone high surrogate
2514weiv    that doesn't sound right
2515cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
2516weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
2517cycheng@ca.ibm.c... yes
2518weiv    and then do the comparison
2519cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
2520weiv    utf-16 strings look like a little endian ones in the example you sent me
2521weiv    It could be a bug - let me try to test it out
2522cycheng@ca.ibm.c... ok
2523cycheng@ca.ibm.c... we can wait till the conf. call
2524cycheng@ca.ibm.c... next weke
2525weiv    that would be great
2526weiv    hmmm
2527weiv    I might be wrong
2528weiv    let me play with it some more
2529cycheng@ca.ibm.c... ok
2530cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
2531cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
2532cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
2533weiv    ok
2534cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
2535weiv    thanks
2536cycheng@ca.ibm.c... the 4 strings we sent are just samples
2537*/
2538#if 0
2539static void Alexis(void) {
2540  UErrorCode status = U_ZERO_ERROR;
2541  UCollator *coll = ucol_open("", &status);
2542
2543
2544  const char utf16be[2][4] = {
2545    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
2546    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
2547  };
2548
2549  const char utf8[2][4] = {
2550    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
2551    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
2552  };
2553
2554  UCharIterator iterU161, iterU162;
2555  UCharIterator iterU81, iterU82;
2556
2557  UCollationResult resU16, resU8;
2558
2559  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
2560  uiter_setUTF16BE(&iterU162, utf16be[1], 4);
2561
2562  uiter_setUTF8(&iterU81, utf8[0], 4);
2563  uiter_setUTF8(&iterU82, utf8[1], 4);
2564
2565  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2566
2567  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
2568  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);
2569
2570
2571  if(resU16 != resU8) {
2572    log_err("different results\n");
2573  }
2574
2575  ucol_close(coll);
2576}
2577#endif
2578
2579#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
2580static void Alexis2(void) {
2581  UErrorCode status = U_ZERO_ERROR;
2582  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2583  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2584  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
2585  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
2586
2587  UConverter *conv = NULL;
2588
2589  UCharIterator U16BEItS, U16BEItT;
2590  UCharIterator U8ItS, U8ItT;
2591
2592  UCollationResult resU16, resU16BE, resU8;
2593
2594  static const char* const pairs[][2] = {
2595    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
2596    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
2597    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
2598    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
2599    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
2600    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
2601    { "\\u0020", "\\u0020\\u0000"}
2602/*
26035F20 (my result here)
26045F204E008E3F
26055F20 (your result here)
2606*/
2607  };
2608
2609  int32_t i = 0;
2610
2611  UCollator *coll = ucol_open("", &status);
2612  if(status == U_FILE_ACCESS_ERROR) {
2613    log_data_err("Is your data around?\n");
2614    return;
2615  } else if(U_FAILURE(status)) {
2616    log_err("Error opening collator\n");
2617    return;
2618  }
2619  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2620  conv = ucnv_open("UTF16BE", &status);
2621  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
2622    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2623    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
2624
2625    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
2626
2627    log_verbose("Result of strcoll is %i\n", resU16);
2628
2629    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
2630    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
2631    (void)U16BELenS;    /* Suppress set but not used warnings. */
2632    (void)U16BELenT;
2633
2634    /* use the original sizes, as the result from converter is in bytes */
2635    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
2636    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
2637
2638    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
2639
2640    log_verbose("Result of U16BE is %i\n", resU16BE);
2641
2642    if(resU16 != resU16BE) {
2643      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
2644    }
2645
2646    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
2647    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
2648
2649    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
2650    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
2651
2652    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
2653
2654    if(resU16 != resU8) {
2655      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
2656    }
2657
2658  }
2659
2660  ucol_close(coll);
2661  ucnv_close(conv);
2662}
2663
2664static void TestHebrewUCA(void) {
2665  UErrorCode status = U_ZERO_ERROR;
2666  static const char *first[] = {
2667    "d790d6b8d79cd795d6bcd7a9",
2668    "d790d79cd79ed7a7d799d799d7a1",
2669    "d790d6b4d79ed795d6bcd7a9",
2670  };
2671
2672  char utf8String[3][256];
2673  UChar utf16String[3][256];
2674
2675  int32_t i = 0, j = 0;
2676  int32_t sizeUTF8[3];
2677  int32_t sizeUTF16[3];
2678
2679  UCollator *coll = ucol_open("", &status);
2680  if (U_FAILURE(status)) {
2681      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
2682      return;
2683  }
2684  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
2685
2686  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
2687    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
2688    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
2689    log_verbose("%i: ");
2690    for(j = 0; j < sizeUTF16[i]; j++) {
2691      /*log_verbose("\\u%04X", utf16String[i][j]);*/
2692      log_verbose("%04X", utf16String[i][j]);
2693    }
2694    log_verbose("\n");
2695  }
2696  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
2697    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
2698      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
2699    }
2700  }
2701
2702  ucol_close(coll);
2703
2704}
2705
2706static void TestPartialSortKeyTermination(void) {
2707  static const char* cases[] = {
2708    "\\u1234\\u1234\\udc00",
2709    "\\udc00\\ud800\\ud800"
2710  };
2711
2712  int32_t i;
2713
2714  UErrorCode status = U_ZERO_ERROR;
2715
2716  UCollator *coll = ucol_open("", &status);
2717
2718  UCharIterator iter;
2719
2720  UChar currCase[256];
2721  int32_t length = 0;
2722  int32_t pKeyLen = 0;
2723
2724  uint8_t key[256];
2725
2726  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
2727    uint32_t state[2] = {0, 0};
2728    length = u_unescape(cases[i], currCase, 256);
2729    uiter_setString(&iter, currCase, length);
2730    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
2731    (void)pKeyLen;   /* Suppress set but not used warning. */
2732
2733    log_verbose("Done\n");
2734
2735  }
2736  ucol_close(coll);
2737}
2738
2739static void TestSettings(void) {
2740  static const char* cases[] = {
2741    "apple",
2742      "Apple"
2743  };
2744
2745  static const char* locales[] = {
2746    "",
2747      "en"
2748  };
2749
2750  UErrorCode status = U_ZERO_ERROR;
2751
2752  int32_t i = 0, j = 0;
2753
2754  UChar source[256], target[256];
2755  int32_t sLen = 0, tLen = 0;
2756
2757  UCollator *collateObject = NULL;
2758  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
2759    collateObject = ucol_open(locales[i], &status);
2760    ucol_setStrength(collateObject, UCOL_PRIMARY);
2761    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
2762    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
2763      sLen = u_unescape(cases[j-1], source, 256);
2764      source[sLen] = 0;
2765      tLen = u_unescape(cases[j], target, 256);
2766      source[tLen] = 0;
2767      doTest(collateObject, source, target, UCOL_EQUAL);
2768    }
2769    ucol_close(collateObject);
2770  }
2771}
2772
2773static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
2774    UErrorCode status = U_ZERO_ERROR;
2775    int32_t errorNo = 0;
2776    const UChar *sourceRules = NULL;
2777    int32_t sourceRulesLen = 0;
2778    UParseError parseError;
2779    UColAttributeValue french = UCOL_OFF;
2780
2781    if(!ucol_equals(source, target)) {
2782        log_err("Same collators, different address not equal\n");
2783        errorNo++;
2784    }
2785    ucol_close(target);
2786    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
2787        target = ucol_safeClone(source, NULL, NULL, &status);
2788        if(U_FAILURE(status)) {
2789            log_err("Error creating clone\n");
2790            errorNo++;
2791            return errorNo;
2792        }
2793        if(!ucol_equals(source, target)) {
2794            log_err("Collator different from it's clone\n");
2795            errorNo++;
2796        }
2797        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
2798        if(french == UCOL_ON) {
2799            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
2800        } else {
2801            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
2802        }
2803        if(U_FAILURE(status)) {
2804            log_err("Error setting attributes\n");
2805            errorNo++;
2806            return errorNo;
2807        }
2808        if(ucol_equals(source, target)) {
2809            log_err("Collators same even when options changed\n");
2810            errorNo++;
2811        }
2812        ucol_close(target);
2813
2814        sourceRules = ucol_getRules(source, &sourceRulesLen);
2815        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2816        if(U_FAILURE(status)) {
2817            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
2818            errorNo++;
2819            return errorNo;
2820        }
2821        /* Note: The tailoring rule string is an optional data item. */
2822        if(!ucol_equals(source, target) && sourceRulesLen != 0) {
2823            log_err("Collator different from collator that was created from the same rules\n");
2824            errorNo++;
2825        }
2826        ucol_close(target);
2827    }
2828    return errorNo;
2829}
2830
2831
2832static void TestEquals(void) {
2833    /* ucol_equals is not currently a public API. There is a chance that it will become
2834    * something like this.
2835    */
2836    /* test whether the two collators instantiated from the same locale are equal */
2837    UErrorCode status = U_ZERO_ERROR;
2838    UParseError parseError;
2839    int32_t noOfLoc = uloc_countAvailable();
2840    const char *locName = NULL;
2841    UCollator *source = NULL, *target = NULL;
2842    int32_t i = 0;
2843
2844    const char* rules[] = {
2845        "&l < lj <<< Lj <<< LJ",
2846        "&n < nj <<< Nj <<< NJ",
2847        "&ae <<< \\u00e4",
2848        "&AE <<< \\u00c4"
2849    };
2850    /*
2851    const char* badRules[] = {
2852    "&l <<< Lj",
2853    "&n < nj <<< nJ <<< NJ",
2854    "&a <<< \\u00e4",
2855    "&AE <<< \\u00c4 <<< x"
2856    };
2857    */
2858
2859    UChar sourceRules[1024], targetRules[1024];
2860    int32_t sourceRulesSize = 0, targetRulesSize = 0;
2861    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
2862
2863    for(i = 0; i < rulesSize; i++) {
2864        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
2865        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
2866    }
2867
2868    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2869    if(status == U_FILE_ACCESS_ERROR) {
2870        log_data_err("Is your data around?\n");
2871        return;
2872    } else if(U_FAILURE(status)) {
2873        log_err("Error opening collator\n");
2874        return;
2875    }
2876    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
2877    if(!ucol_equals(source, target)) {
2878        log_err("Equivalent collators not equal!\n");
2879    }
2880    ucol_close(source);
2881    ucol_close(target);
2882
2883    source = ucol_open("root", &status);
2884    target = ucol_open("root", &status);
2885    log_verbose("Testing root\n");
2886    if(!ucol_equals(source, source)) {
2887        log_err("Same collator not equal\n");
2888    }
2889    if(TestEqualsForCollator(locName, source, target)) {
2890        log_err("Errors for root\n", locName);
2891    }
2892    ucol_close(source);
2893
2894    for(i = 0; i<noOfLoc; i++) {
2895        status = U_ZERO_ERROR;
2896        locName = uloc_getAvailable(i);
2897        /*if(hasCollationElements(locName)) {*/
2898        log_verbose("Testing equality for locale %s\n", locName);
2899        source = ucol_open(locName, &status);
2900        target = ucol_open(locName, &status);
2901        if (U_FAILURE(status)) {
2902            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
2903            continue;
2904        }
2905        if(TestEqualsForCollator(locName, source, target)) {
2906            log_err("Errors for locale %s\n", locName);
2907        }
2908        ucol_close(source);
2909        /*}*/
2910    }
2911}
2912
2913static void TestJ2726(void) {
2914    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
2915    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
2916    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
2917    UErrorCode status = U_ZERO_ERROR;
2918    UCollator *coll = ucol_open("en", &status);
2919    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
2920    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2921    doTest(coll, a, aSpace, UCOL_EQUAL);
2922    doTest(coll, aSpace, a, UCOL_EQUAL);
2923    doTest(coll, a, spaceA, UCOL_EQUAL);
2924    doTest(coll, spaceA, a, UCOL_EQUAL);
2925    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
2926    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
2927    ucol_close(coll);
2928}
2929
2930static void NullRule(void) {
2931    UChar r[3] = {0};
2932    UErrorCode status = U_ZERO_ERROR;
2933    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2934    if(U_SUCCESS(status)) {
2935        log_err("This should have been an error!\n");
2936        ucol_close(coll);
2937    } else {
2938        status = U_ZERO_ERROR;
2939    }
2940    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2941    if(U_FAILURE(status)) {
2942        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
2943    } else {
2944        ucol_close(coll);
2945    }
2946}
2947
2948/**
2949 * Test for CollationElementIterator previous and next for the whole set of
2950 * unicode characters with normalization on.
2951 */
2952static void TestNumericCollation(void)
2953{
2954    UErrorCode status = U_ZERO_ERROR;
2955
2956    const static char *basicTestStrings[]={
2957    "hello1",
2958    "hello2",
2959    "hello2002",
2960    "hello2003",
2961    "hello123456",
2962    "hello1234567",
2963    "hello10000000",
2964    "hello100000000",
2965    "hello1000000000",
2966    "hello10000000000",
2967    };
2968
2969    const static char *preZeroTestStrings[]={
2970    "avery10000",
2971    "avery010000",
2972    "avery0010000",
2973    "avery00010000",
2974    "avery000010000",
2975    "avery0000010000",
2976    "avery00000010000",
2977    "avery000000010000",
2978    };
2979
2980    const static char *thirtyTwoBitNumericStrings[]={
2981    "avery42949672960",
2982    "avery42949672961",
2983    "avery42949672962",
2984    "avery429496729610"
2985    };
2986
2987     const static char *longNumericStrings[]={
2988     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
2989        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
2990        are treated as multiple collation elements. */
2991    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
2992    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
2993    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
2994    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
2995    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
2996    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
2997    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
2998    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
2999    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
3000    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
3001    };
3002
3003    const static char *supplementaryDigits[] = {
3004      "\\uD835\\uDFCE", /* 0 */
3005      "\\uD835\\uDFCF", /* 1 */
3006      "\\uD835\\uDFD0", /* 2 */
3007      "\\uD835\\uDFD1", /* 3 */
3008      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
3009      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
3010      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
3011      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
3012      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
3013      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
3014    };
3015
3016    const static char *foreignDigits[] = {
3017      "\\u0661",
3018        "\\u0662",
3019        "\\u0663",
3020      "\\u0661\\u0660",
3021      "\\u0661\\u0662",
3022      "\\u0661\\u0663",
3023      "\\u0662\\u0660",
3024      "\\u0662\\u0662",
3025      "\\u0662\\u0663",
3026      "\\u0663\\u0660",
3027      "\\u0663\\u0662",
3028      "\\u0663\\u0663"
3029    };
3030
3031    const static char *evenZeroes[] = {
3032      "2000",
3033      "2001",
3034        "2002",
3035        "2003"
3036    };
3037
3038    UColAttribute att = UCOL_NUMERIC_COLLATION;
3039    UColAttributeValue val = UCOL_ON;
3040
3041    /* Open our collator. */
3042    UCollator* coll = ucol_open("root", &status);
3043    if (U_FAILURE(status)){
3044        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
3045              myErrorName(status));
3046        return;
3047    }
3048    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
3049    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
3050    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
3051    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
3052    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
3053    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
3054
3055    /* Setting up our collator to do digits. */
3056    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
3057    if (U_FAILURE(status)){
3058        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
3059              myErrorName(status));
3060        return;
3061    }
3062
3063    /*
3064       Testing that prepended zeroes still yield the correct collation behavior.
3065       We expect that every element in our strings array will be equal.
3066    */
3067    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
3068
3069    ucol_close(coll);
3070}
3071
3072static void TestTibetanConformance(void)
3073{
3074    const char* test[] = {
3075        "\\u0FB2\\u0591\\u0F71\\u0061",
3076        "\\u0FB2\\u0F71\\u0061"
3077    };
3078
3079    UErrorCode status = U_ZERO_ERROR;
3080    UCollator *coll = ucol_open("", &status);
3081    UChar source[100];
3082    UChar target[100];
3083    int result;
3084    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3085    if (U_SUCCESS(status)) {
3086        u_unescape(test[0], source, 100);
3087        u_unescape(test[1], target, 100);
3088        doTest(coll, source, target, UCOL_EQUAL);
3089        result = ucol_strcoll(coll, source, -1,   target, -1);
3090        log_verbose("result %d\n", result);
3091        if (UCOL_EQUAL != result) {
3092            log_err("Tibetan comparison error\n");
3093        }
3094    }
3095    ucol_close(coll);
3096
3097    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
3098}
3099
/**
 * Hands two three-character Han strings, in their listed order, to
 * genericLocaleStarter() for the "zh__PINYIN" locale.
 */
static void TestPinyinProblem(void) {
    static const char *hanPair[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanPair, sizeof(hanPair)/sizeof(hanPair[0]));
}
3104
3105/**
3106 * Iterate through the given iterator, checking to see that all the strings
3107 * in the expected array are present.
3108 * @param expected array of strings we expect to see, or NULL
3109 * @param expectedCount number of elements of expected, or 0
3110 */
3111static int32_t checkUEnumeration(const char* msg,
3112                                 UEnumeration* iter,
3113                                 const char** expected,
3114                                 int32_t expectedCount) {
3115    UErrorCode ec = U_ZERO_ERROR;
3116    int32_t i = 0, n, j, bit;
3117    int32_t seenMask = 0;
3118
3119    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
3120    n = uenum_count(iter, &ec);
3121    if (!assertSuccess("count", &ec)) return -1;
3122    log_verbose("%s = [", msg);
3123    for (;; ++i) {
3124        const char* s = uenum_next(iter, NULL, &ec);
3125        if (!assertSuccess("snext", &ec) || s == NULL) break;
3126        if (i != 0) log_verbose(",");
3127        log_verbose("%s", s);
3128        /* check expected list */
3129        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3130            if ((seenMask&bit) == 0 &&
3131                uprv_strcmp(s, expected[j]) == 0) {
3132                seenMask |= bit;
3133                break;
3134            }
3135        }
3136    }
3137    log_verbose("] (%d)\n", i);
3138    assertTrue("count verified", i==n);
3139    /* did we see all expected strings? */
3140    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
3141        if ((seenMask&bit)!=0) {
3142            log_verbose("Ok: \"%s\" seen\n", expected[j]);
3143        } else {
3144            log_err("FAIL: \"%s\" not seen\n", expected[j]);
3145        }
3146    }
3147    return n;
3148}
3149
3150/**
3151 * Test new API added for separate collation tree.
3152 */
3153static void TestSeparateTrees(void) {
3154    UErrorCode ec = U_ZERO_ERROR;
3155    UEnumeration *e = NULL;
3156    int32_t n = -1;
3157    UBool isAvailable;
3158    char loc[256];
3159
3160    static const char* AVAIL[] = { "en", "de" };
3161
3162    static const char* KW[] = { "collation" };
3163
3164    static const char* KWVAL[] = { "phonebook", "stroke" };
3165
3166#if !UCONFIG_NO_SERVICE
3167    e = ucol_openAvailableLocales(&ec);
3168    if (e != NULL) {
3169        assertSuccess("ucol_openAvailableLocales", &ec);
3170        assertTrue("ucol_openAvailableLocales!=0", e!=0);
3171        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
3172        (void)n;    /* Suppress set but not used warnings. */
3173        /* Don't need to check n because we check list */
3174        uenum_close(e);
3175    } else {
3176        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
3177    }
3178#endif
3179
3180    e = ucol_getKeywords(&ec);
3181    if (e != NULL) {
3182        assertSuccess("ucol_getKeywords", &ec);
3183        assertTrue("ucol_getKeywords!=0", e!=0);
3184        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
3185        /* Don't need to check n because we check list */
3186        uenum_close(e);
3187    } else {
3188        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
3189    }
3190
3191    e = ucol_getKeywordValues(KW[0], &ec);
3192    if (e != NULL) {
3193        assertSuccess("ucol_getKeywordValues", &ec);
3194        assertTrue("ucol_getKeywordValues!=0", e!=0);
3195        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
3196        /* Don't need to check n because we check list */
3197        uenum_close(e);
3198    } else {
3199        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
3200    }
3201
3202    /* Try setting a warning before calling ucol_getKeywordValues */
3203    ec = U_USING_FALLBACK_WARNING;
3204    e = ucol_getKeywordValues(KW[0], &ec);
3205    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
3206        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
3207        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
3208        /* Don't need to check n because we check list */
3209        uenum_close(e);
3210    }
3211
3212    /*
3213U_DRAFT int32_t U_EXPORT2
3214ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
3215                             const char* locale, UBool* isAvailable,
3216                             UErrorCode* status);
3217}
3218*/
3219    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
3220                                     &isAvailable, &ec);
3221    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3222        assertEquals("getFunctionalEquivalent(de)", "root", loc);
3223        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
3224                   isAvailable == TRUE);
3225    }
3226
3227    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
3228                                     &isAvailable, &ec);
3229    if (assertSuccess("getFunctionalEquivalent", &ec)) {
3230        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
3231        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
3232                   isAvailable == FALSE);
3233    }
3234}
3235
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through both a set of "[before 2]" tailoring
 * rules for tone-marked vowels and the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables with and without a macron on the vowel. */
    static const char *test[] = {
        "l\\u0101", "la",
        "l\\u0101n", "lan ",
        "l\\u0113", "le",
        "l\\u0113n", "len"
    };

    /* Tone-mark and case combinations around 'a'. */
    static const char *test2[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    /* First list: explicit rules, then the "zh" locale. */
    genericRulesStarter(rules, test, sizeof(test)/sizeof(test[0]));
    genericLocaleStarter("zh", test, sizeof(test)/sizeof(test[0]));

    /* Second list: explicit rules, then the "zh" locale. */
    genericRulesStarter(rules, test2, sizeof(test2)/sizeof(test2[0]));
    genericLocaleStarter("zh", test2, sizeof(test2)/sizeof(test2[0]));
}
3296
3297static void TestBeforeTightening(void) {
3298    static const struct {
3299        const char *rules;
3300        UErrorCode expectedStatus;
3301    } tests[] = {
3302        { "&[before 1]a<x", U_ZERO_ERROR },
3303        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
3304        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
3305        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
3306        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
3307        { "&[before 2]a<<x",U_ZERO_ERROR },
3308        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
3309        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
3310        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
3311        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
3312        { "&[before 3]a<<<x",U_ZERO_ERROR },
3313        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
3314        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
3315    };
3316
3317    int32_t i = 0;
3318
3319    UErrorCode status = U_ZERO_ERROR;
3320    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3321    uint32_t rlen = 0;
3322
3323    UCollator *coll = NULL;
3324
3325
3326    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
3327        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
3328        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3329        if(status != tests[i].expectedStatus) {
3330            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
3331                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
3332        }
3333        ucol_close(coll);
3334        status = U_ZERO_ERROR;
3335    }
3336
3337}
3338
3339/*
3340&m < a
3341&[before 1] a < x <<< X << q <<< Q < z
3342assert: m <<< M < x <<< X << q <<< Q < z < a < n
3343
3344&m < a
3345&[before 2] a << x <<< X << q <<< Q < z
3346assert: m <<< M < x <<< X << q <<< Q << a < z < n
3347
3348&m < a
3349&[before 3] a <<< x <<< X << q <<< Q < z
3350assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
3351
3352
3353&m << a
3354&[before 1] a < x <<< X << q <<< Q < z
3355assert: x <<< X << q <<< Q < z < m <<< M << a < n
3356
3357&m << a
3358&[before 2] a << x <<< X << q <<< Q < z
3359assert: m <<< M << x <<< X << q <<< Q << a < z < n
3360
3361&m << a
3362&[before 3] a <<< x <<< X << q <<< Q < z
3363assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
3364
3365
3366&m <<< a
3367&[before 1] a < x <<< X << q <<< Q < z
3368assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
3369
3370&m <<< a
3371&[before 2] a << x <<< X << q <<< Q < z
3372assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
3373
3374&m <<< a
3375&[before 3] a <<< x <<< X << q <<< Q < z
3376assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
3377
3378
3379&[before 1] s < x <<< X << q <<< Q < z
3380assert: r <<< R < x <<< X << q <<< Q < z < s < n
3381
3382&[before 2] s << x <<< X << q <<< Q < z
3383assert: r <<< R < x <<< X << q <<< Q << s < z < n
3384
3385&[before 3] s <<< x <<< X << q <<< Q < z
3386assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
3387
3388
3389&[before 1] \u24DC < x <<< X << q <<< Q < z
3390assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
3391
3392&[before 2] \u24DC << x <<< X << q <<< Q < z
3393assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
3394
3395&[before 3] \u24DC <<< x <<< X << q <<< Q < z
3396assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
3397*/
3398
3399
#if 0
/* requires features not yet supported */
/*
 * Disabled test: each entry pairs a rule string with the order in which its
 * nine listed strings are expected to sort, and hands both to
 * genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        /* reset with a primary difference: &m < a */
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset with a secondary difference: &m << a */
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        /* reset with a tertiary difference: &m <<< a */
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        /* no preceding reset, anchored on s */
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        /* no preceding reset, anchored on U+24DC */
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t t;

    for(t = 0; t < sizeof(tests)/sizeof(tests[0]); t++) {
        genericRulesStarter(tests[t].rules, tests[t].order, tests[t].size);
    }
}
#endif
3447
3448static void TestTailorNULL( void ) {
3449    const static char* rule = "&a <<< '\\u0000'";
3450    UErrorCode status = U_ZERO_ERROR;
3451    UChar rlz[RULE_BUFFER_LEN] = { 0 };
3452    uint32_t rlen = 0;
3453    UChar a = 1, null = 0;
3454    UCollationResult res = UCOL_EQUAL;
3455
3456    UCollator *coll = NULL;
3457
3458
3459    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
3460    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
3461
3462    if(U_FAILURE(status)) {
3463        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
3464    } else {
3465        res = ucol_strcoll(coll, &a, 1, &null, 1);
3466
3467        if(res != UCOL_LESS) {
3468            log_err("NULL was not tailored properly!\n");
3469        }
3470    }
3471
3472    ucol_close(coll);
3473}
3474
3475static void
3476TestUpperFirstQuaternary(void)
3477{
3478  const char* tests[] = { "B", "b", "Bb", "bB" };
3479  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
3480  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
3481  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3482}
3483
3484static void
3485TestJ4960(void)
3486{
3487  const char* tests[] = { "\\u00e2T", "aT" };
3488  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
3489  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
3490  const char* tests2[] = { "a", "A" };
3491  const char* rule = "&[first tertiary ignorable]=A=a";
3492  UColAttribute att2[] = { UCOL_CASE_LEVEL };
3493  UColAttributeValue attVals2[] = { UCOL_ON };
3494  /* Test whether we correctly ignore primary ignorables on case level when */
3495  /* we have only primary & case level */
3496  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
3497  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
3498  /* and case level */
3499  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
3500  /* Test whether completely ignorable letters have case level info (they shouldn't) */
3501  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
3502}
3503
3504static void
3505TestJ5223(void)
3506{
3507  static const char *test = "this is a test string";
3508  UChar ustr[256];
3509  int32_t ustr_length = u_unescape(test, ustr, 256);
3510  unsigned char sortkey[256];
3511  int32_t sortkey_length;
3512  UErrorCode status = U_ZERO_ERROR;
3513  static UCollator *coll = NULL;
3514  coll = ucol_open("root", &status);
3515  if(U_FAILURE(status)) {
3516    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
3517    return;
3518  }
3519  ucol_setStrength(coll, UCOL_PRIMARY);
3520  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
3521  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3522  if (U_FAILURE(status)) {
3523    log_err("Failed setting atributes\n");
3524    return;
3525  }
3526  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
3527  if (sortkey_length > 256) return;
3528
3529  /* we mark the position where the null byte should be written in advance */
3530  sortkey[sortkey_length-1] = 0xAA;
3531
3532  /* we set the buffer size one byte higher than needed */
3533  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3534    sortkey_length+1);
3535
3536  /* no error occurs (for me) */
3537  if (sortkey[sortkey_length-1] == 0xAA) {
3538    log_err("Hit bug at first try\n");
3539  }
3540
3541  /* we mark the position where the null byte should be written again */
3542  sortkey[sortkey_length-1] = 0xAA;
3543
3544  /* this time we set the buffer size to the exact amount needed */
3545  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
3546    sortkey_length);
3547
3548  /* now the trailing null byte is not written */
3549  if (sortkey[sortkey_length-1] == 0xAA) {
3550    log_err("Hit bug at second try\n");
3551  }
3552
3553  ucol_close(coll);
3554}
3555
3556/* Regression test for Thai partial sort key problem */
3557static void
3558TestJ5232(void)
3559{
3560    const static char *test[] = {
3561        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
3562        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
3563    };
3564
3565    genericLocaleStarter("th", test, sizeof(test)/sizeof(test[0]));
3566}
3567
/*
 * Runs the generic rules test on a tailoring that combines a reset on "Ny"
 * with a reset on [first secondary ignorable], using the strings "a", "y".
 */
static void
TestJ5367(void)
{
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *strings[] = { "a", "y" };

    genericRulesStarter(tailoring, strings, LEN(strings));
}
3575
3576static void
3577TestVI5913(void)
3578{
3579    UErrorCode status = U_ZERO_ERROR;
3580    int32_t i, j;
3581    UCollator *coll =NULL;
3582    uint8_t  resColl[100], expColl[100];
3583    int32_t  rLen, tLen, ruleLen, sLen, kLen;
3584    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &b<0x1FF3-omega with Ypogegrammeni*/
3585    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
3586    /*
3587     * Note: Just tailoring &z<ae^ does not work as expected:
3588     * The UCA spec requires for discontiguous contractions that they
3589     * extend an *existing match* by one combining mark at a time.
3590     * Therefore, ae must be a contraction so that the builder finds
3591     * discontiguous contractions for ae^, for example with an intervening underdot.
3592     * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302, etc.
3593     */
3594    UChar rule3[256]={
3595        0x26, 0x78, 0x3c, 0x61, 0x65,      /* &x<ae */
3596        0x26, 0x7a, 0x3c, 0x0061, 0x00ea,  /* &z<a+e with circumflex.*/
3597        0};
3598    static const UChar tData[][20]={
3599        {0x1EAC, 0},
3600        {0x0041, 0x0323, 0x0302, 0},
3601        {0x1EA0, 0x0302, 0},
3602        {0x00C2, 0x0323, 0},
3603        {0x1ED8, 0},  /* O with dot and circumflex */
3604        {0x1ECC, 0x0302, 0},
3605        {0x1EB7, 0},
3606        {0x1EA1, 0x0306, 0},
3607    };
3608    static const UChar tailorData[][20]={
3609        {0x1FA2, 0},  /* Omega with 3 combining marks */
3610        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
3611        {0x1FF3, 0x0313, 0x0300, 0},
3612        {0x1F60, 0x0300, 0x0345, 0},
3613        {0x1F62, 0x0345, 0},
3614        {0x1FA0, 0x0300, 0},
3615    };
3616    static const UChar tailorData2[][20]={
3617        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
3618        {0x0073, 0x0323, 0x030C, 0},
3619        {0x0073, 0x030C, 0x0323, 0},
3620    };
3621    static const UChar tailorData3[][20]={
3622        {0x007a, 0},  /*  z */
3623        {0x0061, 0x0065, 0},  /*  a + e */
3624        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
3625        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
3626        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
3627        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
3628        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
3629        {0x00EA, 0},  /* e with circumflex  */
3630    };
3631
3632    /* Test Vietnamese sort. */
3633    coll = ucol_open("vi", &status);
3634    if(U_FAILURE(status)) {
3635        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
3636        return;
3637    }
3638    log_verbose("\n\nVI collation:");
3639    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
3640        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3641    }
3642    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
3643        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3644    }
3645    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
3646        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
3647    }
3648    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
3649        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3650    }
3651
3652    for (j=0; j<8; j++) {
3653        tLen = u_strlen(tData[j]);
3654        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3655        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3656        for(i = 0; i<rLen; i++) {
3657            log_verbose(" %02X", resColl[i]);
3658        }
3659    }
3660
3661    ucol_close(coll);
3662
3663    /* Test Romanian sort. */
3664    coll = ucol_open("ro", &status);
3665    log_verbose("\n\nRO collation:");
3666    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
3667        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
3668    }
3669    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
3670        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
3671    }
3672    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
3673        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
3674    }
3675
3676    for (j=4; j<8; j++) {
3677        tLen = u_strlen(tData[j]);
3678        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
3679        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
3680        for(i = 0; i<rLen; i++) {
3681            log_verbose(" %02X", resColl[i]);
3682        }
3683    }
3684    ucol_close(coll);
3685
3686    /* Test the precomposed Greek character with 3 combining marks. */
3687    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
3688    ruleLen = u_strlen(rule);
3689    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3690    if (U_FAILURE(status)) {
3691        log_err("ucol_openRules failed with %s\n", u_errorName(status));
3692        return;
3693    }
3694    sLen = u_strlen(tailorData[0]);
3695    for (j=1; j<6; j++) {
3696        tLen = u_strlen(tailorData[j]);
3697        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
3698            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
3699        }
3700    }
3701    /* Test getSortKey. */
3702    tLen = u_strlen(tailorData[0]);
3703    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
3704    for (j=0; j<6; j++) {
3705        tLen = u_strlen(tailorData[j]);
3706        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
3707        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3708            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3709            for(i = 0; i<rLen; i++) {
3710                log_err(" %02X", resColl[i]);
3711            }
3712        }
3713    }
3714    ucol_close(coll);
3715
3716    log_verbose("\n\nTailoring test for s with caron:");
3717    ruleLen = u_strlen(rule2);
3718    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3719    tLen = u_strlen(tailorData2[0]);
3720    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
3721    for (j=1; j<3; j++) {
3722        tLen = u_strlen(tailorData2[j]);
3723        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
3724        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3725            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
3726            for(i = 0; i<rLen; i++) {
3727                log_err(" %02X", resColl[i]);
3728            }
3729        }
3730    }
3731    ucol_close(coll);
3732
3733    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
3734    ruleLen = u_strlen(rule3);
3735    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3736    tLen = u_strlen(tailorData3[3]);
3737    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
3738    log_verbose("\n Test Data[3] :%s  \tlen: %d key: ", aescstrdup(tailorData3[3], tLen), tLen);
3739    for(i = 0; i<kLen; i++) {
3740        log_verbose(" %02X", expColl[i]);
3741    }
3742    for (j=4; j<6; j++) {
3743        tLen = u_strlen(tailorData3[j]);
3744        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
3745
3746        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
3747            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3748            for(i = 0; i<rLen; i++) {
3749                log_err(" %02X", resColl[i]);
3750            }
3751        }
3752
3753        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, aescstrdup(tailorData3[j], tLen), tLen);
3754         for(i = 0; i<rLen; i++) {
3755             log_verbose(" %02X", resColl[i]);
3756         }
3757    }
3758    ucol_close(coll);
3759}
3760
3761static void
3762TestTailor6179(void)
3763{
3764    UErrorCode status = U_ZERO_ERROR;
3765    int32_t i;
3766    UCollator *coll =NULL;
3767    uint8_t  resColl[100];
3768    int32_t  rLen, tLen, ruleLen;
3769    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
3770    static const UChar rule1[]={
3771            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
3772            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
3773            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
3774            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
3775    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
3776    static const UChar rule2[]={
3777            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
3778            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
3779            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
3780            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
3781            0x3C,0x3C,0x20,0x62,0};
3782
3783    static const UChar tData1[][4]={
3784        {0x61, 0},
3785        {0x62, 0},
3786        { 0xFDD0,0x009E, 0}
3787    };
3788    static const UChar tData2[][4]={
3789        {0x61, 0},
3790        {0x62, 0},
3791        { 0xFDD0,0x009E, 0}
3792     };
3793
3794    /*
3795     * These values from FractionalUCA.txt will change,
3796     * and need to be updated here.
3797     * TODO: Make this not check for particular sort keys.
3798     * Instead, test that we get CEs before & after other ignorables; see ticket #6179.
3799     */
3800    static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};
3801    static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};
3802    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};
3803    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};
3804
3805    UParseError parseError;
3806
3807    /* Test [Last Primary ignorable] */
3808
3809    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
3810    ruleLen = u_strlen(rule1);
3811    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3812    if (U_FAILURE(status)) {
3813        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
3814        return;
3815    }
3816    tLen = u_strlen(tData1[0]);
3817    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
3818    if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
3819        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
3820        for(i = 0; i<rLen; i++) {
3821            log_err(" %02X", resColl[i]);
3822        }
3823        log_err("\n");
3824    }
3825    tLen = u_strlen(tData1[1]);
3826    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
3827    if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
3828        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
3829        for(i = 0; i<rLen; i++) {
3830            log_err(" %02X", resColl[i]);
3831        }
3832        log_err("\n");
3833    }
3834    ucol_close(coll);
3835
3836
3837    /* Test [Last Secondary ignorable] */
3838    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
3839    ruleLen = u_strlen(rule2);
3840    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);
3841    if (U_FAILURE(status)) {
3842        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
3843        log_info("  offset=%d  \"%s\" | \"%s\"\n",
3844                 parseError.offset, aescstrdup(parseError.preContext, -1), aescstrdup(parseError.postContext, -1));
3845        return;
3846    }
3847    tLen = u_strlen(tData2[0]);
3848    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
3849    if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
3850        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
3851        for(i = 0; i<rLen; i++) {
3852            log_err(" %02X", resColl[i]);
3853        }
3854        log_err("\n");
3855    }
3856    tLen = u_strlen(tData2[1]);
3857    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
3858    if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
3859      log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
3860      for(i = 0; i<rLen; i++) {
3861        log_err(" %02X", resColl[i]);
3862      }
3863      log_err("\n");
3864    }
3865    ucol_close(coll);
3866}
3867
3868static void
3869TestUCAPrecontext(void)
3870{
3871    UErrorCode status = U_ZERO_ERROR;
3872    int32_t i, j;
3873    UCollator *coll =NULL;
3874    uint8_t  resColl[100], prevColl[100];
3875    int32_t  rLen, tLen, ruleLen;
3876    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
3877    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
3878    /* & l middle-dot << a  a is an expansion. */
3879
3880    UChar tData1[][20]={
3881            { 0xb7, 0},  /* standalone middle dot(0xb7) */
3882            { 0x387, 0}, /* standalone middle dot(0x387) */
3883            { 0x61, 0},  /* a */
3884            { 0x6C, 0},  /* l */
3885            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
3886            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
3887            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
3888            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
3889            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
3890            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
3891            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
3892     };
3893
3894    log_verbose("\n\nEN collation:");
3895    coll = ucol_open("en", &status);
3896    if (U_FAILURE(status)) {
3897        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
3898        return;
3899    }
3900    for (j=0; j<11; j++) {
3901        tLen = u_strlen(tData1[j]);
3902        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3903        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3904            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3905                    j, tData1[j]);
3906        }
3907        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3908        for(i = 0; i<rLen; i++) {
3909            log_verbose(" %02X", resColl[i]);
3910        }
3911        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3912     }
3913     ucol_close(coll);
3914
3915
3916     log_verbose("\n\nJA collation:");
3917     coll = ucol_open("ja", &status);
3918     if (U_FAILURE(status)) {
3919         log_err("Tailoring test: &z <<a|- failed!");
3920         return;
3921     }
3922     for (j=0; j<11; j++) {
3923         tLen = u_strlen(tData1[j]);
3924         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3925         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3926             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3927                     j, tData1[j]);
3928         }
3929         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3930         for(i = 0; i<rLen; i++) {
3931             log_verbose(" %02X", resColl[i]);
3932         }
3933         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3934      }
3935      ucol_close(coll);
3936
3937
3938      log_verbose("\n\nTailoring test: & middle dot < a ");
3939      ruleLen = u_strlen(rule1);
3940      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3941      if (U_FAILURE(status)) {
3942          log_err("Tailoring test: & middle dot < a failed!");
3943          return;
3944      }
3945      for (j=0; j<11; j++) {
3946          tLen = u_strlen(tData1[j]);
3947          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3948          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3949              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3950                      j, tData1[j]);
3951          }
3952          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3953          for(i = 0; i<rLen; i++) {
3954              log_verbose(" %02X", resColl[i]);
3955          }
3956          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3957       }
3958       ucol_close(coll);
3959
3960
3961       log_verbose("\n\nTailoring test: & l middle-dot << a ");
3962       ruleLen = u_strlen(rule2);
3963       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
3964       if (U_FAILURE(status)) {
3965           log_err("Tailoring test: & l middle-dot << a failed!");
3966           return;
3967       }
3968       for (j=0; j<11; j++) {
3969           tLen = u_strlen(tData1[j]);
3970           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
3971           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
3972               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
3973                       j, tData1[j]);
3974           }
3975           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
3976               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
3977                       j, tData1[j]);
3978           }
3979           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
3980           for(i = 0; i<rLen; i++) {
3981               log_verbose(" %02X", resColl[i]);
3982           }
3983           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
3984        }
3985        ucol_close(coll);
3986}
3987
3988static void
3989TestOutOfBuffer5468(void)
3990{
3991    static const char *test = "\\u4e00";
3992    UChar ustr[256];
3993    int32_t ustr_length = u_unescape(test, ustr, 256);
3994    unsigned char shortKeyBuf[1];
3995    int32_t sortkey_length;
3996    UErrorCode status = U_ZERO_ERROR;
3997    static UCollator *coll = NULL;
3998
3999    coll = ucol_open("root", &status);
4000    if(U_FAILURE(status)) {
4001      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4002      return;
4003    }
4004    ucol_setStrength(coll, UCOL_PRIMARY);
4005    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4006    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4007    if (U_FAILURE(status)) {
4008      log_err("Failed setting atributes\n");
4009      return;
4010    }
4011
4012    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
4013    if (sortkey_length != 4) {
4014        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
4015    }
4016    log_verbose("length of sortKey is %d", sortkey_length);
4017    ucol_close(coll);
4018}
4019
4020#define TSKC_DATA_SIZE 5
4021#define TSKC_BUF_SIZE  50
4022static void
4023TestSortKeyConsistency(void)
4024{
4025    UErrorCode icuRC = U_ZERO_ERROR;
4026    UCollator* ucol;
4027    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
4028
4029    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4030    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
4031    int32_t i, j, i2;
4032
4033    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
4034    if (U_FAILURE(icuRC))
4035    {
4036        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
4037        return;
4038    }
4039
4040    for (i = 0; i < TSKC_DATA_SIZE; i++)
4041    {
4042        UCharIterator uiter;
4043        uint32_t state[2] = { 0, 0 };
4044        int32_t dataLen = i+1;
4045        for (j=0; j<TSKC_BUF_SIZE; j++)
4046            bufFull[i][j] = bufPart[i][j] = 0;
4047
4048        /* Full sort key */
4049        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
4050
4051        /* Partial sort key */
4052        uiter_setString(&uiter, data, dataLen);
4053        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
4054        if (U_FAILURE(icuRC))
4055        {
4056            log_err("ucol_nextSortKeyPart failed\n");
4057            ucol_close(ucol);
4058            return;
4059        }
4060
4061        for (i2=0; i2<i; i2++)
4062        {
4063            UBool fullMatch = TRUE;
4064            UBool partMatch = TRUE;
4065            for (j=0; j<TSKC_BUF_SIZE; j++)
4066            {
4067                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
4068                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
4069            }
4070            if (fullMatch != partMatch) {
4071                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
4072                                  : "partial key was consistent, but full key changed\n");
4073                ucol_close(ucol);
4074                return;
4075            }
4076        }
4077    }
4078
4079    /*=============================================*/
4080   ucol_close(ucol);
4081}
4082
4083/* ticket: 6101 */
4084static void TestCroatianSortKey(void) {
4085    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
4086    UErrorCode status = U_ZERO_ERROR;
4087    UCollator *ucol;
4088    UCharIterator iter;
4089
4090    static const UChar text[] = { 0x0044, 0xD81A };
4091
4092    size_t length = sizeof(text)/sizeof(*text);
4093
4094    uint8_t textSortKey[32];
4095    size_t lenSortKey = 32;
4096    size_t actualSortKeyLen;
4097    uint32_t uStateInfo[2] = { 0, 0 };
4098
4099    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
4100    if (U_FAILURE(status)) {
4101        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
4102        return;
4103    }
4104
4105    uiter_setString(&iter, text, length);
4106
4107    actualSortKeyLen = ucol_nextSortKeyPart(
4108        ucol, &iter, (uint32_t*)uStateInfo,
4109        textSortKey, lenSortKey, &status
4110        );
4111
4112    if (actualSortKeyLen == lenSortKey) {
4113        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
4114    }
4115
4116    ucol_close(ucol);
4117}
4118
4119/* ticket: 6140 */
4120/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
4121 * they are both Hiragana and Katakana
4122 */
4123#define SORTKEYLEN 50
4124static void TestHiragana(void) {
4125    UErrorCode status = U_ZERO_ERROR;
4126    UCollator* ucol;
4127    UCollationResult strcollresult;
4128    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
4129    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
4130    int32_t data1Len = sizeof(data1)/sizeof(*data1);
4131    int32_t data2Len = sizeof(data2)/sizeof(*data2);
4132    int32_t i, j;
4133    uint8_t sortKey1[SORTKEYLEN];
4134    uint8_t sortKey2[SORTKEYLEN];
4135
4136    UCharIterator uiter1;
4137    UCharIterator uiter2;
4138    uint32_t state1[2] = { 0, 0 };
4139    uint32_t state2[2] = { 0, 0 };
4140    int32_t keySize1;
4141    int32_t keySize2;
4142
4143    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
4144            &status);
4145    if (U_FAILURE(status)) {
4146        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
4147        return;
4148    }
4149
4150    /* Start of full sort keys */
4151    /* Full sort key1 */
4152    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
4153    /* Full sort key2 */
4154    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
4155    if (keySize1 == keySize2) {
4156        for (i = 0; i < keySize1; i++) {
4157            if (sortKey1[i] != sortKey2[i]) {
4158                log_err("Full sort keys are different. Should be equal.");
4159            }
4160        }
4161    } else {
4162        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
4163    }
4164    /* End of full sort keys */
4165
4166    /* Start of partial sort keys */
4167    /* Partial sort key1 */
4168    uiter_setString(&uiter1, data1, data1Len);
4169    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
4170    /* Partial sort key2 */
4171    uiter_setString(&uiter2, data2, data2Len);
4172    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
4173    if (U_SUCCESS(status) && keySize1 == keySize2) {
4174        for (j = 0; j < keySize1; j++) {
4175            if (sortKey1[j] != sortKey2[j]) {
4176                log_err("Partial sort keys are different. Should be equal");
4177            }
4178        }
4179    } else {
4180        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
4181    }
4182    /* End of partial sort keys */
4183
4184    /* Start of strcoll */
4185    /* Use ucol_strcoll() to determine ordering */
4186    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
4187    if (strcollresult != UCOL_EQUAL) {
4188        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
4189    }
4190
4191    ucol_close(ucol);
4192}
4193
/*
 * Convenient struct for running collation tests.
 * One instance describes a single comparison: two fixed-capacity UTF-16
 * strings (at most MAX_TOKEN_LEN code units each, including any terminator)
 * and the collation result expected when source is compared with target.
 */
typedef struct {
  const UChar source[MAX_TOKEN_LEN];  /* String on left */
  const UChar target[MAX_TOKEN_LEN];  /* String on right */
  UCollationResult result;            /* -1, 0 or +1, depending on collation */
} OneTestCase;
4200
4201/*
4202 * Utility function to test one collation test case.
4203 * @param testcases Array of test cases.
4204 * @param n_testcases Size of the array testcases.
4205 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
4206 * @param n_rules Size of the array str_rules.
4207 */
4208static void doTestOneTestCase(const OneTestCase testcases[],
4209                              int n_testcases,
4210                              const char* str_rules[],
4211                              int n_rules)
4212{
4213  int rule_no, testcase_no;
4214  UChar rule[500];
4215  int32_t length = 0;
4216  UErrorCode status = U_ZERO_ERROR;
4217  UParseError parse_error;
4218  UCollator  *myCollation;
4219
4220  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4221
4222    length = u_unescape(str_rules[rule_no], rule, 500);
4223    if (length == 0) {
4224        log_err("ERROR: The rule cannot be unescaped: %s\n");
4225        return;
4226    }
4227    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4228    if(U_FAILURE(status)){
4229        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4230        log_info("  offset=%d  \"%s\" | \"%s\"\n",
4231                 parse_error.offset,
4232                 aescstrdup(parse_error.preContext, -1),
4233                 aescstrdup(parse_error.postContext, -1));
4234        return;
4235    }
4236    log_verbose("Testing the <<* syntax\n");
4237    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4238    ucol_setStrength(myCollation, UCOL_TERTIARY);
4239    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
4240      doTest(myCollation,
4241             testcases[testcase_no].source,
4242             testcases[testcase_no].target,
4243             testcases[testcase_no].result
4244             );
4245    }
4246    ucol_close(myCollation);
4247  }
4248}
4249
/*
 * Expected orderings passed to doTestOneTestCase() by TestSameStrengthList,
 * TestSameStrengthListQuoted and TestSameStrengthListRanges.  The trailing
 * comment on each entry spells out the two strings given as code points.
 */
const static OneTestCase rangeTestcases[] = {
  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */

  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */

  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "m" */
  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "a" < "h" */
  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "a" < "e" */

  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "kb" */
  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azb" < "2ym" */
};

/* Number of entries in rangeTestcases. */
static int nRangeTestcases = LEN(rangeTestcases);
4277
/*
 * Expected orderings passed to doTestOneTestCase() by the supplemental-character
 * tests.  Supplementary code points are written as UTF-16 surrogate pairs:
 * {0xd800, 0xdc00} is U+10000, {0xd800, 0xdc01} is U+10001 and
 * {0xd800, 0xdc02} is U+10002.
 */
const static OneTestCase rangeTestcasesSupplemental[] = {
  { {0x4e00},                            {0xfffb},                          UCOL_LESS }, /* U+4E00 < U+FFFB */
  { {0xfffb},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFB < U+10000 */
  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
  { {0x4e00},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+4E00 < U+10001 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 */
  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10001 < U+10002 (same pair as the previous entry) */
  { {0x4e00},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+4E00 < U+10002 */
};

/* Number of entries in rangeTestcasesSupplemental. */
static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
4289
/*
 * Expected orderings passed to doTestOneTestCase() by TestSameStrengthListQwerty
 * and TestSameStrengthListQuotedQwerty, whose rules tailor letters in keyboard
 * ("qwerty") order.  Every entry expects the left string to sort first.
 */
const static OneTestCase rangeTestcasesQwerty[] = {
  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */

  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */

  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */

  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */

  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
};

/* Number of entries in rangeTestcasesQwerty. */
static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
4310
4311static void TestSameStrengthList(void)
4312{
4313  const char* strRules[] = {
4314    /* Normal */
4315    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
4316
4317    /* Lists */
4318    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
4319  };
4320  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4321}
4322
4323static void TestSameStrengthListQuoted(void)
4324{
4325  const char* strRules[] = {
4326    /* Lists with quoted characters */
4327    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
4328    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
4329
4330    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
4331    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
4332
4333    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
4334    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
4335  };
4336  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4337}
4338
4339static void TestSameStrengthListSupplemental(void)
4340{
4341  const char* strRules[] = {
4342    "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",
4343    "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
4344    "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",
4345    "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
4346  };
4347  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4348}
4349
4350static void TestSameStrengthListQwerty(void)
4351{
4352  const char* strRules[] = {
4353    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4354    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4355    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
4356    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
4357    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
4358
4359    /* Quoted characters also will work if two quoted characters are not consecutive.  */
4360    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
4361
4362    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
4363    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
4364
4365 };
4366  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4367}
4368
4369static void TestSameStrengthListQuotedQwerty(void)
4370{
4371  const char* strRules[] = {
4372    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
4373    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
4374    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
4375
4376    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
4377    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
4378   };
4379  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
4380}
4381
4382static void TestSameStrengthListRanges(void)
4383{
4384  const char* strRules[] = {
4385    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
4386  };
4387  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
4388}
4389
4390static void TestSameStrengthListSupplementalRanges(void)
4391{
4392  const char* strRules[] = {
4393    /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */
4394    "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",
4395  };
4396  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
4397}
4398
4399static void TestSpecialCharacters(void)
4400{
4401  const char* strRules[] = {
4402    /* Normal */
4403    "&';'<'+'<','<'-'<'&'<'*'",
4404
4405    /* List */
4406    "&';'<*'+,-&*'",
4407
4408    /* Range */
4409    "&';'<*'+'-'-&*'",
4410  };
4411
4412  const static OneTestCase specialCharacterStrings[] = {
4413    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
4414    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
4415    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
4416    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
4417  };
4418  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
4419}
4420
4421static void TestPrivateUseCharacters(void)
4422{
4423  const char* strRules[] = {
4424    /* Normal */
4425    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
4426    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
4427  };
4428
4429  const static OneTestCase privateUseCharacterStrings[] = {
4430    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4431    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4432    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4433    { {0xe2da}, {0xe2db}, UCOL_LESS },
4434    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4435    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4436  };
4437  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4438}
4439
4440static void TestPrivateUseCharactersInList(void)
4441{
4442  const char* strRules[] = {
4443    /* List */
4444    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
4445    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
4446    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
4447  };
4448
4449  const static OneTestCase privateUseCharacterStrings[] = {
4450    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4451    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4452    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4453    { {0xe2da}, {0xe2db}, UCOL_LESS },
4454    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4455    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4456  };
4457  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4458}
4459
4460static void TestPrivateUseCharactersInRange(void)
4461{
4462  const char* strRules[] = {
4463    /* Range */
4464    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
4465    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
4466    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
4467  };
4468
4469  const static OneTestCase privateUseCharacterStrings[] = {
4470    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
4471    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
4472    { {0xe2d9}, {0xe2da}, UCOL_LESS },
4473    { {0xe2da}, {0xe2db}, UCOL_LESS },
4474    { {0xe2db}, {0xe2dc}, UCOL_LESS },
4475    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
4476  };
4477  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
4478}
4479
4480static void TestInvalidListsAndRanges(void)
4481{
4482  const char* invalidRules[] = {
4483    /* Range not in starred expression */
4484    "&\\ufffe<\\uffff-\\U00010002",
4485
4486    /* Range without start */
4487    "&a<*-c",
4488
4489    /* Range without end */
4490    "&a<*b-",
4491
4492    /* More than one hyphen */
4493    "&a<*b-g-l",
4494
4495    /* Range in the wrong order */
4496    "&a<*k-b",
4497
4498  };
4499
4500  UChar rule[500];
4501  UErrorCode status = U_ZERO_ERROR;
4502  UParseError parse_error;
4503  int n_rules = LEN(invalidRules);
4504  int rule_no;
4505  int length;
4506  UCollator  *myCollation;
4507
4508  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
4509
4510    length = u_unescape(invalidRules[rule_no], rule, 500);
4511    if (length == 0) {
4512        log_err("ERROR: The rule cannot be unescaped: %s\n");
4513        return;
4514    }
4515    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
4516    (void)myCollation;      /* Suppress set but not used warning. */
4517    if(!U_FAILURE(status)){
4518      log_err("ERROR: Could not cause a failure as expected: \n");
4519    }
4520    status = U_ZERO_ERROR;
4521  }
4522}
4523
4524/*
4525 * This test ensures that characters placed before a character in a different script have the same lead byte
4526 * in their collation key before and after script reordering.
4527 */
4528static void TestBeforeRuleWithScriptReordering(void)
4529{
4530    UParseError error;
4531    UErrorCode status = U_ZERO_ERROR;
4532    UCollator  *myCollation;
4533    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
4534    UChar rules[500];
4535    uint32_t rulesLength = 0;
4536    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4537    UCollationResult collResult;
4538
4539    uint8_t baseKey[256];
4540    uint32_t baseKeyLength;
4541    uint8_t beforeKey[256];
4542    uint32_t beforeKeyLength;
4543
4544    UChar base[] = { 0x03b1 }; /* base */
4545    int32_t baseLen = sizeof(base)/sizeof(*base);
4546
4547    UChar before[] = { 0x0e01 }; /* ko kai */
4548    int32_t beforeLen = sizeof(before)/sizeof(*before);
4549
4550    /*UChar *data[] = { before, base };
4551    genericRulesStarter(srules, data, 2);*/
4552
4553    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
4554
4555    (void)beforeKeyLength;   /* Suppress set but not used warnings. */
4556    (void)baseKeyLength;
4557
4558    /* build collator */
4559    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
4560
4561    rulesLength = u_unescape(srules, rules, LEN(rules));
4562    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
4563    if(U_FAILURE(status)) {
4564        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
4565        return;
4566    }
4567
4568    /* check collation results - before rule applied but not script reordering */
4569    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4570    if (collResult != UCOL_GREATER) {
4571        log_err("Collation result not correct before script reordering = %d\n", collResult);
4572    }
4573
4574    /* check the lead byte of the collation keys before script reordering */
4575    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4576    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4577    if (baseKey[0] != beforeKey[0]) {
4578      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4579   }
4580
4581    /* reorder the scripts */
4582    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
4583    if(U_FAILURE(status)) {
4584        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
4585        return;
4586    }
4587
4588    /* check collation results - before rule applied and after script reordering */
4589    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
4590    if (collResult != UCOL_GREATER) {
4591        log_err("Collation result not correct after script reordering = %d\n", collResult);
4592    }
4593
4594    /* check the lead byte of the collation keys after script reordering */
4595    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
4596    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
4597    if (baseKey[0] != beforeKey[0]) {
4598        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
4599    }
4600
4601    ucol_close(myCollation);
4602}
4603
4604/*
4605 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
4606 */
4607static void TestNonLeadBytesDuringCollationReordering(void)
4608{
4609    UErrorCode status = U_ZERO_ERROR;
4610    UCollator  *myCollation;
4611    int32_t reorderCodes[1] = {USCRIPT_GREEK};
4612
4613    uint8_t baseKey[256];
4614    uint32_t baseKeyLength;
4615    uint8_t reorderKey[256];
4616    uint32_t reorderKeyLength;
4617
4618    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
4619
4620    uint32_t i;
4621
4622
4623    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4624
4625    /* build collator tertiary */
4626    myCollation = ucol_open("", &status);
4627    ucol_setStrength(myCollation, UCOL_TERTIARY);
4628    if(U_FAILURE(status)) {
4629        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4630        return;
4631    }
4632    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4633
4634    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4635    if(U_FAILURE(status)) {
4636        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4637        return;
4638    }
4639    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4640
4641    if (baseKeyLength != reorderKeyLength) {
4642        log_err("Key lengths not the same during reordering.\n");
4643        return;
4644    }
4645
4646    for (i = 1; i < baseKeyLength; i++) {
4647        if (baseKey[i] != reorderKey[i]) {
4648            log_err("Collation key bytes not the same at position %d.\n", i);
4649            return;
4650        }
4651    }
4652    ucol_close(myCollation);
4653
4654    /* build collator quaternary */
4655    myCollation = ucol_open("", &status);
4656    ucol_setStrength(myCollation, UCOL_QUATERNARY);
4657    if(U_FAILURE(status)) {
4658        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4659        return;
4660    }
4661    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
4662
4663    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4664    if(U_FAILURE(status)) {
4665        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4666        return;
4667    }
4668    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
4669
4670    if (baseKeyLength != reorderKeyLength) {
4671        log_err("Key lengths not the same during reordering.\n");
4672        return;
4673    }
4674
4675    for (i = 1; i < baseKeyLength; i++) {
4676        if (baseKey[i] != reorderKey[i]) {
4677            log_err("Collation key bytes not the same at position %d.\n", i);
4678            return;
4679        }
4680    }
4681    ucol_close(myCollation);
4682}
4683
4684/*
4685 * Test reordering API.
4686 */
4687static void TestReorderingAPI(void)
4688{
4689    UErrorCode status = U_ZERO_ERROR;
4690    UCollator  *myCollation;
4691    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4692    int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
4693    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4694    UCollationResult collResult;
4695    int32_t retrievedReorderCodesLength;
4696    int32_t retrievedReorderCodes[10];
4697    UChar greekString[] = { 0x03b1 };
4698    UChar punctuationString[] = { 0x203e };
4699    int loopIndex;
4700
4701    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4702
4703    /* build collator tertiary */
4704    myCollation = ucol_open("", &status);
4705    ucol_setStrength(myCollation, UCOL_TERTIARY);
4706    if(U_FAILURE(status)) {
4707        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4708        return;
4709    }
4710
4711    /* set the reorderding */
4712    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4713    if (U_FAILURE(status)) {
4714        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4715        return;
4716    }
4717
4718    /* get the reordering */
4719    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4720    if (status != U_BUFFER_OVERFLOW_ERROR) {
4721        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4722        return;
4723    }
4724    status = U_ZERO_ERROR;
4725    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4726        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4727        return;
4728    }
4729    /* now let's really get it */
4730    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4731    if (U_FAILURE(status)) {
4732        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4733        return;
4734    }
4735    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4736        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4737        return;
4738    }
4739    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4740        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4741            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4742            return;
4743        }
4744    }
4745    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4746    if (collResult != UCOL_LESS) {
4747        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4748        return;
4749    }
4750
4751    /* clear the reordering */
4752    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4753    if (U_FAILURE(status)) {
4754        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4755        return;
4756    }
4757
4758    /* get the reordering again */
4759    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4760    if (retrievedReorderCodesLength != 0) {
4761        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4762        return;
4763    }
4764
4765    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4766    if (collResult != UCOL_GREATER) {
4767        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4768        return;
4769    }
4770
4771    /* test for error condition on duplicate reorder codes */
4772    ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
4773    if (!U_FAILURE(status)) {
4774        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
4775        return;
4776    }
4777
4778    status = U_ZERO_ERROR;
4779    /* test for reorder codes after a reset code */
4780    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
4781    if (!U_FAILURE(status)) {
4782        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
4783        return;
4784    }
4785
4786    ucol_close(myCollation);
4787}
4788
4789/*
4790 * Test reordering API.
4791 */
4792static void TestReorderingAPIWithRuleCreatedCollator(void)
4793{
4794    UErrorCode status = U_ZERO_ERROR;
4795    UCollator  *myCollation;
4796    UChar rules[90];
4797    static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
4798    static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
4799    static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
4800    UCollationResult collResult;
4801    int32_t retrievedReorderCodesLength;
4802    int32_t retrievedReorderCodes[10];
4803    static const UChar greekString[] = { 0x03b1 };
4804    static const UChar punctuationString[] = { 0x203e };
4805    static const UChar hanString[] = { 0x65E5, 0x672C };
4806    int loopIndex;
4807
4808    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
4809
4810    /* build collator from rules */
4811    u_uastrcpy(rules, "[reorder Hani Grek]");
4812    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
4813    if(U_FAILURE(status)) {
4814        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
4815        return;
4816    }
4817
4818    /* get the reordering */
4819    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4820    if (U_FAILURE(status)) {
4821        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4822        return;
4823    }
4824    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4825        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4826        return;
4827    }
4828    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4829        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4830            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4831            return;
4832        }
4833    }
4834    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
4835    if (collResult != UCOL_GREATER) {
4836        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4837        return;
4838    }
4839
4840    /* set the reordering */
4841    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
4842    if (U_FAILURE(status)) {
4843        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
4844        return;
4845    }
4846
4847    /* get the reordering */
4848    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4849    if (status != U_BUFFER_OVERFLOW_ERROR) {
4850        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
4851        return;
4852    }
4853    status = U_ZERO_ERROR;
4854    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4855        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4856        return;
4857    }
4858    /* now let's really get it */
4859    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4860    if (U_FAILURE(status)) {
4861        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4862        return;
4863    }
4864    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
4865        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
4866        return;
4867    }
4868    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4869        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
4870            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4871            return;
4872        }
4873    }
4874    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4875    if (collResult != UCOL_LESS) {
4876        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
4877        return;
4878    }
4879
4880    /* clear the reordering */
4881    ucol_setReorderCodes(myCollation, NULL, 0, &status);
4882    if (U_FAILURE(status)) {
4883        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
4884        return;
4885    }
4886
4887    /* get the reordering again */
4888    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
4889    if (retrievedReorderCodesLength != 0) {
4890        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
4891        return;
4892    }
4893
4894    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
4895    if (collResult != UCOL_GREATER) {
4896        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
4897        return;
4898    }
4899
4900    /* reset the reordering */
4901    ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
4902    if (U_FAILURE(status)) {
4903        log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
4904        return;
4905    }
4906    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
4907    if (U_FAILURE(status)) {
4908        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
4909        return;
4910    }
4911    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
4912        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
4913        return;
4914    }
4915    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
4916        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
4917            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
4918            return;
4919        }
4920    }
4921
4922    ucol_close(myCollation);
4923}
4924
/*
 * qsort() comparator for arrays of int32_t script/reorder codes.
 * Returns a negative value, zero, or a positive value when *a is less than,
 * equal to, or greater than *b.
 * The result is built from comparisons instead of a subtraction so that it
 * cannot overflow, whatever the two values are.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
  const int32_t left = *(const int32_t *)a;
  const int32_t right = *(const int32_t *)b;

  return (left > right) - (left < right);
}
4929
4930static void TestEquivalentReorderingScripts(void) {
4931    UErrorCode status = U_ZERO_ERROR;
4932    int32_t equivalentScripts[50];
4933    int32_t equivalentScriptsLength;
4934    int loopIndex;
4935    int32_t equivalentScriptsResult[] = {
4936        USCRIPT_BOPOMOFO,
4937        USCRIPT_LISU,
4938        USCRIPT_LYCIAN,
4939        USCRIPT_CARIAN,
4940        USCRIPT_LYDIAN,
4941        USCRIPT_YI,
4942        USCRIPT_OLD_ITALIC,
4943        USCRIPT_GOTHIC,
4944        USCRIPT_DESERET,
4945        USCRIPT_SHAVIAN,
4946        USCRIPT_OSMANYA,
4947        USCRIPT_LINEAR_B,
4948        USCRIPT_CYPRIOT,
4949        USCRIPT_OLD_SOUTH_ARABIAN,
4950        USCRIPT_AVESTAN,
4951        USCRIPT_IMPERIAL_ARAMAIC,
4952        USCRIPT_INSCRIPTIONAL_PARTHIAN,
4953        USCRIPT_INSCRIPTIONAL_PAHLAVI,
4954        USCRIPT_UGARITIC,
4955        USCRIPT_OLD_PERSIAN,
4956        USCRIPT_CUNEIFORM,
4957        USCRIPT_EGYPTIAN_HIEROGLYPHS,
4958        USCRIPT_PHONETIC_POLLARD,
4959        USCRIPT_SORA_SOMPENG,
4960        USCRIPT_MEROITIC_CURSIVE,
4961        USCRIPT_MEROITIC_HIEROGLYPHS
4962    };
4963
4964    qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
4965
4966    /* UScript.GOTHIC */
4967    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
4968    if (U_FAILURE(status)) {
4969        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4970        return;
4971    }
4972    /*
4973    fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
4974    fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
4975    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
4976        fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
4977    }
4978    */
4979    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
4980        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
4981        return;
4982    }
4983    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
4984        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
4985            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
4986            return;
4987        }
4988    }
4989
4990    /* UScript.SHAVIAN */
4991    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
4992    if (U_FAILURE(status)) {
4993        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
4994        return;
4995    }
4996    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
4997        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
4998        return;
4999    }
5000    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
5001        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
5002            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
5003            return;
5004        }
5005    }
5006}
5007
5008static void TestReorderingAcrossCloning(void)
5009{
5010    UErrorCode status = U_ZERO_ERROR;
5011    UCollator  *myCollation;
5012    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5013    UCollator *clonedCollation;
5014    int32_t retrievedReorderCodesLength;
5015    int32_t retrievedReorderCodes[10];
5016    int loopIndex;
5017
5018    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5019
5020    /* build collator tertiary */
5021    myCollation = ucol_open("", &status);
5022    ucol_setStrength(myCollation, UCOL_TERTIARY);
5023    if(U_FAILURE(status)) {
5024        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5025        return;
5026    }
5027
5028    /* set the reorderding */
5029    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5030    if (U_FAILURE(status)) {
5031        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5032        return;
5033    }
5034
5035    /* clone the collator */
5036    clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
5037    if (U_FAILURE(status)) {
5038        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
5039        return;
5040    }
5041
5042    /* get the reordering */
5043    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
5044    if (U_FAILURE(status)) {
5045        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
5046        return;
5047    }
5048    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
5049        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
5050        return;
5051    }
5052    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
5053        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
5054            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
5055            return;
5056        }
5057    }
5058
5059    /*uprv_free(buffer);*/
5060    ucol_close(myCollation);
5061    ucol_close(clonedCollation);
5062}
5063
5064/*
5065 * Utility function to test one collation reordering test case set.
5066 * @param testcases Array of test cases.
5067 * @param n_testcases Size of the array testcases.
5068 * @param reorderTokens Array of reordering codes.
5069 * @param reorderTokensLen Size of the array reorderTokens.
5070 */
5071static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
5072{
5073    uint32_t testCaseNum;
5074    UErrorCode status = U_ZERO_ERROR;
5075    UCollator  *myCollation;
5076
5077    myCollation = ucol_open("", &status);
5078    if (U_FAILURE(status)) {
5079        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5080        return;
5081    }
5082    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
5083    if(U_FAILURE(status)) {
5084        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5085        return;
5086    }
5087
5088    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
5089        doTest(myCollation,
5090            testCases[testCaseNum].source,
5091            testCases[testCaseNum].target,
5092            testCases[testCaseNum].result
5093        );
5094    }
5095    ucol_close(myCollation);
5096}
5097
5098static void TestGreekFirstReorder(void)
5099{
5100    const char* strRules[] = {
5101        "[reorder Grek]"
5102    };
5103
5104    const int32_t apiRules[] = {
5105        USCRIPT_GREEK
5106    };
5107
5108    const static OneTestCase privateUseCharacterStrings[] = {
5109        { {0x0391}, {0x0391}, UCOL_EQUAL },
5110        { {0x0041}, {0x0391}, UCOL_GREATER },
5111        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
5112        { {0x0060}, {0x0391}, UCOL_LESS },
5113        { {0x0391}, {0xe2dc}, UCOL_LESS },
5114        { {0x0391}, {0x0060}, UCOL_GREATER },
5115    };
5116
5117    /* Test rules creation */
5118    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5119
5120    /* Test collation reordering API */
5121    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5122}
5123
5124static void TestGreekLastReorder(void)
5125{
5126    const char* strRules[] = {
5127        "[reorder Zzzz Grek]"
5128    };
5129
5130    const int32_t apiRules[] = {
5131        USCRIPT_UNKNOWN, USCRIPT_GREEK
5132    };
5133
5134    const static OneTestCase privateUseCharacterStrings[] = {
5135        { {0x0391}, {0x0391}, UCOL_EQUAL },
5136        { {0x0041}, {0x0391}, UCOL_LESS },
5137        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
5138        { {0x0060}, {0x0391}, UCOL_LESS },
5139        { {0x0391}, {0xe2dc}, UCOL_GREATER },
5140    };
5141
5142    /* Test rules creation */
5143    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5144
5145    /* Test collation reordering API */
5146    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5147}
5148
5149static void TestNonScriptReorder(void)
5150{
5151    const char* strRules[] = {
5152        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
5153    };
5154
5155    const int32_t apiRules[] = {
5156        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
5157        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
5158        UCOL_REORDER_CODE_CURRENCY
5159    };
5160
5161    const static OneTestCase privateUseCharacterStrings[] = {
5162        { {0x0391}, {0x0041}, UCOL_LESS },
5163        { {0x0041}, {0x0391}, UCOL_GREATER },
5164        { {0x0060}, {0x0041}, UCOL_LESS },
5165        { {0x0060}, {0x0391}, UCOL_GREATER },
5166        { {0x0024}, {0x0041}, UCOL_GREATER },
5167    };
5168
5169    /* Test rules creation */
5170    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5171
5172    /* Test collation reordering API */
5173    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5174}
5175
5176static void TestHaniReorder(void)
5177{
5178    const char* strRules[] = {
5179        "[reorder Hani]"
5180    };
5181    const int32_t apiRules[] = {
5182        USCRIPT_HAN
5183    };
5184
5185    const static OneTestCase privateUseCharacterStrings[] = {
5186        { {0x4e00}, {0x0041}, UCOL_LESS },
5187        { {0x4e00}, {0x0060}, UCOL_GREATER },
5188        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5189        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5190        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5191        { {0xfa27}, {0x0041}, UCOL_LESS },
5192        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5193    };
5194
5195    /* Test rules creation */
5196    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5197
5198    /* Test collation reordering API */
5199    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
5200}
5201
5202static void TestHaniReorderWithOtherRules(void)
5203{
5204    const char* strRules[] = {
5205        "[reorder Hani] &b<a"
5206    };
5207    /*const int32_t apiRules[] = {
5208        USCRIPT_HAN
5209    };*/
5210
5211    const static OneTestCase privateUseCharacterStrings[] = {
5212        { {0x4e00}, {0x0041}, UCOL_LESS },
5213        { {0x4e00}, {0x0060}, UCOL_GREATER },
5214        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
5215        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
5216        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
5217        { {0xfa27}, {0x0041}, UCOL_LESS },
5218        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
5219        { {0x0062}, {0x0061}, UCOL_LESS },
5220    };
5221
5222    /* Test rules creation */
5223    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5224}
5225
5226static void TestMultipleReorder(void)
5227{
5228    const char* strRules[] = {
5229        "[reorder Grek Zzzz DIGIT Latn Hani]"
5230    };
5231
5232    const int32_t apiRules[] = {
5233        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
5234    };
5235
5236    const static OneTestCase collationTestCases[] = {
5237        { {0x0391}, {0x0041}, UCOL_LESS},
5238        { {0x0031}, {0x0041}, UCOL_LESS},
5239        { {0x0041}, {0x4e00}, UCOL_LESS},
5240    };
5241
5242    /* Test rules creation */
5243    doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
5244
5245    /* Test collation reordering API */
5246    doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
5247}
5248
5249/*
5250 * Test that covers issue reported in ticket 8814
5251 */
5252static void TestReorderWithNumericCollation(void)
5253{
5254    UErrorCode status = U_ZERO_ERROR;
5255    UCollator  *myCollation;
5256    UCollator  *myReorderCollation;
5257    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
5258    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
5259    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
5260    UChar fortyS[] = { 0x0053 };
5261    UChar fortyThreeP[] = { 0x0050 };
5262    uint8_t fortyS_sortKey[128];
5263    int32_t fortyS_sortKey_Length;
5264    uint8_t fortyThreeP_sortKey[128];
5265    int32_t fortyThreeP_sortKey_Length;
5266    uint8_t fortyS_sortKey_reorder[128];
5267    int32_t fortyS_sortKey_reorder_Length;
5268    uint8_t fortyThreeP_sortKey_reorder[128];
5269    int32_t fortyThreeP_sortKey_reorder_Length;
5270    UCollationResult collResult;
5271    UCollationResult collResultReorder;
5272
5273    log_verbose("Testing reordering with and without numeric collation\n");
5274
5275    /* build collator tertiary with numeric */
5276    myCollation = ucol_open("", &status);
5277    /*
5278    ucol_setStrength(myCollation, UCOL_TERTIARY);
5279    */
5280    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5281    if(U_FAILURE(status)) {
5282        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5283        return;
5284    }
5285
5286    /* build collator tertiary with numeric and reordering */
5287    myReorderCollation = ucol_open("", &status);
5288    /*
5289    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
5290    */
5291    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
5292    ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
5293    if(U_FAILURE(status)) {
5294        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5295        return;
5296    }
5297
5298    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
5299    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
5300    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
5301    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
5302
5303    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
5304        log_err_status(status, "ERROR: couldn't generate sort keys\n");
5305        return;
5306    }
5307    collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5308    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
5309    /*
5310    fprintf(stderr, "\tcollResult = %x\n", collResult);
5311    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
5312    fprintf(stderr, "\nfortyS\n");
5313    for (i = 0; i < fortyS_sortKey_Length; i++) {
5314        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
5315    }
5316    fprintf(stderr, "\nfortyThreeP\n");
5317    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
5318        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
5319    }
5320    */
5321    if (collResult != collResultReorder) {
5322        log_err_status(status, "ERROR: collation results should have been the same.\n");
5323        return;
5324    }
5325
5326    ucol_close(myCollation);
5327    ucol_close(myReorderCollation);
5328}
5329
/*
 * Compares two zero-terminated byte arrays, byte by byte.
 * Returns 0 when both arrays are identical up to and including the
 * terminating zero byte; otherwise returns -1 or 1 according to whether the
 * first differing byte of a is smaller or larger than that of b.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  for (;;) {
    const uint8_t left = *a++;
    const uint8_t right = *b++;

    if (left != right) {
      return (left < right) ? -1 : 1;
    }
    if (left == 0) {
      /* Both arrays ended at the same position without a difference. */
      return 0;
    }
  }
}
5339
5340static void TestImportRulesDeWithPhonebook(void)
5341{
5342  const char* normalRules[] = {
5343    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
5344    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
5345    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
5346  };
5347  const OneTestCase normalTests[] = {
5348    { {0x00e6}, {0x00c6}, UCOL_LESS},
5349    { {0x00fc}, {0x00dc}, UCOL_GREATER},
5350  };
5351
5352  const char* importRules[] = {
5353    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
5354    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5355    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
5356  };
5357  const OneTestCase importTests[] = {
5358    { {0x00e6}, {0x00c6}, UCOL_LESS},
5359    { {0x00fc}, {0x00dc}, UCOL_LESS},
5360  };
5361
5362  doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
5363  doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
5364}
5365
#if 0
/*
 * Currently compiled out by the surrounding "#if 0".
 * Runs four rule sets through doTestOneTestCase(): a dummy rule, an import of
 * root-u-co-eor, an import of fi-u-co-standard, and both imports together.
 * Each set has its own expectations for the same three string pairs.
 */
static void TestImportRulesFiWithEor(void)
{
  /* Expectations for the dummy rule. */
  const OneTestCase defaultTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };
  /* Expectations with the European Ordering Rules import: currency characters compare as ignored. */
  const OneTestCase eorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };
  /* Expectations with the fi standard import. */
  const OneTestCase fiStdTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };
  /* Expectations with both imports combined. */
  const OneTestCase fiEorTests[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* DUCET, reached through a dummy rule. */
  const char* defaultRules[] = { "&a<b" };
  /* European Ordering rules. */
  const char* eorRules[] = { "[import root-u-co-eor]" };
  /* Fi standard rules. */
  const char* fiStdRules[] = { "[import fi-u-co-standard]" };
  /* Both European Ordering Rules and Fi Standard Rules. */
  const char* eorFiStdRules[] = { "[import root-u-co-eor][import fi-u-co-standard]" };
  /* Expected to be equivalent to eorFiStdRules once fi.txt is updated with the import; see the TODO below. */
  const char* fiEorRules[] = { "[import fi-u-co-eor]" };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
  doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
  doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
}
#endif
5435
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 *
 * It runs the pair U+3402 / U+4E1E through doTestOneTestCase() twice: with a
 * dummy rule the first is expected to sort after the second, and with the
 * ko-u-co-unihan import it is expected to sort before it.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* Expectation for the dummy rule. */
  const OneTestCase defaultTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };
  /* Expectation with the unihan import. */
  const OneTestCase unihanTests[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  /* DUCET, reached through a dummy rule. */
  const char* defaultRules[] = { "&a<b" };
  /* Import of the Korean unihan tailoring. */
  const char* unihanRules[] = { "[import ko-u-co-unihan]" };

  doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
  doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
}
#endif
5467
5468static void TestImport(void)
5469{
5470    UCollator* vicoll;
5471    UCollator* escoll;
5472    UCollator* viescoll;
5473    UCollator* importviescoll;
5474    UParseError error;
5475    UErrorCode status = U_ZERO_ERROR;
5476    UChar* virules;
5477    int32_t viruleslength;
5478    UChar* esrules;
5479    int32_t esruleslength;
5480    UChar* viesrules;
5481    int32_t viesruleslength;
5482    char srules[500] = "[import vi][import es]";
5483    UChar rules[500];
5484    uint32_t length = 0;
5485    int32_t itemCount;
5486    int32_t i, k;
5487    UChar32 start;
5488    UChar32 end;
5489    UChar str[500];
5490    int32_t strLength;
5491
5492    uint8_t sk1[500];
5493    uint8_t sk2[500];
5494
5495    UBool b;
5496    USet* tailoredSet;
5497    USet* importTailoredSet;
5498
5499
5500    vicoll = ucol_open("vi", &status);
5501    if(U_FAILURE(status)){
5502        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
5503        return;
5504    }
5505
5506    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
5507    if(viruleslength == 0) {
5508        log_data_err("missing vi tailoring rule string\n");
5509        ucol_close(vicoll);
5510        return;
5511    }
5512    escoll = ucol_open("es", &status);
5513    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
5514    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
5515    viesrules[0] = 0;
5516    u_strcat(viesrules, virules);
5517    u_strcat(viesrules, esrules);
5518    viesruleslength = viruleslength + esruleslength;
5519    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5520
5521    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5522    length = u_unescape(srules, rules, 500);
5523    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5524    if(U_FAILURE(status)){
5525        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5526        return;
5527    }
5528
5529    tailoredSet = ucol_getTailoredSet(viescoll, &status);
5530    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
5531
5532    if(!uset_equals(tailoredSet, importTailoredSet)){
5533        log_err("Tailored sets not equal");
5534    }
5535
5536    uset_close(importTailoredSet);
5537
5538    itemCount = uset_getItemCount(tailoredSet);
5539
5540    for( i = 0; i < itemCount; i++){
5541        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5542        if(strLength < 2){
5543            for (; start <= end; start++){
5544                k = 0;
5545                U16_APPEND(str, k, 500, start, b);
5546                (void)b;    /* Suppress set but not used warning. */
5547                ucol_getSortKey(viescoll, str, 1, sk1, 500);
5548                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
5549                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5550                    log_err("Sort key for %s not equal\n", str);
5551                    break;
5552                }
5553            }
5554        }else{
5555            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
5556            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
5557            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5558                log_err("ZZSort key for %s not equal\n", str);
5559                break;
5560            }
5561
5562        }
5563    }
5564
5565    uset_close(tailoredSet);
5566
5567    uprv_free(viesrules);
5568
5569    ucol_close(vicoll);
5570    ucol_close(escoll);
5571    ucol_close(viescoll);
5572    ucol_close(importviescoll);
5573}
5574
5575static void TestImportWithType(void)
5576{
5577    UCollator* vicoll;
5578    UCollator* decoll;
5579    UCollator* videcoll;
5580    UCollator* importvidecoll;
5581    UParseError error;
5582    UErrorCode status = U_ZERO_ERROR;
5583    const UChar* virules;
5584    int32_t viruleslength;
5585    const UChar* derules;
5586    int32_t deruleslength;
5587    UChar* viderules;
5588    int32_t videruleslength;
5589    const char srules[500] = "[import vi][import de-u-co-phonebk]";
5590    UChar rules[500];
5591    uint32_t length = 0;
5592    int32_t itemCount;
5593    int32_t i, k;
5594    UChar32 start;
5595    UChar32 end;
5596    UChar str[500];
5597    int32_t strLength;
5598
5599    uint8_t sk1[500];
5600    uint8_t sk2[500];
5601
5602    USet* tailoredSet;
5603    USet* importTailoredSet;
5604
5605    vicoll = ucol_open("vi", &status);
5606    if(U_FAILURE(status)){
5607        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5608        return;
5609    }
5610    virules = ucol_getRules(vicoll, &viruleslength);
5611    if(viruleslength == 0) {
5612        log_data_err("missing vi tailoring rule string\n");
5613        ucol_close(vicoll);
5614        return;
5615    }
5616    /* decoll = ucol_open("de@collation=phonebook", &status); */
5617    decoll = ucol_open("de-u-co-phonebk", &status);
5618    if(U_FAILURE(status)){
5619        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5620        return;
5621    }
5622
5623
5624    derules = ucol_getRules(decoll, &deruleslength);
5625    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
5626    viderules[0] = 0;
5627    u_strcat(viderules, virules);
5628    u_strcat(viderules, derules);
5629    videruleslength = viruleslength + deruleslength;
5630    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5631
5632    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
5633    length = u_unescape(srules, rules, 500);
5634    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
5635    if(U_FAILURE(status)){
5636        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5637        return;
5638    }
5639
5640    tailoredSet = ucol_getTailoredSet(videcoll, &status);
5641    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
5642
5643    if(!uset_equals(tailoredSet, importTailoredSet)){
5644        log_err("Tailored sets not equal");
5645    }
5646
5647    uset_close(importTailoredSet);
5648
5649    itemCount = uset_getItemCount(tailoredSet);
5650
5651    for( i = 0; i < itemCount; i++){
5652        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
5653        if(strLength < 2){
5654            for (; start <= end; start++){
5655                k = 0;
5656                U16_APPEND_UNSAFE(str, k, start);
5657                ucol_getSortKey(videcoll, str, 1, sk1, 500);
5658                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
5659                if(compare_uint8_t_arrays(sk1, sk2) != 0){
5660                    log_err("Sort key for %s not equal\n", str);
5661                    break;
5662                }
5663            }
5664        }else{
5665            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
5666            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
5667            if(compare_uint8_t_arrays(sk1, sk2) != 0){
5668                log_err("Sort key for %s not equal\n", str);
5669                break;
5670            }
5671
5672        }
5673    }
5674
5675    uset_close(tailoredSet);
5676
5677    uprv_free(viderules);
5678
5679    ucol_close(videcoll);
5680    ucol_close(importvidecoll);
5681    ucol_close(vicoll);
5682    ucol_close(decoll);
5683}
5684
5685/* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */
5686static const UChar longUpperStr1[]= { /* 155 chars */
5687    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
5688    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
5689    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
5690    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
5691    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
5692    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
5693    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
5694    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
5695    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
5696    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
5697};
5698
5699/* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */
5700static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
5701    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5702    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5703    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5704    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
5705    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
5706};
5707
5708/* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */
5709static const UChar longUpperStr3[]= { /* 324 chars */
5710    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5711    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5712    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5713    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5714    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5715    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5716    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5717    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5718    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5719    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5720    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
5721    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
5722};
5723
/*
 * Number of elements in an actual array object. Must not be used on a pointer
 * (for example an array function parameter), where sizeof yields the pointer
 * size instead. The argument is parenthesized before indexing so that any
 * array-valued expression can be passed safely.
 */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
5725
/* One entry of the longUpperStrItems table: a test string and its length. */
5726typedef struct {
5727    const UChar * longUpperStrPtr; /* the text; NULL in the entry that ends the table */
5728    int32_t       longUpperStrLen; /* number of UChars at longUpperStrPtr */
5729} LongUpperStrItem;
5730
5731/* String pointers must be in reverse collation order of the corresponding strings:
 * TestCaseLevelBufferOverflow logs an error unless the sort key of each entry
 * compares less than the sort key of the entry before it.
 * The final { NULL, 0 } entry terminates the table. */
5732static const LongUpperStrItem longUpperStrItems[] = {
5733    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
5734    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
5735    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
5736    { NULL,          0                           }
5737};
5738
/* Capacity in bytes of the sort key buffers in TestCaseLevelBufferOverflow;
 * a sort key longer than this is reported there as an error. */
5739enum { kCollKeyLenMax = 850 }; /* may change with collation changes */
5740
5741/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
5742static void TestCaseLevelBufferOverflow(void)
5743{
5744    UErrorCode status = U_ZERO_ERROR;
5745    UCollator * ucol = ucol_open("root", &status);
5746    if ( U_SUCCESS(status) ) {
5747        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
5748        if ( U_SUCCESS(status) ) {
5749            const LongUpperStrItem * itemPtr;
5750            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
5751            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
5752                int32_t sortKeyLen;
5753                if (itemPtr > longUpperStrItems) {
5754                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
5755                }
5756                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
5757                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
5758                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
5759                    break;
5760                }
5761                if ( itemPtr > longUpperStrItems ) {
5762                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
5763                    if (compareResult >= 0) {
5764                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
5765                    }
5766                }
5767            }
5768        } else {
5769            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
5770        }
5771        ucol_close(ucol);
5772    } else {
5773        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
5774    }
5775}
5776
5777/* Test for #10595 */
/* Input text for TestNextSortKeyPartJaIdentical: five UChars followed by a
 * terminating 0. The test passes an explicit length of 5. */
5778static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */
/* Capacity in bytes of the buffer handed to each ucol_nextSortKeyPart call
 * in TestNextSortKeyPartJaIdentical. */
5779#define KEY_PART_SIZE 16
5780
5781static void TestNextSortKeyPartJaIdentical(void)
5782{
5783    UErrorCode status = U_ZERO_ERROR;
5784    UCollator *coll;
5785    uint8_t keyPart[KEY_PART_SIZE];
5786    UCharIterator iter;
5787    uint32_t state[2] = {0, 0};
5788    int32_t keyPartLen;
5789
5790    coll = ucol_open("ja", &status);
5791    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
5792    if (U_FAILURE(status)) {
5793        log_err_status(status, "ERROR: in creation of Japanese collator with identical strength: %s\n", myErrorName(status));
5794        return;
5795    }
5796
5797    uiter_setString(&iter, testJapaneseName, 5);
5798    keyPartLen = KEY_PART_SIZE;
5799    while (keyPartLen == KEY_PART_SIZE) {
5800        keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_SIZE, &status);
5801        if (U_FAILURE(status)) {
5802            log_err_status(status, "ERROR: in iterating next sort key part: %s\n", myErrorName(status));
5803            break;
5804        }
5805    }
5806
5807    ucol_close(coll);
5808}
5809
/* Calls addTest with the address of function x and the path
 * "tscoll/cmsccoll/" followed by the name of x. Expects a variable named
 * root to be in scope at the point of use. */
5810#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)
5811
/*
 * Adds the tests of this file to the test tree at root, one TEST() call per
 * test function, each under the path prefix "tscoll/cmsccoll/".
 * Entries that are commented out are kept in place, several with a note
 * explaining why they are disabled.
 */
5812void addMiscCollTest(TestNode** root)
5813{
5814    TEST(TestRuleOptions);
5815    TEST(TestBeforePrefixFailure);
5816    TEST(TestContractionClosure);
5817    TEST(TestPrefixCompose);
5818    TEST(TestStrCollIdenticalPrefix);
5819    TEST(TestPrefix);
5820    TEST(TestNewJapanese);
5821    /*TEST(TestLimitations);*/
5822    TEST(TestNonChars);
5823    TEST(TestExtremeCompression);
5824    TEST(TestSurrogates);
5825    TEST(TestVariableTopSetting);
5826    TEST(TestMaxVariable);
5827    TEST(TestBocsuCoverage);
5828    TEST(TestCyrillicTailoring);
5829    TEST(TestCase);
5830    TEST(IncompleteCntTest);
5831    TEST(BlackBirdTest);
5832    TEST(FunkyATest);
5833    TEST(BillFairmanTest);
5834    TEST(TestChMove);
5835    TEST(TestImplicitTailoring);
5836    TEST(TestFCDProblem);
5837    TEST(TestEmptyRule);
5838    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
5839    TEST(TestJ815);
5840    /*TEST(TestJ831);*/ /* we changed lv locale */
5841    TEST(TestBefore);
5842    TEST(TestHangulTailoring);
5843    TEST(TestUCARules);
5844    TEST(TestIncrementalNormalize);
5845    TEST(TestComposeDecompose);
5846    TEST(TestCompressOverlap);
5847    TEST(TestContraction);
5848    TEST(TestExpansion);
5849    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
5850    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
5851    TEST(TestOptimize);
5852    TEST(TestSuppressContractions);
5853    TEST(Alexis2);
5854    TEST(TestHebrewUCA);
5855    TEST(TestPartialSortKeyTermination);
5856    TEST(TestSettings);
5857    TEST(TestEquals);
5858    TEST(TestJ2726);
5859    TEST(NullRule);
5860    TEST(TestNumericCollation);
5861    TEST(TestTibetanConformance);
5862    TEST(TestPinyinProblem);
5863    TEST(TestSeparateTrees);
5864    TEST(TestBeforePinyin);
5865    TEST(TestBeforeTightening);
5866    /*TEST(TestMoreBefore);*/
5867    TEST(TestTailorNULL);
5868    TEST(TestUpperFirstQuaternary);
5869    TEST(TestJ4960);
5870    TEST(TestJ5223);
5871    TEST(TestJ5232);
5872    TEST(TestJ5367);
5873    TEST(TestHiragana);
5874    TEST(TestSortKeyConsistency);
5875    TEST(TestVI5913);  /* VI, RO tailored rules */
5876    TEST(TestCroatianSortKey);
5877    TEST(TestTailor6179);
5878    TEST(TestUCAPrecontext);
5879    TEST(TestOutOfBuffer5468);
5880    TEST(TestSameStrengthList);
5881
5882    TEST(TestSameStrengthListQuoted);
5883    TEST(TestSameStrengthListSupplemental);
5884    TEST(TestSameStrengthListQwerty);
5885    TEST(TestSameStrengthListQuotedQwerty);
5886    TEST(TestSameStrengthListRanges);
5887    TEST(TestSameStrengthListSupplementalRanges);
5888    TEST(TestSpecialCharacters);
5889    TEST(TestPrivateUseCharacters);
5890    TEST(TestPrivateUseCharactersInList);
5891    TEST(TestPrivateUseCharactersInRange);
5892    TEST(TestInvalidListsAndRanges);
5893    TEST(TestImportRulesDeWithPhonebook);
5894    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
5895    /* TEST(TestImportRulesCJKWithUnihan); */
5896    TEST(TestImport);
5897    TEST(TestImportWithType);
5898
5899    TEST(TestBeforeRuleWithScriptReordering);
5900    TEST(TestNonLeadBytesDuringCollationReordering);
5901    TEST(TestReorderingAPI);
5902    TEST(TestReorderingAPIWithRuleCreatedCollator);
5903    TEST(TestEquivalentReorderingScripts);
5904    TEST(TestGreekFirstReorder);
5905    TEST(TestGreekLastReorder);
5906    TEST(TestNonScriptReorder);
5907    TEST(TestHaniReorder);
5908    TEST(TestHaniReorderWithOtherRules);
5909    TEST(TestMultipleReorder);
5910    TEST(TestReorderingAcrossCloning);
5911    TEST(TestReorderWithNumericCollation);
5912
5913    TEST(TestCaseLevelBufferOverflow);
5914    TEST(TestNextSortKeyPartJaIdentical);
5915}
5916
5917#endif /* #if !UCONFIG_NO_COLLATION */
5918