1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "ucol_tok.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
38#include "unicode/ures.h"
39#include "unicode/uscript.h"
40#include "unicode/utf16.h"
41#include "uparse.h"
42#include "putilimp.h"
43
44
/* Number of elements in a true array (not a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression can be
 * passed safely. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-size UChar test-case tables. */
#define MAX_TOKEN_LEN 16
48
49typedef UCollationResult tst_strcoll(void *collator, const int object,
50                        const UChar *source, const int sLen,
51                        const UChar *target, const int tLen);
52
53
54
/*
 * Strings checked by IncompleteCntTest under the rule " & Z < ABC < Q < B".
 * Each entry is expected to compare UCOL_LESS against every later entry.
 */
static const char cnt1[][10] = {
  "AA",
  "AC",
  "AZ",
  "AQ",
  "AB",
  "ABZ",
  "ABQ",
  "Z",
  "ABC",
  "Q",
  "B"
};
69
/*
 * Strings checked by IncompleteCntTest under the rule
 * " & Z < DAVIS < MARK <DAV". Each entry is expected to compare UCOL_LESS
 * against every later entry.
 */
static const char cnt2[][10] = {
  "DA",
  "DAD",
  "DAZ",
  "MAR",
  "Z",
  "DAVIS",
  "MARK",
  "DAV",
  "DAVI"
};
81
82static void IncompleteCntTest(void)
83{
84  UErrorCode status = U_ZERO_ERROR;
85  UChar temp[90];
86  UChar t1[90];
87  UChar t2[90];
88
89  UCollator *coll =  NULL;
90  uint32_t i = 0, j = 0;
91  uint32_t size = 0;
92
93  u_uastrcpy(temp, " & Z < ABC < Q < B");
94
95  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
96
97  if(U_SUCCESS(status)) {
98    size = sizeof(cnt1)/sizeof(cnt1[0]);
99    for(i = 0; i < size-1; i++) {
100      for(j = i+1; j < size; j++) {
101        UCollationElements *iter;
102        u_uastrcpy(t1, cnt1[i]);
103        u_uastrcpy(t2, cnt1[j]);
104        doTest(coll, t1, t2, UCOL_LESS);
105        /* synwee : added collation element iterator test */
106        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
107        if (U_FAILURE(status)) {
108          log_err("Creation of iterator failed\n");
109          break;
110        }
111        backAndForth(iter);
112        ucol_closeElements(iter);
113      }
114    }
115  }
116
117  ucol_close(coll);
118
119
120  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
121  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
122
123  if(U_SUCCESS(status)) {
124    size = sizeof(cnt2)/sizeof(cnt2[0]);
125    for(i = 0; i < size-1; i++) {
126      for(j = i+1; j < size; j++) {
127        UCollationElements *iter;
128        u_uastrcpy(t1, cnt2[i]);
129        u_uastrcpy(t2, cnt2[j]);
130        doTest(coll, t1, t2, UCOL_LESS);
131
132        /* synwee : added collation element iterator test */
133        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
134        if (U_FAILURE(status)) {
135          log_err("Creation of iterator failed\n");
136          break;
137        }
138        backAndForth(iter);
139        ucol_closeElements(iter);
140      }
141    }
142  }
143
144  ucol_close(coll);
145
146
147}
148
/*
 * Strings used by BlackBirdTest with alternate handling set to UCOL_SHIFTED.
 * At quaternary strength each entry is expected to compare UCOL_LESS against
 * every later entry.
 */
static const char shifted[][20] = {
  "black bird",
  "black-bird",
  "blackbird",
  "black Bird",
  "black-Bird",
  "blackBird",
  "black birds",
  "black-birds",
  "blackbirds"
};
160
161const static UCollationResult shiftedTert[] = {
162  UCOL_EQUAL,
163  UCOL_EQUAL,
164  UCOL_EQUAL,
165  UCOL_LESS,
166  UCOL_EQUAL,
167  UCOL_EQUAL,
168  UCOL_LESS,
169  UCOL_EQUAL,
170  UCOL_EQUAL
171};
172
/*
 * Strings used by BlackBirdTest with alternate handling set to
 * UCOL_NON_IGNORABLE. Each entry is expected to compare UCOL_LESS against
 * every later entry.
 */
static const char nonignorable[][20] = {
  "black bird",
  "black Bird",
  "black birds",
  "black-bird",
  "black-Bird",
  "black-birds",
  "blackbird",
  "blackBird",
  "blackbirds"
};
184
185static void BlackBirdTest(void) {
186  UErrorCode status = U_ZERO_ERROR;
187  UChar t1[90];
188  UChar t2[90];
189
190  uint32_t i = 0, j = 0;
191  uint32_t size = 0;
192  UCollator *coll = ucol_open("en_US", &status);
193
194  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
195  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
196
197  if(U_SUCCESS(status)) {
198    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
199    for(i = 0; i < size-1; i++) {
200      for(j = i+1; j < size; j++) {
201        u_uastrcpy(t1, nonignorable[i]);
202        u_uastrcpy(t2, nonignorable[j]);
203        doTest(coll, t1, t2, UCOL_LESS);
204      }
205    }
206  }
207
208  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
209  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
210
211  if(U_SUCCESS(status)) {
212    size = sizeof(shifted)/sizeof(shifted[0]);
213    for(i = 0; i < size-1; i++) {
214      for(j = i+1; j < size; j++) {
215        u_uastrcpy(t1, shifted[i]);
216        u_uastrcpy(t2, shifted[j]);
217        doTest(coll, t1, t2, UCOL_LESS);
218      }
219    }
220  }
221
222  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
223  if(U_SUCCESS(status)) {
224    size = sizeof(shifted)/sizeof(shifted[0]);
225    for(i = 1; i < size; i++) {
226      u_uastrcpy(t1, shifted[i-1]);
227      u_uastrcpy(t2, shifted[i]);
228      doTest(coll, t1, t2, shiftedTert[i]);
229    }
230  }
231
232  ucol_close(coll);
233}
234
235const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
236    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238    {0x0041/*'A'*/, 0x0300, 0x0000},
239    {0x00C0, 0x0301, 0x0000},
240    /* this would work with forced normalization */
241    {0x00C0, 0x0316, 0x0000}
242};
243
244const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
245    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247    {0x00C0, 0},
248    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249    /* this would work with forced normalization */
250    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251};
252
253const static UCollationResult results[] = {
254    UCOL_GREATER,
255    UCOL_EQUAL,
256    UCOL_EQUAL,
257    UCOL_GREATER,
258    UCOL_EQUAL
259};
260
261static void FunkyATest(void)
262{
263
264    int32_t i;
265    UErrorCode status = U_ZERO_ERROR;
266    UCollator  *myCollation;
267    myCollation = ucol_open("en_US", &status);
268    if(U_FAILURE(status)){
269        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
270        return;
271    }
272    log_verbose("Testing some A letters, for some reason\n");
273    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
274    ucol_setStrength(myCollation, UCOL_TERTIARY);
275    for (i = 0; i < 4 ; i++)
276    {
277        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
278    }
279    ucol_close(myCollation);
280}
281
282UColAttributeValue caseFirst[] = {
283    UCOL_OFF,
284    UCOL_LOWER_FIRST,
285    UCOL_UPPER_FIRST
286};
287
288
289UColAttributeValue alternateHandling[] = {
290    UCOL_NON_IGNORABLE,
291    UCOL_SHIFTED
292};
293
294UColAttributeValue caseLevel[] = {
295    UCOL_OFF,
296    UCOL_ON
297};
298
299UColAttributeValue strengths[] = {
300    UCOL_PRIMARY,
301    UCOL_SECONDARY,
302    UCOL_TERTIARY,
303    UCOL_QUATERNARY,
304    UCOL_IDENTICAL
305};
306
307#if 0
/*
 * Printable names for the attribute value tables; each table is parallel to
 * the value table of the same name without the trailing "C". Both the
 * strings and the pointer slots are read-only.
 */

/* Names matching strengths[]. */
static const char * const strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Names matching caseFirst[]. */
static const char * const caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Names matching alternateHandling[]. */
static const char * const alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Names matching caseLevel[]. */
static const char * const caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};
332
333/* not used currently - does not test only prints */
334static void PrintMarkDavis(void)
335{
336  UErrorCode status = U_ZERO_ERROR;
337  UChar m[256];
338  uint8_t sortkey[256];
339  UCollator *coll = ucol_open("en_US", &status);
340  uint32_t h,i,j,k, sortkeysize;
341  uint32_t sizem = 0;
342  char buffer[512];
343  uint32_t len = 512;
344
345  log_verbose("PrintMarkDavis");
346
347  u_uastrcpy(m, "Mark Davis");
348  sizem = u_strlen(m);
349
350
351  m[1] = 0xe4;
352
353  for(i = 0; i<sizem; i++) {
354    fprintf(stderr, "\\u%04X ", m[i]);
355  }
356  fprintf(stderr, "\n");
357
358  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
359    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
360    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
361
362    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
363      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
364      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);
365
366      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
367        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
368        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);
369
370        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
371          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
372          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
373          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
374          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
375        }
376
377      }
378
379    }
380
381  }
382}
383#endif
384
385static void BillFairmanTest(void) {
386/*
387** check for actual locale via ICU resource bundles
388**
389** lp points to the original locale ("fr_FR_....")
390*/
391
392    UResourceBundle *lr,*cr;
393    UErrorCode              lec = U_ZERO_ERROR;
394    const char *lp = "fr_FR_you_ll_never_find_this_locale";
395
396    log_verbose("BillFairmanTest\n");
397
398    lr = ures_open(NULL,lp,&lec);
399    if (lr) {
400        cr = ures_getByKey(lr,"collations",0,&lec);
401        if (cr) {
402            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
403            if (lp) {
404                if (U_SUCCESS(lec)) {
405                    if(strcmp(lp, "fr") != 0) {
406                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
407                    }
408                }
409            }
410            ures_close(cr);
411        }
412        ures_close(lr);
413    }
414}
415
416static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
417    UChar source[256] = { '\0'};
418    UChar target[256] = { '\0'};
419    UChar preP = 0x31a3;
420    UChar preQ = 0x310d;
421/*
422    UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423    UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424*/
425    /*log_verbose("Testing primary\n");*/
426
427    doTest(col, p, q, UCOL_LESS);
428/*
429    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430
431    if(result!=UCOL_LESS){
432       aescstrdup(p,utfSource,256);
433       aescstrdup(q,utfTarget,256);
434       fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
435    }
436*/
437    source[0] = preP;
438    u_strcpy(source+1,p);
439    target[0] = preQ;
440    u_strcpy(target+1,q);
441    doTest(col, source, target, UCOL_LESS);
442/*
443    fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
444*/
445}
446
447static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
448    UChar source[256] = { '\0'};
449    UChar target[256] = { '\0'};
450
451    /*log_verbose("Testing secondary\n");*/
452
453    doTest(col, p, q, UCOL_LESS);
454/*
455    fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
456*/
457    source[0] = 0x0053;
458    u_strcpy(source+1,p);
459    target[0]= 0x0073;
460    u_strcpy(target+1,q);
461
462    doTest(col, source, target, UCOL_LESS);
463/*
464    fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
465*/
466
467
468    u_strcpy(source,p);
469    source[u_strlen(p)] = 0x62;
470    source[u_strlen(p)+1] = 0;
471
472
473    u_strcpy(target,q);
474    target[u_strlen(q)] = 0x61;
475    target[u_strlen(q)+1] = 0;
476
477    doTest(col, source, target, UCOL_GREATER);
478
479/*
480    fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
481*/
482}
483
484static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
485    UChar source[256] = { '\0'};
486    UChar target[256] = { '\0'};
487
488    /*log_verbose("Testing tertiary\n");*/
489
490    doTest(col, p, q, UCOL_LESS);
491/*
492    fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
493*/
494    source[0] = 0x0020;
495    u_strcpy(source+1,p);
496    target[0]= 0x002D;
497    u_strcpy(target+1,q);
498
499    doTest(col, source, target, UCOL_LESS);
500/*
501    fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
502*/
503
504    u_strcpy(source,p);
505    source[u_strlen(p)] = 0xE0;
506    source[u_strlen(p)+1] = 0;
507
508    u_strcpy(target,q);
509    target[u_strlen(q)] = 0x61;
510    target[u_strlen(q)+1] = 0;
511
512    doTest(col, source, target, UCOL_GREATER);
513
514/*
515    fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
516*/
517}
518
519static void testEquality(UCollator* col, const UChar* p,const UChar* q){
520/*
521    UChar source[256] = { '\0'};
522    UChar target[256] = { '\0'};
523*/
524
525    doTest(col, p, q, UCOL_EQUAL);
526/*
527    fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
528*/
529}
530
531static void testCollator(UCollator *coll, UErrorCode *status) {
532  const UChar *rules = NULL, *current = NULL;
533  int32_t ruleLen = 0;
534  uint32_t strength = 0;
535  uint32_t chOffset = 0; uint32_t chLen = 0;
536  uint32_t exOffset = 0; uint32_t exLen = 0;
537  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
538  uint32_t firstEx = 0;
539/*  uint32_t rExpsLen = 0; */
540  uint32_t firstLen = 0;
541  UBool varT = FALSE; UBool top_ = TRUE;
542  uint16_t specs = 0;
543  UBool startOfRules = TRUE;
544  UBool lastReset = FALSE;
545  UBool before = FALSE;
546  uint32_t beforeStrength = 0;
547  UColTokenParser src;
548  UColOptionSet opts;
549
550  UChar first[256];
551  UChar second[256];
552  UChar tempB[256];
553  uint32_t tempLen;
554  UChar *rulesCopy = NULL;
555  UParseError parseError;
556
557  uprv_memset(&src, 0, sizeof(UColTokenParser));
558
559  src.opts = &opts;
560
561  rules = ucol_getRules(coll, &ruleLen);
562  if(U_SUCCESS(*status) && ruleLen > 0) {
563    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
564    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
565    src.current = src.source = rulesCopy;
566    src.end = rulesCopy+ruleLen;
567    src.extraCurrent = src.end;
568    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
569    *first = *second = 0;
570
571	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573    while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
574      strength = src.parsedToken.strength;
575      chOffset = src.parsedToken.charsOffset;
576      chLen = src.parsedToken.charsLen;
577      exOffset = src.parsedToken.extensionOffset;
578      exLen = src.parsedToken.extensionLen;
579      prefixOffset = src.parsedToken.prefixOffset;
580      prefixLen = src.parsedToken.prefixLen;
581      specs = src.parsedToken.flags;
582
583      startOfRules = FALSE;
584      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
585      (void)varT;    /* Suppress set but not used warning. */
586      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
587      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
588        second[0] = 0;
589      } else {
590        u_strncpy(second,src.source+chOffset, chLen);
591        second[chLen] = 0;
592
593        if(exLen > 0 && firstEx == 0) {
594          u_strncat(first, src.source+exOffset, exLen);
595          first[firstLen+exLen] = 0;
596        }
597
598        if(lastReset == TRUE && prefixLen != 0) {
599          u_strncpy(first+prefixLen, first, firstLen);
600          u_strncpy(first, src.source+prefixOffset, prefixLen);
601          first[firstLen+prefixLen] = 0;
602          firstLen = firstLen+prefixLen;
603        }
604
605        if(before == TRUE) { /* swap first and second */
606          u_strcpy(tempB, first);
607          u_strcpy(first, second);
608          u_strcpy(second, tempB);
609
610          tempLen = firstLen;
611          firstLen = chLen;
612          chLen = tempLen;
613
614          tempLen = firstEx;
615          firstEx = exLen;
616          exLen = tempLen;
617          if(beforeStrength < strength) {
618            strength = beforeStrength;
619          }
620        }
621      }
622      lastReset = FALSE;
623
624      switch(strength){
625      case UCOL_IDENTICAL:
626          testEquality(coll,first,second);
627          break;
628      case UCOL_PRIMARY:
629          testPrimary(coll,first,second);
630          break;
631      case UCOL_SECONDARY:
632          testSecondary(coll,first,second);
633          break;
634      case UCOL_TERTIARY:
635          testTertiary(coll,first,second);
636          break;
637      case UCOL_TOK_RESET:
638        lastReset = TRUE;
639        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
640        if(before) {
641          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
642        }
643        break;
644      default:
645          break;
646      }
647
648      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
649        before = FALSE;
650      } else {
651        firstLen = chLen;
652        firstEx = exLen;
653        u_strcpy(first, second);
654      }
655    }
656    uprv_free(src.source);
657    uprv_free(src.reorderCodes);
658  }
659}
660
661static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
662  UCollator *UCA = (UCollator *)collator;
663  return ucol_strcoll(UCA, source, sLen, target, tLen);
664}
665
666/*
667static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
668#if U_PLATFORM_HAS_WIN32_API
669  LCID lcid = (LCID)collator;
670  return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
671#else
672  return 0;
673#endif
674}
675*/
676
677static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
678                                     UChar s1, UChar s2,
679                                     const UChar *s, const uint32_t sLen,
680                                     const UChar *t, const uint32_t tLen) {
681  UChar source[256] = {0};
682  UChar target[256] = {0};
683
684  source[0] = s1;
685  u_strcpy(source+1, s);
686  target[0] = s2;
687  u_strcpy(target+1, t);
688
689  return func(collator, opts, source, sLen+1, target, tLen+1);
690}
691
692static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
693                                   UChar s1, UChar s2,
694                                   const UChar *s, const uint32_t sLen,
695                                   const UChar *t, const uint32_t tLen) {
696  UChar source[256] = {0};
697  UChar target[256] = {0};
698
699  u_strcpy(source, s);
700  source[sLen] = s1;
701  u_strcpy(target, t);
702  target[tLen] = s2;
703
704  return func(collator, opts, source, sLen+1, target, tLen+1);
705}
706
707static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
708                              const UChar *s, const uint32_t sLen,
709                              const UChar *t, const uint32_t tLen,
710                              UCollationResult result) {
711  /*UChar fPrimary = 0x6d;*/
712  /*UChar sPrimary = 0x6e;*/
713  UChar fSecondary = 0x310d;
714  UChar sSecondary = 0x31a3;
715  UChar fTertiary = 0x310f;
716  UChar sTertiary = 0x31b7;
717
718  UCollationResult oposite;
719  if(result == UCOL_EQUAL) {
720    return UCOL_IDENTICAL;
721  } else if(result == UCOL_GREATER) {
722    oposite = UCOL_LESS;
723  } else {
724    oposite = UCOL_GREATER;
725  }
726
727  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
728    return UCOL_PRIMARY;
729  } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
730    (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
731    return UCOL_SECONDARY;
732  } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
733    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
734    return UCOL_TERTIARY;
735  } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
736    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
737    return UCOL_QUATERNARY;
738  } else {
739    return UCOL_IDENTICAL;
740  }
741}
742
743static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
744  uint32_t i = 0;
745
746  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
747    buffer[0] = '=';
748    buffer[1] = '=';
749    buffer[2] = '\0';
750  } else if(res == UCOL_GREATER) {
751    for(i = 0; i<strength+1; i++) {
752      buffer[i] = '>';
753    }
754    buffer[strength+1] = '\0';
755  } else {
756    for(i = 0; i<strength+1; i++) {
757      buffer[i] = '<';
758    }
759    buffer[strength+1] = '\0';
760  }
761
762  return buffer;
763}
764
765
766
767static void logFailure (const char *platform, const char *test,
768                        const UChar *source, const uint32_t sLen,
769                        const UChar *target, const uint32_t tLen,
770                        UCollationResult realRes, uint32_t realStrength,
771                        UCollationResult expRes, uint32_t expStrength, UBool error) {
772
773  uint32_t i = 0;
774
775  char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
776  static int32_t maxOutputLength = 0;
777  int32_t outputLength;
778
779  *sEsc = *tEsc = *s = *t = 0;
780  if(error == TRUE) {
781    log_err("Difference between expected and generated order. Run test with -v for more info\n");
782  } else if(getTestOption(VERBOSITY_OPTION) == 0) {
783    return;
784  }
785  for(i = 0; i<sLen; i++) {
786    sprintf(b, "%04X", source[i]);
787    strcat(sEsc, "\\u");
788    strcat(sEsc, b);
789    strcat(s, b);
790    strcat(s, " ");
791    if(source[i] < 0x80) {
792      sprintf(b, "(%c)", source[i]);
793      strcat(sEsc, b);
794    }
795  }
796  for(i = 0; i<tLen; i++) {
797    sprintf(b, "%04X", target[i]);
798    strcat(tEsc, "\\u");
799    strcat(tEsc, b);
800    strcat(t, b);
801    strcat(t, " ");
802    if(target[i] < 0x80) {
803      sprintf(b, "(%c)", target[i]);
804      strcat(tEsc, b);
805    }
806  }
807/*
808  strcpy(output, "[[ ");
809  strcat(output, sEsc);
810  strcat(output, getRelationSymbol(expRes, expStrength, relation));
811  strcat(output, tEsc);
812
813  strcat(output, " : ");
814
815  strcat(output, sEsc);
816  strcat(output, getRelationSymbol(realRes, realStrength, relation));
817  strcat(output, tEsc);
818  strcat(output, " ]] ");
819
820  log_verbose("%s", output);
821*/
822
823
824  strcpy(output, "DIFF: ");
825
826  strcat(output, s);
827  strcat(output, " : ");
828  strcat(output, t);
829
830  strcat(output, test);
831  strcat(output, ": ");
832
833  strcat(output, sEsc);
834  strcat(output, getRelationSymbol(expRes, expStrength, relation));
835  strcat(output, tEsc);
836
837  strcat(output, " ");
838
839  strcat(output, platform);
840  strcat(output, ": ");
841
842  strcat(output, sEsc);
843  strcat(output, getRelationSymbol(realRes, realStrength, relation));
844  strcat(output, tEsc);
845
846  outputLength = (int32_t)strlen(output);
847  if(outputLength > maxOutputLength) {
848    maxOutputLength = outputLength;
849    U_ASSERT(outputLength < sizeof(output));
850  }
851
852  log_verbose("%s\n", output);
853
854}
855
856/*
857static void printOutRules(const UChar *rules) {
858  uint32_t len = u_strlen(rules);
859  uint32_t i = 0;
860  char toPrint;
861  uint32_t line = 0;
862
863  fprintf(stdout, "Rules:");
864
865  for(i = 0; i<len; i++) {
866    if(rules[i]<0x7f && rules[i]>=0x20) {
867      toPrint = (char)rules[i];
868      if(toPrint == '&') {
869        line = 1;
870        fprintf(stdout, "\n&");
871      } else if(toPrint == ';') {
872        fprintf(stdout, "<<");
873        line+=2;
874      } else if(toPrint == ',') {
875        fprintf(stdout, "<<<");
876        line+=3;
877      } else {
878        fprintf(stdout, "%c", toPrint);
879        line++;
880      }
881    } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
882      fprintf(stdout, "\\u%04X", rules[i]);
883      line+=6;
884    }
885    if(line>72) {
886      fprintf(stdout, "\n");
887      line = 0;
888    }
889  }
890
891  log_verbose("\n");
892
893}
894*/
895
896static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
897  uint32_t diffs = 0;
898  UCollationResult realResult;
899  uint32_t realStrength;
900
901  uint32_t sLen = u_strlen(first);
902  uint32_t tLen = u_strlen(second);
903
904  realResult = func(collator, opts, first, sLen, second, tLen);
905  realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
906
907  if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
908    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
909    diffs++;
910  } else if(realResult != UCOL_LESS || realStrength != strength) {
911    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
912    diffs++;
913  }
914  return diffs;
915}
916
917
918static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
919  const UChar *rules = NULL, *current = NULL;
920  int32_t ruleLen = 0;
921  uint32_t strength = 0;
922  uint32_t chOffset = 0; uint32_t chLen = 0;
923  uint32_t exOffset = 0; uint32_t exLen = 0;
924  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
925/*  uint32_t rExpsLen = 0; */
926  uint32_t firstLen = 0, secondLen = 0;
927  UBool varT = FALSE; UBool top_ = TRUE;
928  uint16_t specs = 0;
929  UBool startOfRules = TRUE;
930  UColTokenParser src;
931  UColOptionSet opts;
932
933  UChar first[256];
934  UChar second[256];
935  UChar *rulesCopy = NULL;
936
937  uint32_t UCAdiff = 0;
938  uint32_t Windiff = 1;
939  UParseError parseError;
940
941  (void)top_;      /* Suppress set but not used warnings. */
942  (void)varT;
943  (void)secondLen;
944  (void)prefixLen;
945  (void)prefixOffset;
946
947  uprv_memset(&src, 0, sizeof(UColTokenParser));
948  src.opts = &opts;
949
950  rules = ucol_getRules(coll, &ruleLen);
951
952  /*printOutRules(rules);*/
953
954  if(U_SUCCESS(*status) && ruleLen > 0) {
955    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
956    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
957    src.current = src.source = rulesCopy;
958    src.end = rulesCopy+ruleLen;
959    src.extraCurrent = src.end;
960    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
961    *first = *second = 0;
962
963    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
964       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
965    while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
966      strength = src.parsedToken.strength;
967      chOffset = src.parsedToken.charsOffset;
968      chLen = src.parsedToken.charsLen;
969      exOffset = src.parsedToken.extensionOffset;
970      exLen = src.parsedToken.extensionLen;
971      prefixOffset = src.parsedToken.prefixOffset;
972      prefixLen = src.parsedToken.prefixLen;
973      specs = src.parsedToken.flags;
974
975      startOfRules = FALSE;
976      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
977      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
978
979      u_strncpy(second,src.source+chOffset, chLen);
980      second[chLen] = 0;
981      secondLen = chLen;
982
983      if(exLen > 0) {
984        u_strncat(first, src.source+exOffset, exLen);
985        first[firstLen+exLen] = 0;
986        firstLen += exLen;
987      }
988
989      if(strength != UCOL_TOK_RESET) {
990        if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
991          UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
992          /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
993        }
994      }
995
996
997      firstLen = chLen;
998      u_strcpy(first, second);
999
1000    }
1001    if(UCAdiff != 0 && Windiff != 0) {
1002      log_verbose("\n");
1003    }
1004    if(UCAdiff == 0) {
1005      log_verbose("No immediate difference with %s!\n", refName);
1006    }
1007    if(Windiff == 0) {
1008      log_verbose("No immediate difference with Win32!\n");
1009    }
1010    uprv_free(src.source);
1011    uprv_free(src.reorderCodes);
1012  }
1013}
1014
/*
 * Compares two collation elements, each given as a lead CE plus a
 * continuation CE, level by level:
 *   primary   - upper 16 bits of lead, then upper 16 bits of continuation;
 *   secondary - bits 8..15 of lead, then bits 8..15 of continuation;
 *   tertiary  - bits 0..7 of lead, then bits 0..7 of continuation.
 *
 * Returns 0 when both pairs are identical, -1 when (s1, s2) sorts before
 * (t1, t2) and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t srcKey = 0;
  uint32_t tgtKey = 0;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* Primary weights. */
  srcKey = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >> 16);
  tgtKey = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >> 16);
  if(srcKey != tgtKey) {
    return (srcKey < tgtKey) ? -1 : 1;
  }

  /* Secondary weights. */
  srcKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00) >> 8);
  tgtKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00) >> 8);
  if(srcKey != tgtKey) {
    return (srcKey < tgtKey) ? -1 : 1;
  }

  /* Tertiary weights: the last level, so anything not smaller is larger. */
  srcKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  tgtKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
  return (srcKey < tgtKey) ? -1 : 1;
}
1051
/*
 * One boundary entry for indirect positioning: a start CE and a limit CE,
 * each stored as a pair of 32-bit values (presumably lead CE and
 * continuation CE - confirm against the callers).
 */
typedef struct {
  uint32_t startCE;      /* first value of the start pair */
  uint32_t startContCE;  /* second value of the start pair */
  uint32_t limitCE;      /* first value of the limit pair, 0 if no limit */
  uint32_t limitContCE;  /* second value of the limit pair, 0 if no limit */
} indirectBoundaries;
1058
/* These values are used for finding CE values for indirect positioning.  */
/* Indirect positioning is a mechanism for allowing resets on symbolic    */
/* values. It only works for resets, and indirect names cannot be         */
/* tailored. An indirect name can define either an anchor point or a      */
/* range. An anchor point behaves in exactly the same way as a code point */
/* in a reset would, except that it cannot be tailored. A range (we       */
/* currently only know of the [top] range) explicitly sets the upper      */
/* bound for generated CEs, thus allowing for better control over how     */
/* many CEs can be squeezed into the range without a performance penalty. */
/* In that respect, we use [top] for tailoring of locales that use CJK    */
/* characters. Other indirect values are currently a pure convenience:    */
/* they can be used to ensure that the CEs will always be positioned in   */
/* the same place relative to a point with known properties (e.g. first   */
/* primary ignorable).                                                    */
1073static indirectBoundaries ucolIndirectBoundaries[15];
1074static UBool indirectBoundariesSet = FALSE;
1075static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1076    /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1077    /* to initalize here. */
1078    ucolIndirectBoundaries[indexR].startCE = start[0];
1079    ucolIndirectBoundaries[indexR].startContCE = start[1];
1080    if(end) {
1081        ucolIndirectBoundaries[indexR].limitCE = end[0];
1082        ucolIndirectBoundaries[indexR].limitContCE = end[1];
1083    } else {
1084        ucolIndirectBoundaries[indexR].limitCE = 0;
1085        ucolIndirectBoundaries[indexR].limitContCE = 0;
1086    }
1087}
1088
1089static void testCEs(UCollator *coll, UErrorCode *status) {
1090    const UChar *rules = NULL, *current = NULL;
1091    int32_t ruleLen = 0;
1092
1093    uint32_t strength = 0;
1094    uint32_t maxStrength = UCOL_IDENTICAL;
1095    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1096    uint32_t lastCE;
1097    uint32_t lastContCE;
1098
1099    int32_t result = 0;
1100    uint32_t chOffset = 0; uint32_t chLen = 0;
1101    uint32_t exOffset = 0; uint32_t exLen = 0;
1102    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1103    uint32_t oldOffset = 0;
1104
1105    /* uint32_t rExpsLen = 0; */
1106    /* uint32_t firstLen = 0; */
1107    uint16_t specs = 0;
1108    UBool varT = FALSE; UBool top_ = TRUE;
1109    UBool startOfRules = TRUE;
1110    UBool before = FALSE;
1111    UColTokenParser src;
1112    UColOptionSet opts;
1113    UParseError parseError;
1114    UChar *rulesCopy = NULL;
1115    collIterate *c = uprv_new_collIterate(status);
1116    UCAConstants *consts = NULL;
1117    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1118        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1119    const char *colLoc;
1120    UCollator *UCA = ucol_open("root", status);
1121
1122    (void)varT;             /* Suppress set but not used warnings. */
1123    (void)prefixLen;
1124    (void)prefixOffset;
1125    (void)exLen;
1126    (void)exOffset;
1127
1128    if (U_FAILURE(*status)) {
1129        log_err("Could not open root collator %s\n", u_errorName(*status));
1130        uprv_delete_collIterate(c);
1131        return;
1132    }
1133
1134    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1135    if (U_FAILURE(*status)) {
1136        log_err("Could not get collator name: %s\n", u_errorName(*status));
1137        ucol_close(UCA);
1138        uprv_delete_collIterate(c);
1139        return;
1140    }
1141
1142    uprv_memset(&src, 0, sizeof(UColTokenParser));
1143
1144    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1145    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1146    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1147    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1148    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1149
1150    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1151
1152    src.opts = &opts;
1153
1154    rules = ucol_getRules(coll, &ruleLen);
1155
1156    src.invUCA = ucol_initInverseUCA(status);
1157
1158    if(indirectBoundariesSet == FALSE) {
1159        /* UCOL_RESET_TOP_VALUE */
1160        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1161        /* UCOL_FIRST_PRIMARY_IGNORABLE */
1162        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1163        /* UCOL_LAST_PRIMARY_IGNORABLE */
1164        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1165        /* UCOL_FIRST_SECONDARY_IGNORABLE */
1166        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1167        /* UCOL_LAST_SECONDARY_IGNORABLE */
1168        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1169        /* UCOL_FIRST_TERTIARY_IGNORABLE */
1170        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1171        /* UCOL_LAST_TERTIARY_IGNORABLE */
1172        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1173        /* UCOL_FIRST_VARIABLE */
1174        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1175        /* UCOL_LAST_VARIABLE */
1176        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1177        /* UCOL_FIRST_NON_VARIABLE */
1178        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1179        /* UCOL_LAST_NON_VARIABLE */
1180        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1181        /* UCOL_FIRST_IMPLICIT */
1182        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1183        /* UCOL_LAST_IMPLICIT */
1184        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1185        /* UCOL_FIRST_TRAILING */
1186        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1187        /* UCOL_LAST_TRAILING */
1188        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1189        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1190        indirectBoundariesSet = TRUE;
1191    }
1192
1193
1194    if(U_SUCCESS(*status) && ruleLen > 0) {
1195        rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1196        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1197        src.current = src.source = rulesCopy;
1198        src.end = rulesCopy+ruleLen;
1199        src.extraCurrent = src.end;
1200        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1201
1202	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1203	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1204        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1205            strength = src.parsedToken.strength;
1206            chOffset = src.parsedToken.charsOffset;
1207            chLen = src.parsedToken.charsLen;
1208            exOffset = src.parsedToken.extensionOffset;
1209            exLen = src.parsedToken.extensionLen;
1210            prefixOffset = src.parsedToken.prefixOffset;
1211            prefixLen = src.parsedToken.prefixLen;
1212            specs = src.parsedToken.flags;
1213
1214            startOfRules = FALSE;
1215            varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1216            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1217
1218            uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1219
1220            currCE = ucol_getNextCE(coll, c, status);
1221            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1222                log_verbose("Thai prevowel detected. Will pick next CE\n");
1223                currCE = ucol_getNextCE(coll, c, status);
1224            }
1225
1226            currContCE = ucol_getNextCE(coll, c, status);
1227            if(!isContinuation(currContCE)) {
1228                currContCE = 0;
1229            }
1230
1231            /* we need to repack CEs here */
1232
1233            if(strength == UCOL_TOK_RESET) {
1234                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1235                if(top_ == TRUE) {
1236                    int32_t tokenIndex = src.parsedToken.indirectIndex;
1237
1238                    nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1239                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1240                } else {
1241                    nextCE = baseCE = currCE;
1242                    nextContCE = baseContCE = currContCE;
1243                }
1244                maxStrength = UCOL_IDENTICAL;
1245            } else {
1246                if(strength < maxStrength) {
1247                    maxStrength = strength;
1248                    if(baseCE == UCOL_RESET_TOP_VALUE) {
1249                        log_verbose("Resetting to [top]\n");
1250                        nextCE = UCOL_NEXT_TOP_VALUE;
1251                        nextContCE = UCOL_NEXT_TOP_CONT;
1252                    } else {
1253                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1254                    }
1255                    if(result < 0) {
1256                        if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1257                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1258                            return;
1259                        } else {
1260                            log_err("%s: couldn't find the CE\n", colLoc);
1261                            return;
1262                        }
1263                    }
1264                }
1265
1266                currCE &= 0xFFFFFF3F;
1267                currContCE &= 0xFFFFFFBF;
1268
1269                if(maxStrength == UCOL_IDENTICAL) {
1270                    if(baseCE != currCE || baseContCE != currContCE) {
1271                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1272                    }
1273                } else {
1274                    if(strength == UCOL_IDENTICAL) {
1275                        if(lastCE != currCE || lastContCE != currContCE) {
1276                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1277                        }
1278                    } else {
1279                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1280                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1281                            log_err("%s: current CE is not less than base CE\n", colLoc);
1282                        }
1283                        if(!before) {
1284                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1285                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1286                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1287                            }
1288                        } else {
1289                            before = FALSE;
1290                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1291                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1292                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1293                            }
1294                        }
1295                    }
1296                }
1297
1298            }
1299
1300            oldOffset = chOffset;
1301            lastCE = currCE & 0xFFFFFF3F;
1302            lastContCE = currContCE & 0xFFFFFFBF;
1303        }
1304        uprv_free(src.source);
1305        uprv_free(src.reorderCodes);
1306    }
1307    ucol_close(UCA);
1308    uprv_delete_collIterate(c);
1309}
1310
#if 0
/*
 * Disabled: this hard-coded list is not compiled any more. The locales are
 * now picked from index RB (the tests in this file iterate over
 * uloc_countAvailable()/uloc_getAvailable() instead).
 */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1325
/*
 * Rule strings that RamsRulesTest() checks in addition to the locale
 * tailorings. Every string is run through u_unescape() before it is handed
 * to ucol_openRules(), hence the doubled backslashes. The comment following
 * a rule keeps a commented-out variant of it.
 */
static const char* rulesToTest[] = {
    /* Funky fa rule */
    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
        /* "& Z < p, P", */

    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U",
        /* "<o,O<p,P<q,Q<r,R<u,U & Qu<'?'", */
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /* "<o,O<p,P<q,Q<r,R<u,U & Qu;'?'", */
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U",
        /* "<o,O<p,P<q,Q<r,R<u,U&'Qu','?'", */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'", */
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu", */
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U",
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'", */
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"
        /* "<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum" */
};
1339
1340
1341static void TestCollations(void) {
1342    int32_t noOfLoc = uloc_countAvailable();
1343    int32_t i = 0, j = 0;
1344
1345    UErrorCode status = U_ZERO_ERROR;
1346    char cName[256];
1347    UChar name[256];
1348    int32_t nameSize;
1349
1350
1351    const char *locName = NULL;
1352    UCollator *coll = NULL;
1353    UCollator *UCA = ucol_open("", &status);
1354    UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1355    if (U_FAILURE(status)) {
1356        log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1357        return;
1358    }
1359    ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1360
1361    for(i = 0; i<noOfLoc; i++) {
1362        status = U_ZERO_ERROR;
1363        locName = uloc_getAvailable(i);
1364        if(uprv_strcmp("ja", locName) == 0) {
1365            log_verbose("Don't know how to test prefixes\n");
1366            continue;
1367        }
1368        if(hasCollationElements(locName)) {
1369            nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1370            for(j = 0; j<nameSize; j++) {
1371                cName[j] = (char)name[j];
1372            }
1373            cName[nameSize] = 0;
1374            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1375            coll = ucol_open(locName, &status);
1376            if(U_SUCCESS(status)) {
1377                testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1378                ucol_close(coll);
1379            } else {
1380                log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1381                status = U_ZERO_ERROR;
1382            }
1383        }
1384    }
1385    ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1386    ucol_close(UCA);
1387}
1388
1389static void RamsRulesTest(void) {
1390    UErrorCode status = U_ZERO_ERROR;
1391    int32_t i = 0;
1392    UCollator *coll = NULL;
1393    UChar rule[2048];
1394    uint32_t ruleLen;
1395    int32_t noOfLoc = uloc_countAvailable();
1396    const char *locName = NULL;
1397
1398    log_verbose("RamsRulesTest\n");
1399
1400    if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1401        /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1402        return;
1403    }
1404
1405    for(i = 0; i<noOfLoc; i++) {
1406        locName = uloc_getAvailable(i);
1407        if(hasCollationElements(locName)) {
1408            if (uprv_strcmp("ja", locName)==0) {
1409                log_verbose("Don't know how to test Japanese because of prefixes\n");
1410                continue;
1411            }
1412            if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1413                log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1414                continue;
1415            }
1416            if (uprv_strcmp("bn", locName)==0 ||
1417                uprv_strcmp("bs", locName)==0 ||            /* Add due to import per cldrbug 5647 */
1418                uprv_strcmp("bs_Cyrl", locName)==0 ||       /* Add due to import per cldrbug 5647 */
1419                uprv_strcmp("en_US_POSIX", locName)==0 ||
1420                uprv_strcmp("fa", locName)==0 ||            /* Add in #10222 with CLDR 24 integration */
1421                uprv_strcmp("fa_AF", locName)==0 ||         /* Add due to import per cldrbug 5647 */
1422                uprv_strcmp("gl", locName)==0 ||            /* Add due to import per cldrbug 5647 */
1423                uprv_strcmp("gl_ES", locName)==0 ||         /* Add due to import per cldrbug 5647 */
1424                uprv_strcmp("he", locName)==0 ||            /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1425                uprv_strcmp("he_IL", locName)==0 ||         /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1426                uprv_strcmp("km", locName)==0 ||
1427                uprv_strcmp("km_KH", locName)==0 ||
1428                uprv_strcmp("my", locName)==0 ||
1429                uprv_strcmp("ps", locName)==0 ||            /* Add in #10222 with CLDR 24 integration */
1430                uprv_strcmp("si", locName)==0 ||
1431                uprv_strcmp("si_LK", locName)==0 ||
1432                uprv_strcmp("sr_Latn", locName)==0 ||       /* Add due to import per cldrbug 5647 */
1433                uprv_strcmp("th", locName)==0 ||
1434                uprv_strcmp("th_TH", locName)==0 ||
1435                uprv_strcmp("zh", locName)==0 ||
1436                uprv_strcmp("zh_Hant", locName)==0
1437            ) {
1438              if(log_knownIssue("6040", NULL)) {
1439                log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1440                continue;
1441              }
1442            }
1443            log_verbose("Testing locale %s\n", locName);
1444            status = U_ZERO_ERROR;
1445            coll = ucol_open(locName, &status);
1446            if(U_SUCCESS(status)) {
1447              if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1448                if(coll->image->jamoSpecial == TRUE) {
1449                  log_err("%s has special JAMOs\n", locName);
1450                }
1451                ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1452                testCollator(coll, &status);
1453                testCEs(coll, &status);
1454              } else {
1455                log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1456              }
1457              ucol_close(coll);
1458            } else {
1459              log_err("Could not open %s: %s\n", locName, u_errorName(status));
1460            }
1461        }
1462    }
1463
1464    for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1465        log_verbose("Testing rule: %s\n", rulesToTest[i]);
1466        ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1467        status = U_ZERO_ERROR;
1468        coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1469        if(U_SUCCESS(status)) {
1470            testCollator(coll, &status);
1471            testCEs(coll, &status);
1472            ucol_close(coll);
1473        } else {
1474          log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1475        }
1476    }
1477
1478}
1479
1480static void IsTailoredTest(void) {
1481    UErrorCode status = U_ZERO_ERROR;
1482    uint32_t i = 0;
1483    UCollator *coll = NULL;
1484    UChar rule[2048];
1485    UChar tailored[2048];
1486    UChar notTailored[2048];
1487    uint32_t ruleLen, tailoredLen, notTailoredLen;
1488
1489    log_verbose("IsTailoredTest\n");
1490
1491    u_uastrcpy(rule, "&Z < A, B, C;c < d");
1492    ruleLen = u_strlen(rule);
1493
1494    u_uastrcpy(tailored, "ABCcd");
1495    tailoredLen = u_strlen(tailored);
1496
1497    u_uastrcpy(notTailored, "ZabD");
1498    notTailoredLen = u_strlen(notTailored);
1499
1500    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1501    if(U_SUCCESS(status)) {
1502        for(i = 0; i<tailoredLen; i++) {
1503            if(!ucol_isTailored(coll, tailored[i], &status)) {
1504                log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1505            }
1506        }
1507        for(i = 0; i<notTailoredLen; i++) {
1508            if(ucol_isTailored(coll, notTailored[i], &status)) {
1509                log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1510            }
1511        }
1512        ucol_close(coll);
1513    }
1514    else {
1515        log_err_status(status, "Can't tailor rules\n");
1516    }
1517    /* Code coverage */
1518    status = U_ZERO_ERROR;
1519    coll = ucol_open("ja", &status);
1520    if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1521        log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1522    }
1523    ucol_close(coll);
1524}
1525
1526
/*
 * Strings for TestChMove(), listed in the order in which the "cs" collator
 * is expected to sort them: every entry must compare less than all entries
 * after it. The entries are run through u_unescape() before being compared.
 */
static const char chTest[][20] = {
  /* c */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  /* c with caron */
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r, with and without caron */
  "r", "R",
  "r\\u030C", "R\\u030C",
  /* s, with and without caron */
  "s", "S",
  "s\\u030C", "S\\u030C",
  /* z, with and without caron */
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1546
1547static void TestChMove(void) {
1548    UChar t1[256] = {0};
1549    UChar t2[256] = {0};
1550
1551    uint32_t i = 0, j = 0;
1552    uint32_t size = 0;
1553    UErrorCode status = U_ZERO_ERROR;
1554
1555    UCollator *coll = ucol_open("cs", &status);
1556
1557    if(U_SUCCESS(status)) {
1558        size = sizeof(chTest)/sizeof(chTest[0]);
1559        for(i = 0; i < size-1; i++) {
1560            for(j = i+1; j < size; j++) {
1561                u_unescape(chTest[i], t1, 256);
1562                u_unescape(chTest[j], t2, 256);
1563                doTest(coll, t1, t2, UCOL_LESS);
1564            }
1565        }
1566    }
1567    else {
1568        log_data_err("Can't open collator");
1569    }
1570    ucol_close(coll);
1571}
1572
1573
1574
1575
/*
 * Expected order for the rule "&\u4e00 < a <<< A < b <<< B". This table is
 * only referenced from the commented-out code in TestImplicitTailoring().
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1584
1585
/*
 * Hands each rule string below, together with the strings listed for it and
 * their count, to genericRulesStarter().
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e",
        { "d", "e", "b", "c", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",
        { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02",
        { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03",
        { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };

  const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t testIndex;

  for(testIndex = 0; testIndex < testCount; testIndex++) {
      genericRulesStarter(tests[testIndex].rules, tests[testIndex].data, tests[testIndex].len);
  }

/*
  Commented-out variant of this test, driven by the impTest table:

  UChar t1[256] = {0};
  UChar t2[256] = {0};

  const char *rule = "&\\u4e00 < a <<< A < b <<< B";

  uint32_t i = 0, j = 0;
  uint32_t size = 0;
  uint32_t ruleLen = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = NULL;
  ruleLen = u_unescape(rule, t1, 256);

  coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

  if(U_SUCCESS(status)) {
    size = sizeof(impTest)/sizeof(impTest[0]);
    for(i = 0; i < size-1; i++) {
      for(j = i+1; j < size; j++) {
        u_unescape(impTest[i], t1, 256);
        u_unescape(impTest[j], t2, 256);
        doTest(coll, t1, t2, UCOL_LESS);
      }
    }
  }
  else {
    log_err("Can't open collator");
  }
  ucol_close(coll);
  */
}
1635
1636static void TestFCDProblem(void) {
1637  UChar t1[256] = {0};
1638  UChar t2[256] = {0};
1639
1640  const char *s1 = "\\u0430\\u0306\\u0325";
1641  const char *s2 = "\\u04D1\\u0325";
1642
1643  UErrorCode status = U_ZERO_ERROR;
1644  UCollator *coll = ucol_open("", &status);
1645  u_unescape(s1, t1, 256);
1646  u_unescape(s2, t2, 256);
1647
1648  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1649  doTest(coll, t1, t2, UCOL_EQUAL);
1650
1651  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1652  doTest(coll, t1, t2, UCOL_EQUAL);
1653
1654  ucol_close(coll);
1655}
1656
/*
 * The largest normalization result has length 18 for NFKC/NFKD, 4 for NFD
 * and 3 for NFC. Only NFC/NFD are used in this test.
 */
#define NORM_BUFFER_TEST_LEN 18
/* One test case collected by TestComposeDecompose(). */
typedef struct {
  /* the character the case was generated from */
  UChar32 u;
  /* its NFC form, or the character itself when that differs from its NFD form */
  UChar NFC[NORM_BUFFER_TEST_LEN];
  /* its NFD form */
  UChar NFD[NORM_BUFFER_TEST_LEN];
} tester;
1667
1668static void TestComposeDecompose(void) {
1669    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1670    static const UChar UNICODESET_STR[] = {
1671        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1672        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1673        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1674    };
1675    int32_t noOfLoc;
1676    int32_t i = 0, j = 0;
1677
1678    UErrorCode status = U_ZERO_ERROR;
1679    const char *locName = NULL;
1680    uint32_t nfcSize;
1681    uint32_t nfdSize;
1682    tester **t;
1683    uint32_t noCases = 0;
1684    UCollator *coll = NULL;
1685    UChar32 u = 0;
1686    UChar comp[NORM_BUFFER_TEST_LEN];
1687    uint32_t len = 0;
1688    UCollationElements *iter;
1689    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1690    int32_t charsToTestSize;
1691
1692    noOfLoc = uloc_countAvailable();
1693
1694    coll = ucol_open("", &status);
1695    if (U_FAILURE(status)) {
1696        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1697        return;
1698    }
1699    charsToTestSize = uset_size(charsToTest);
1700    if (charsToTestSize <= 0) {
1701        log_err("Set was zero. Missing data?\n");
1702        return;
1703    }
1704    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1705    t[0] = (tester *)malloc(sizeof(tester));
1706    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1707
1708    for(u = 0; u < charsToTestSize; u++) {
1709        UChar32 ch = uset_charAt(charsToTest, u);
1710        len = 0;
1711        U16_APPEND_UNSAFE(comp, len, ch);
1712        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1713        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1714
1715        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1716          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1717            t[noCases]->u = ch;
1718            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1719                u_strncpy(t[noCases]->NFC, comp, len);
1720                t[noCases]->NFC[len] = 0;
1721            }
1722            noCases++;
1723            t[noCases] = (tester *)malloc(sizeof(tester));
1724            uprv_memset(t[noCases], 0, sizeof(tester));
1725        }
1726    }
1727    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1728    uset_close(charsToTest);
1729    charsToTest = NULL;
1730
1731    for(u=0; u<(UChar32)noCases; u++) {
1732        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1733            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1734            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1735        }
1736    }
1737    /*
1738    for(u = 0; u < charsToTestSize; u++) {
1739      if(!(u&0xFFFF)) {
1740        log_verbose("%08X ", u);
1741      }
1742      uprv_memset(t[noCases], 0, sizeof(tester));
1743      t[noCases]->u = u;
1744      len = 0;
1745      U16_APPEND_UNSAFE(comp, len, u);
1746      comp[len] = 0;
1747      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1748      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1749      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1750      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1751    }
1752    */
1753
1754    ucol_close(coll);
1755
1756    log_verbose("Testing locales, number of cases = %i\n", noCases);
1757    for(i = 0; i<noOfLoc; i++) {
1758        status = U_ZERO_ERROR;
1759        locName = uloc_getAvailable(i);
1760        if(hasCollationElements(locName)) {
1761            char cName[256];
1762            UChar name[256];
1763            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1764
1765            for(j = 0; j<nameSize; j++) {
1766                cName[j] = (char)name[j];
1767            }
1768            cName[nameSize] = 0;
1769            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1770
1771            coll = ucol_open(locName, &status);
1772            ucol_setStrength(coll, UCOL_IDENTICAL);
1773            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1774
1775            for(u=0; u<(UChar32)noCases; u++) {
1776                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1777                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1778                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1779                    log_verbose("Testing NFC\n");
1780                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1781                    backAndForth(iter);
1782                    log_verbose("Testing NFD\n");
1783                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1784                    backAndForth(iter);
1785                }
1786            }
1787            ucol_closeElements(iter);
1788            ucol_close(coll);
1789        }
1790    }
1791    for(u = 0; u <= (UChar32)noCases; u++) {
1792        free(t[u]);
1793    }
1794    free(t);
1795}
1796
1797static void TestEmptyRule(void) {
1798  UErrorCode status = U_ZERO_ERROR;
1799  UChar rulez[] = { 0 };
1800  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1801
1802  ucol_close(coll);
1803}
1804
1805static void TestUCARules(void) {
1806  UErrorCode status = U_ZERO_ERROR;
1807  UChar b[256];
1808  UChar *rules = b;
1809  uint32_t ruleLen = 0;
1810  UCollator *UCAfromRules = NULL;
1811  UCollator *coll = ucol_open("", &status);
1812  if(status == U_FILE_ACCESS_ERROR) {
1813    log_data_err("Is your data around?\n");
1814    return;
1815  } else if(U_FAILURE(status)) {
1816    log_err("Error opening collator\n");
1817    return;
1818  }
1819  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1820
1821  log_verbose("TestUCARules\n");
1822  if(ruleLen > 256) {
1823    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1824    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1825  }
1826  log_verbose("Rules length is %d\n", ruleLen);
1827  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1828  if(U_SUCCESS(status)) {
1829    ucol_close(UCAfromRules);
1830  } else {
1831    log_verbose("Unable to create a collator from UCARules!\n");
1832  }
1833/*
1834  u_unescape(blah, b, 256);
1835  ucol_getSortKey(coll, b, 1, res, 256);
1836*/
1837  ucol_close(coll);
1838  if(rules != b) {
1839    free(rules);
1840  }
1841}
1842
1843
1844/* Pinyin tonal order */
1845/*
1846    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1847          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1848    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1849    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1850    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1851    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1852      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1853.. (\u00fc)
1854
1855However, in testing we got the following order:
1856    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1857          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1858    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1859.. (\u0113)
1860    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1861    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1862    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1863.. (\u01d8)
1864      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1865*/
1866
/**
 * Runs genericRulesStarter with a tailoring that uses [before 1] resets for
 * a, e, i, o and u, followed by a plain reset on u for the u-diaeresis
 * forms.  expectedOrder lists the strings in the order handed to the
 * generic checker.
 */
static void TestBefore(void) {
  static const char *expectedOrder[] = {
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  const char *tailoring =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(tailoring, expectedOrder, LEN(expectedOrder));
}
1885
#if 0
/* superseded by TestBeforePinyin; kept compiled out for reference */
static void TestJ784(void) {
  /* strings in the order handed to the generic checker for the "zh" locale */
  static const char *pinyinOrder[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", pinyinOrder, LEN(pinyinOrder));
}
#endif
1901
#if 0
/* superseded by the changes to the lv locale; kept compiled out for reference */
static void TestJ831(void) {
  /* strings in the order handed to the generic checker for the "lv" locale */
  static const char *latvianOrder[] = {
    "I",
      "i",
      "Y",
      "y"
  };

  genericLocaleStarter("lv", latvianOrder, LEN(latvianOrder));
}
#endif
1914
/**
 * Hands one list of strings (plain and with the ae ligature, in both cases)
 * to the generic checker twice: first for the "fr" locale, then for a rule
 * string that combines [backwards 2] with expansions for the ligatures.
 */
static void TestJ815(void) {
  static const char *ordered[] = {
    "aa",
      "Aa",
      "ab",
      "Ab",
      "ad",
      "Ad",
      "ae",
      "Ae",
      "\\u00e6",
      "\\u00c6",
      "af",
      "Af",
      "b",
      "B"
  };
  const char *ligatureRules = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  genericLocaleStarter("fr", ordered, LEN(ordered));
  genericRulesStarter(ligatureRules, ordered, LEN(ordered));
}
1935
1936
1937/*
1938"& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1939"& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1940"& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1941"& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1942"& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1943"& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1944"& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1945"& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1946"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1947*/
1948static void TestRedundantRules(void) {
1949  int32_t i;
1950
1951  static const struct {
1952      const char *rules;
1953      const char *expectedRules;
1954      const char *testdata[8];
1955      uint32_t testdatalen;
1956  } tests[] = {
1957    /* this test conflicts with positioning of CODAN placeholder */
1958       /*{
1959        "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1960        "&\\u2089<<<x",
1961        {"\\u2089", "x"}, 2
1962       }, */
1963    /* this test conflicts with the [before x] syntax tightening */
1964      /*{
1965        "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1966        "&\\u0252<<<x",
1967        {"\\u0252", "x"}, 2
1968      }, */
1969    /* this test conflicts with the [before x] syntax tightening */
1970      /*{
1971         "& a < b <<< c << d <<< e& [before 1] e <<< x",
1972         "& a <<< x < b <<< c << d <<< e",
1973        {"a", "x", "b", "c", "d", "e"}, 6
1974      }, */
1975      {
1976        "& a < b < c < d& [before 1] c < m",
1977        "& a < b < m < c < d",
1978        {"a", "b", "m", "c", "d"}, 5
1979      },
1980      {
1981        "& a < b <<< c << d <<< e& [before 3] e <<< x",
1982        "& a < b <<< c << d <<< x <<< e",
1983        {"a", "b", "c", "d", "x", "e"}, 6
1984      },
1985    /* this test conflicts with the [before x] syntax tightening */
1986      /* {
1987        "& a < b <<< c << d <<< e& [before 2] e <<< x",
1988        "& a < b <<< c <<< x << d <<< e",
1989        {"a", "b", "c", "x", "d", "e"},, 6
1990      }, */
1991      {
1992        "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1993        "& a < b <<< c << d <<< e <<< f < x < g",
1994        {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1995      },
1996      {
1997        "& a <<< b << c < d& a < m",
1998        "& a <<< b << c < m < d",
1999        {"a", "b", "c", "m", "d"}, 5
2000      },
2001      {
2002        "&a<b<<b\\u0301 &z<b",
2003        "&a<b\\u0301 &z<b",
2004        {"a", "b\\u0301", "z", "b"}, 4
2005      },
2006      {
2007        "&z<m<<<q<<<m",
2008        "&z<q<<<m",
2009        {"z", "q", "m"},3
2010      },
2011      {
2012        "&z<<<m<q<<<m",
2013        "&z<q<<<m",
2014        {"z", "q", "m"}, 3
2015      },
2016      {
2017        "& a < b < c < d& r < c",
2018        "& a < b < d& r < c",
2019        {"a", "b", "d"}, 3
2020      },
2021      {
2022        "& a < b < c < d& r < c",
2023        "& a < b < d& r < c",
2024        {"r", "c"}, 2
2025      },
2026      {
2027        "& a < b < c < d& c < m",
2028        "& a < b < c < m < d",
2029        {"a", "b", "c", "m", "d"}, 5
2030      },
2031      {
2032        "& a < b < c < d& a < m",
2033        "& a < m < b < c < d",
2034        {"a", "m", "b", "c", "d"}, 5
2035      }
2036  };
2037
2038
2039  UCollator *credundant = NULL;
2040  UCollator *cresulting = NULL;
2041  UErrorCode status = U_ZERO_ERROR;
2042  UChar rlz[2048] = { 0 };
2043  uint32_t rlen = 0;
2044
2045  for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2046    log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2047    rlen = u_unescape(tests[i].rules, rlz, 2048);
2048
2049    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2050    if(status == U_FILE_ACCESS_ERROR) {
2051      log_data_err("Is your data around?\n");
2052      return;
2053    } else if(U_FAILURE(status)) {
2054      log_err("Error opening collator\n");
2055      return;
2056    }
2057
2058    rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2059    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2060
2061    testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2062
2063    ucol_close(credundant);
2064    ucol_close(cresulting);
2065
2066    log_verbose("testing using data\n");
2067
2068    genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2069  }
2070
2071}
2072
2073static void TestExpansionSyntax(void) {
2074  int32_t i;
2075
2076  const static char *rules[] = {
2077    "&AE <<< a << b <<< c &d <<< f",
2078    "&AE <<< a <<< b << c << d < e < f <<< g",
2079    "&AE <<< B <<< C / D <<< F"
2080  };
2081
2082  const static char *expectedRules[] = {
2083    "&A <<< a / E << b / E <<< c /E  &d <<< f",
2084    "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2085    "&A <<< B / E <<< C / ED <<< F / E"
2086  };
2087
2088  const static char *testdata[][8] = {
2089    {"AE", "a", "b", "c"},
2090    {"AE", "a", "b", "c", "d", "e", "f", "g"},
2091    {"AE", "B", "C"} /* / ED <<< F / E"},*/
2092  };
2093
2094  const static uint32_t testdatalen[] = {
2095      4,
2096      8,
2097      3
2098  };
2099
2100
2101
2102  UCollator *credundant = NULL;
2103  UCollator *cresulting = NULL;
2104  UErrorCode status = U_ZERO_ERROR;
2105  UChar rlz[2048] = { 0 };
2106  uint32_t rlen = 0;
2107
2108  for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2109    log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2110    rlen = u_unescape(rules[i], rlz, 2048);
2111
2112    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2113    if(status == U_FILE_ACCESS_ERROR) {
2114      log_data_err("Is your data around?\n");
2115      return;
2116    } else if(U_FAILURE(status)) {
2117      log_err("Error opening collator\n");
2118      return;
2119    }
2120    rlen = u_unescape(expectedRules[i], rlz, 2048);
2121    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2122
2123    /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2124    /* as a hard error test, but only in information mode */
2125    testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2126
2127    ucol_close(credundant);
2128    ucol_close(cresulting);
2129
2130    log_verbose("testing using data\n");
2131
2132    genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2133  }
2134}
2135
2136static void TestCase(void)
2137{
2138    const static UChar gRules[MAX_TOKEN_LEN] =
2139    /*" & 0 < 1,\u2461<a,A"*/
2140    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2141
2142    const static UChar testCase[][MAX_TOKEN_LEN] =
2143    {
2144        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2145        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2146        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2147        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2148    };
2149
2150    const static UCollationResult caseTestResults[][9] =
2151    {
2152        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2153        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2154        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2155        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2156    };
2157
2158    const static UColAttributeValue caseTestAttributes[][2] =
2159    {
2160        { UCOL_LOWER_FIRST, UCOL_OFF},
2161        { UCOL_UPPER_FIRST, UCOL_OFF},
2162        { UCOL_LOWER_FIRST, UCOL_ON},
2163        { UCOL_UPPER_FIRST, UCOL_ON}
2164    };
2165    int32_t i,j,k;
2166    UErrorCode status = U_ZERO_ERROR;
2167    UCollationElements *iter;
2168    UCollator  *myCollation;
2169    myCollation = ucol_open("en_US", &status);
2170
2171    if(U_FAILURE(status)){
2172        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2173        return;
2174    }
2175    log_verbose("Testing different case settings\n");
2176    ucol_setStrength(myCollation, UCOL_TERTIARY);
2177
2178    for(k = 0; k<4; k++) {
2179      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2180      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2181      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2182      for (i = 0; i < 3 ; i++) {
2183        for(j = i+1; j<4; j++) {
2184          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2185        }
2186      }
2187    }
2188    ucol_close(myCollation);
2189
2190    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2191    if(U_FAILURE(status)){
2192        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2193        return;
2194    }
2195    log_verbose("Testing different case settings with custom rules\n");
2196    ucol_setStrength(myCollation, UCOL_TERTIARY);
2197
2198    for(k = 0; k<4; k++) {
2199      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2200      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2201      for (i = 0; i < 3 ; i++) {
2202        for(j = i+1; j<4; j++) {
2203          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2204          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2205          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2206          backAndForth(iter);
2207          ucol_closeElements(iter);
2208          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2209          backAndForth(iter);
2210          ucol_closeElements(iter);
2211        }
2212      }
2213    }
2214    ucol_close(myCollation);
2215    {
2216      const static char *lowerFirst[] = {
2217        "h",
2218        "H",
2219        "ch",
2220        "Ch",
2221        "CH",
2222        "cha",
2223        "chA",
2224        "Cha",
2225        "ChA",
2226        "CHa",
2227        "CHA",
2228        "i",
2229        "I"
2230      };
2231
2232      const static char *upperFirst[] = {
2233        "H",
2234        "h",
2235        "CH",
2236        "Ch",
2237        "ch",
2238        "CHA",
2239        "CHa",
2240        "ChA",
2241        "Cha",
2242        "chA",
2243        "cha",
2244        "I",
2245        "i"
2246      };
2247      log_verbose("mixed case test\n");
2248      log_verbose("lower first, case level off\n");
2249      genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2250      log_verbose("upper first, case level off\n");
2251      genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2252      log_verbose("lower first, case level on\n");
2253      genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2254      log_verbose("upper first, case level on\n");
2255      genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2256    }
2257
2258}
2259
2260static void TestIncrementalNormalize(void) {
2261
2262    /*UChar baseA     =0x61;*/
2263    UChar baseA     =0x41;
2264/*    UChar baseB     = 0x42;*/
2265    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2266    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2267    /*
2268        0x316 is combining grave accent below, cc=220
2269        0x321 is combining palatalized hook below, cc=202
2270        0x300 is combining grave accent, cc=230
2271    */
2272
2273#define MAXSLEN 2000
2274    /*int          maxSLen   = 64000;*/
2275    int          sLen;
2276    int          i;
2277
2278    UCollator        *coll;
2279    UErrorCode       status = U_ZERO_ERROR;
2280    UCollationResult result;
2281
2282    int32_t myQ = getTestOption(QUICK_OPTION);
2283
2284    if(getTestOption(QUICK_OPTION) < 0) {
2285        setTestOption(QUICK_OPTION, 1);
2286    }
2287
2288    {
2289        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2290        /*          most buffers along the way.*/
2291        UChar            strA[MAXSLEN+1];
2292        UChar            strB[MAXSLEN+1];
2293
2294        coll = ucol_open("en_US", &status);
2295        if(status == U_FILE_ACCESS_ERROR) {
2296          log_data_err("Is your data around?\n");
2297          return;
2298        } else if(U_FAILURE(status)) {
2299          log_err("Error opening collator\n");
2300          return;
2301        }
2302        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2303
2304        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2305        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2306        /*for (sLen = 1000; sLen<1001; sLen++) {*/
2307        for (sLen = 500; sLen<501; sLen++) {
2308        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2309            strA[0] = baseA;
2310            strB[0] = baseA;
2311            for (i=1; i<=sLen-1; i++) {
2312                strA[i] = ccMix[i % 3];
2313                strB[sLen-i] = ccMix[i % 3];
2314            }
2315            strA[sLen]   = 0;
2316            strB[sLen]   = 0;
2317
2318            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2319            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2320            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2321            doTest(coll, strA, strB, UCOL_EQUAL);
2322        }
2323    }
2324
2325    setTestOption(QUICK_OPTION, myQ);
2326
2327
2328    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2329    /*         of the string.  Checks a couple of edge cases.*/
2330
2331    {
2332        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2333        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2334        ucol_setStrength(coll, UCOL_TERTIARY);
2335        doTest(coll, strA, strB, UCOL_EQUAL);
2336    }
2337
2338    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2339
2340    {
2341      /* New UCA  3.1.1.
2342       * test below used a code point from Desseret, which sorts differently
2343       * than d800 dc00
2344       */
2345        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2346        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2347        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2348        ucol_setStrength(coll, UCOL_TERTIARY);
2349        doTest(coll, strA, strB, UCOL_GREATER);
2350    }
2351
2352    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2353
2354    {
2355        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2356        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2357        char  sortKeyA[50];
2358        char  sortKeyAz[50];
2359        char  sortKeyB[50];
2360        char  sortKeyBz[50];
2361        int   r;
2362
2363        /* there used to be -3 here. Hmmmm.... */
2364        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2365        result = ucol_strcoll(coll, strA, 3, strB, 3);
2366        if (result != UCOL_GREATER) {
2367            log_err("ERROR 1 in test 4\n");
2368        }
2369        result = ucol_strcoll(coll, strA, -1, strB, -1);
2370        if (result != UCOL_EQUAL) {
2371            log_err("ERROR 2 in test 4\n");
2372        }
2373
2374        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2375        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2376        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2377        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2378
2379        r = strcmp(sortKeyA, sortKeyAz);
2380        if (r <= 0) {
2381            log_err("Error 3 in test 4\n");
2382        }
2383        r = strcmp(sortKeyA, sortKeyB);
2384        if (r <= 0) {
2385            log_err("Error 4 in test 4\n");
2386        }
2387        r = strcmp(sortKeyAz, sortKeyBz);
2388        if (r != 0) {
2389            log_err("Error 5 in test 4\n");
2390        }
2391
2392        ucol_setStrength(coll, UCOL_IDENTICAL);
2393        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2394        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2395        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2396        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2397
2398        r = strcmp(sortKeyA, sortKeyAz);
2399        if (r <= 0) {
2400            log_err("Error 6 in test 4\n");
2401        }
2402        r = strcmp(sortKeyA, sortKeyB);
2403        if (r <= 0) {
2404            log_err("Error 7 in test 4\n");
2405        }
2406        r = strcmp(sortKeyAz, sortKeyBz);
2407        if (r != 0) {
2408            log_err("Error 8 in test 4\n");
2409        }
2410        ucol_setStrength(coll, UCOL_TERTIARY);
2411    }
2412
2413
2414    /*  Test 5:  Null characters in non-normal source strings.*/
2415
2416    {
2417        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2418        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2419        char  sortKeyA[50];
2420        char  sortKeyAz[50];
2421        char  sortKeyB[50];
2422        char  sortKeyBz[50];
2423        int   r;
2424
2425        result = ucol_strcoll(coll, strA, 6, strB, 6);
2426        if (result != UCOL_GREATER) {
2427            log_err("ERROR 1 in test 5\n");
2428        }
2429        result = ucol_strcoll(coll, strA, -1, strB, -1);
2430        if (result != UCOL_EQUAL) {
2431            log_err("ERROR 2 in test 5\n");
2432        }
2433
2434        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2435        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2436        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2437        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2438
2439        r = strcmp(sortKeyA, sortKeyAz);
2440        if (r <= 0) {
2441            log_err("Error 3 in test 5\n");
2442        }
2443        r = strcmp(sortKeyA, sortKeyB);
2444        if (r <= 0) {
2445            log_err("Error 4 in test 5\n");
2446        }
2447        r = strcmp(sortKeyAz, sortKeyBz);
2448        if (r != 0) {
2449            log_err("Error 5 in test 5\n");
2450        }
2451
2452        ucol_setStrength(coll, UCOL_IDENTICAL);
2453        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2454        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2455        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2456        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2457
2458        r = strcmp(sortKeyA, sortKeyAz);
2459        if (r <= 0) {
2460            log_err("Error 6 in test 5\n");
2461        }
2462        r = strcmp(sortKeyA, sortKeyB);
2463        if (r <= 0) {
2464            log_err("Error 7 in test 5\n");
2465        }
2466        r = strcmp(sortKeyAz, sortKeyBz);
2467        if (r != 0) {
2468            log_err("Error 8 in test 5\n");
2469        }
2470        ucol_setStrength(coll, UCOL_TERTIARY);
2471    }
2472
2473
2474    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2475
2476    {
2477        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2478        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2479
2480        result = ucol_strcoll(coll, strA, 5, strB, 5);
2481        if (result != UCOL_LESS) {
2482            log_err("Error 1 in test 6\n");
2483        }
2484        result = ucol_strcoll(coll, strA, -1, strB, -1);
2485        if (result != UCOL_EQUAL) {
2486            log_err("Error 2 in test 6\n");
2487        }
2488    }
2489
2490    ucol_close(coll);
2491}
2492
2493
2494
#if 0
/*
 * Compiled out.  Unescapes each entry of caseBitData, asks
 * ucol_uprv_getCaseBits for its case bits using the collator opened for the
 * "" locale, and compares the answer with the matching entry of results.
 * The collator is now checked after opening and closed when done.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  if(U_FAILURE(status)) {
    log_err_status(status, "Unable to open the root collator: %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  ucol_close(UCA);   /* was leaked before */
}
#endif
2522
2523static void TestHangulTailoring(void) {
2524    static const char *koreanData[] = {
2525        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2526            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2527            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2528            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2529            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2530            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2531    };
2532
2533    const char *rules =
2534        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2535        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2536        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2537        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2538        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2539        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2540
2541
2542  UErrorCode status = U_ZERO_ERROR;
2543  UChar rlz[2048] = { 0 };
2544  uint32_t rlen = u_unescape(rules, rlz, 2048);
2545
2546  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2547  if(status == U_FILE_ACCESS_ERROR) {
2548    log_data_err("Is your data around?\n");
2549    return;
2550  } else if(U_FAILURE(status)) {
2551    log_err("Error opening collator\n");
2552    return;
2553  }
2554
2555  log_verbose("Using start of korean rules\n");
2556
2557  if(U_SUCCESS(status)) {
2558    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2559  } else {
2560    log_err("Unable to open collator with rules %s\n", rules);
2561  }
2562
2563  log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2564  ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2565  genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2566
2567  ucol_close(coll);
2568
2569  log_verbose("Using ko__LOTUS locale\n");
2570  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2571}
2572
2573static void TestCompressOverlap(void) {
2574    UChar       secstr[150];
2575    UChar       tertstr[150];
2576    UErrorCode  status = U_ZERO_ERROR;
2577    UCollator  *coll;
2578    char        result[200];
2579    uint32_t    resultlen;
2580    int         count = 0;
2581    char       *tempptr;
2582
2583    coll = ucol_open("", &status);
2584
2585    if (U_FAILURE(status)) {
2586        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2587        return;
2588    }
2589    while (count < 149) {
2590        secstr[count] = 0x0020; /* [06, 05, 05] */
2591        tertstr[count] = 0x0020;
2592        count ++;
2593    }
2594
2595    /* top down compression ----------------------------------- */
2596    secstr[count] = 0x0332; /* [, 87, 05] */
2597    tertstr[count] = 0x3000; /* [06, 05, 07] */
2598
2599    /* no compression secstr should have 150 secondary bytes, tertstr should
2600    have 150 tertiary bytes.
2601    with correct overlapping compression, secstr should have 4 secondary
2602    bytes, tertstr should have > 2 tertiary bytes */
2603    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2604    (void)resultlen;    /* Suppress set but not used warning. */
2605    tempptr = uprv_strchr(result, 1) + 1;
2606    while (*(tempptr + 1) != 1) {
2607        /* the last secondary collation element is not checked since it is not
2608        part of the compression */
2609        if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2610            log_err("Secondary compression overlapped\n");
2611        }
2612        tempptr ++;
2613    }
2614
2615    /* tertiary top/bottom/common for en_US is similar to the secondary
2616    top/bottom/common */
2617    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2618    tempptr = uprv_strrchr(result, 1) + 1;
2619    while (*(tempptr + 1) != 0) {
2620        /* the last secondary collation element is not checked since it is not
2621        part of the compression */
2622        if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2623            log_err("Tertiary compression overlapped\n");
2624        }
2625        tempptr ++;
2626    }
2627
2628    /* bottom up compression ------------------------------------- */
2629    secstr[count] = 0;
2630    tertstr[count] = 0;
2631    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2632    tempptr = uprv_strchr(result, 1) + 1;
2633    while (*(tempptr + 1) != 1) {
2634        /* the last secondary collation element is not checked since it is not
2635        part of the compression */
2636        if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2637            log_err("Secondary compression overlapped\n");
2638        }
2639        tempptr ++;
2640    }
2641
2642    /* tertiary top/bottom/common for en_US is similar to the secondary
2643    top/bottom/common */
2644    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2645    tempptr = uprv_strrchr(result, 1) + 1;
2646    while (*(tempptr + 1) != 0) {
2647        /* the last secondary collation element is not checked since it is not
2648        part of the compression */
2649        if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2650            log_err("Tertiary compression overlapped\n");
2651        }
2652        tempptr ++;
2653    }
2654
2655    ucol_close(coll);
2656}
2657
/**
 * Hands the same three Cyrillic test strings to the generic checker for the
 * "root" locale and then for a series of small tailorings that reset on or
 * reorder U+0410.
 */
static void TestCyrillicTailoring(void) {
  static const char *test[] = {
    "\\u0410b",
      "\\u0410\\u0306a",
      "\\u04d0A"
  };
  /* tailorings, tried in this order */
  static const char *tailorings[] = {
    "&\\u0410 = \\u0410",
    "&Z < \\u0410",
    "&\\u0410 = \\u0410 < \\u04d0",
    "&Z < \\u0410 < \\u04d0",
    "&\\u0410 = \\u0410 < \\u0410\\u0301",
    "&Z < \\u0410 < \\u0410\\u0301"
  };
  uint32_t r;

    /* Russian overrides contractions, so this test is not valid anymore */
    /*genericLocaleStarter("ru", test, 3);*/

  genericLocaleStarter("root", test, LEN(test));
  for (r = 0; r < LEN(tailorings); r++) {
    genericRulesStarter(tailorings[r], test, LEN(test));
  }
}
2676
/*
 * Runs two ordered string lists through a collator built from a rule that
 * suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
  static const char suppressRule[] = "[suppressContractions [\\u0400-\\u047f]]";

  /* Latin letter followed by Cyrillic A, optionally with a combining mark. */
  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  /* Cyrillic A, optionally with a combining mark, followed by a Latin letter. */
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  genericRulesStarter(suppressRule, testNoCont,
                      (uint32_t)(sizeof(testNoCont) / sizeof(testNoCont[0])));
  genericRulesStarter(suppressRule, testNoCont2,
                      (uint32_t)(sizeof(testNoCont2) / sizeof(testNoCont2[0])));
}
2693
2694static void TestContraction(void) {
2695    const static char *testrules[] = {
2696        "&A = AB / B",
2697        "&A = A\\u0306/\\u0306",
2698        "&c = ch / h"
2699    };
2700    const static UChar testdata[][2] = {
2701        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2702        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2703        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2704    };
2705    const static UChar testdata2[][2] = {
2706        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2707        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2708        {0x0063 /* 'c' */, 0x006C /* 'l' */}
2709    };
2710    const static char *testrules3[] = {
2711        "&z < xyz &xyzw << B",
2712        "&z < xyz &xyz << B / w",
2713        "&z < ch &achm << B",
2714        "&z < ch &a << B / chm",
2715        "&\\ud800\\udc00w << B",
2716        "&\\ud800\\udc00 << B / w",
2717        "&a\\ud800\\udc00m << B",
2718        "&a << B / \\ud800\\udc00m",
2719    };
2720
2721    UErrorCode  status   = U_ZERO_ERROR;
2722    UCollator  *coll;
2723    UChar       rule[256] = {0};
2724    uint32_t    rlen     = 0;
2725    int         i;
2726
2727    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2728        UCollationElements *iter1;
2729        int j = 0;
2730        log_verbose("Rule %s for testing\n", testrules[i]);
2731        rlen = u_unescape(testrules[i], rule, 32);
2732        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2733        if (U_FAILURE(status)) {
2734            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2735            return;
2736        }
2737        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2738        if (U_FAILURE(status)) {
2739            log_err("Collation iterator creation failed\n");
2740            return;
2741        }
2742        while (j < 2) {
2743            UCollationElements *iter2 = ucol_openElements(coll,
2744                                                         &(testdata[i][j]),
2745                                                         1, &status);
2746            uint32_t ce;
2747            if (U_FAILURE(status)) {
2748                log_err("Collation iterator creation failed\n");
2749                return;
2750            }
2751            ce = ucol_next(iter2, &status);
2752            while (ce != UCOL_NULLORDER) {
2753                if ((uint32_t)ucol_next(iter1, &status) != ce) {
2754                    log_err("Collation elements in contraction split does not match\n");
2755                    return;
2756                }
2757                ce = ucol_next(iter2, &status);
2758            }
2759            j ++;
2760            ucol_closeElements(iter2);
2761        }
2762        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2763            log_err("Collation elements not exhausted\n");
2764            return;
2765        }
2766        ucol_closeElements(iter1);
2767        ucol_close(coll);
2768    }
2769
2770    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2771    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2772    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2773        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2774                testdata2[0][0], testdata2[0][1], testdata2[1][0],
2775                testdata2[1][1]);
2776        return;
2777    }
2778    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2779        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2780                testdata2[1][0], testdata2[1][1], testdata2[2][0],
2781                testdata2[2][1]);
2782        return;
2783    }
2784    ucol_close(coll);
2785
2786    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2787        UCollator          *coll1,
2788                           *coll2;
2789        UCollationElements *iter1,
2790                           *iter2;
2791        UChar               ch = 0x0042 /* 'B' */;
2792        uint32_t            ce;
2793        rlen = u_unescape(testrules3[i], rule, 32);
2794        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2795        rlen = u_unescape(testrules3[i + 1], rule, 32);
2796        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2797        if (U_FAILURE(status)) {
2798            log_err("Collator creation failed %s\n", testrules[i]);
2799            return;
2800        }
2801        iter1 = ucol_openElements(coll1, &ch, 1, &status);
2802        iter2 = ucol_openElements(coll2, &ch, 1, &status);
2803        if (U_FAILURE(status)) {
2804            log_err("Collation iterator creation failed\n");
2805            return;
2806        }
2807        ce = ucol_next(iter1, &status);
2808        if (U_FAILURE(status)) {
2809            log_err("Retrieving ces failed\n");
2810            return;
2811        }
2812        while (ce != UCOL_NULLORDER) {
2813            if (ce != (uint32_t)ucol_next(iter2, &status)) {
2814                log_err("CEs does not match\n");
2815                return;
2816            }
2817            ce = ucol_next(iter1, &status);
2818            if (U_FAILURE(status)) {
2819                log_err("Retrieving ces failed\n");
2820                return;
2821            }
2822        }
2823        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2824            log_err("CEs not exhausted\n");
2825            return;
2826        }
2827        ucol_closeElements(iter1);
2828        ucol_closeElements(iter2);
2829        ucol_close(coll1);
2830        ucol_close(coll2);
2831    }
2832}
2833
2834static void TestExpansion(void) {
2835    const static char *testrules[] = {
2836        "&J << K / B & K << M",
2837        "&J << K / B << M"
2838    };
2839    const static UChar testdata[][3] = {
2840        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2841        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2842        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2843        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2844        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2845        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2846    };
2847
2848    UErrorCode  status   = U_ZERO_ERROR;
2849    UCollator  *coll;
2850    UChar       rule[256] = {0};
2851    uint32_t    rlen     = 0;
2852    int         i;
2853
2854    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2855        int j = 0;
2856        log_verbose("Rule %s for testing\n", testrules[i]);
2857        rlen = u_unescape(testrules[i], rule, 32);
2858        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2859        if (U_FAILURE(status)) {
2860            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2861            return;
2862        }
2863
2864        for (j = 0; j < 5; j ++) {
2865            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2866        }
2867        ucol_close(coll);
2868    }
2869}
2870
#if 0
/*
 * Disabled test: it exercises the current limitations of the collation
 * engine and therefore always fails. Each braced section below covers one
 * limitation and runs one or more ordered string lists against a rule.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};

    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, LEN(tlimit01));
    genericRulesStarter(rule, tlimit02, LEN(tlimit02));
  }

  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};

    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, LEN(tlimit01));
    genericRulesStarter(rule, tlimit02, LEN(tlimit02));
  }

  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both orders, first with normalization on, then with it off. */
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOff, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOff, LEN(att));
  }

  /* normalization: contractions spanning normalization */
  /* NOTE(review): rule and data are identical to the section above. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOff, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOff, LEN(att));
  }

  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, LEN(tlimit03), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOff, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOff, LEN(att));
  }

  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/

    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, LEN(tlimit01), att, valOn, LEN(att));
    genericRulesStarterWithOptions(rule, tlimit02, LEN(tlimit02), att, valOn, LEN(att));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }
}
#endif
2962
2963static void TestBocsuCoverage(void) {
2964  UErrorCode status = U_ZERO_ERROR;
2965  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2966  UChar       test[256] = {0};
2967  uint32_t    tlen     = u_unescape(testString, test, 32);
2968  uint8_t key[256]     = {0};
2969  uint32_t klen         = 0;
2970
2971  UCollator *coll = ucol_open("", &status);
2972  if(U_SUCCESS(status)) {
2973  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2974
2975  klen = ucol_getSortKey(coll, test, tlen, key, 256);
2976  (void)klen;    /* Suppress set but not used warning. */
2977
2978  ucol_close(coll);
2979  } else {
2980    log_data_err("Couldn't open UCA\n");
2981  }
2982}
2983
2984static void TestVariableTopSetting(void) {
2985  UErrorCode status = U_ZERO_ERROR;
2986  const UChar *current = NULL;
2987  uint32_t varTopOriginal = 0, varTop1, varTop2;
2988  UCollator *coll = ucol_open("", &status);
2989  if(U_SUCCESS(status)) {
2990
2991  uint32_t strength = 0;
2992  uint16_t specs = 0;
2993  uint32_t chOffset = 0;
2994  uint32_t chLen = 0;
2995  uint32_t exOffset = 0;
2996  uint32_t exLen = 0;
2997  uint32_t oldChOffset = 0;
2998  uint32_t oldChLen = 0;
2999  uint32_t oldExOffset = 0;
3000  uint32_t oldExLen = 0;
3001  uint32_t prefixOffset = 0;
3002  uint32_t prefixLen = 0;
3003
3004  UBool startOfRules = TRUE;
3005  UColTokenParser src;
3006  UColOptionSet opts;
3007
3008  UChar *rulesCopy = NULL;
3009  uint32_t rulesLen;
3010
3011  UCollationResult result;
3012
3013  UChar first[256] = { 0 };
3014  UChar second[256] = { 0 };
3015  UParseError parseError;
3016  int32_t myQ = getTestOption(QUICK_OPTION);
3017
3018  (void)prefixLen;        /* Suppress set but not used warnings. */
3019  (void)prefixOffset;
3020  (void)specs;
3021
3022  uprv_memset(&src, 0, sizeof(UColTokenParser));
3023
3024  src.opts = &opts;
3025
3026  if(getTestOption(QUICK_OPTION) <= 0) {
3027    setTestOption(QUICK_OPTION, 1);
3028  }
3029
3030  /* this test will fail when normalization is turned on */
3031  /* therefore we always turn off exhaustive mode for it */
3032  { /* QUICK > 0*/
3033    log_verbose("Slide variable top over UCARules\n");
3034    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3035    rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3036    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3037
3038    if(U_SUCCESS(status) && rulesLen > 0) {
3039      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3040      src.current = src.source = rulesCopy;
3041      src.end = rulesCopy+rulesLen;
3042      src.extraCurrent = src.end;
3043      src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3044
3045	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3046	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3047      while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3048        strength = src.parsedToken.strength;
3049        chOffset = src.parsedToken.charsOffset;
3050        chLen = src.parsedToken.charsLen;
3051        exOffset = src.parsedToken.extensionOffset;
3052        exLen = src.parsedToken.extensionLen;
3053        prefixOffset = src.parsedToken.prefixOffset;
3054        prefixLen = src.parsedToken.prefixLen;
3055        specs = src.parsedToken.flags;
3056
3057        startOfRules = FALSE;
3058        {
3059          log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3060        }
3061        if(strength == UCOL_PRIMARY) {
3062          status = U_ZERO_ERROR;
3063          varTopOriginal = ucol_getVariableTop(coll, &status);
3064          varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3065          if(U_FAILURE(status)) {
3066            char buffer[256];
3067            char *buf = buffer;
3068            uint32_t i = 0, j;
3069            uint32_t CE = UCOL_NO_MORE_CES;
3070
3071            /* before we start screaming, let's see if there is a problem with the rules */
3072            UErrorCode collIterateStatus = U_ZERO_ERROR;
3073            collIterate *s = uprv_new_collIterate(&collIterateStatus);
3074            uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3075
3076            CE = ucol_getNextCE(coll, s, &status);
3077            (void)CE;    /* Suppress set but not used warning. */
3078
3079            for(i = 0; i < oldChLen; i++) {
3080              j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3081              buf += j;
3082            }
3083            if(status == U_PRIMARY_TOO_LONG_ERROR) {
3084              log_verbose("= Expected failure for %s =", buffer);
3085            } else {
3086              if(uprv_collIterateAtEnd(s)) {
3087                log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3088                  oldChOffset, u_errorName(status), buffer);
3089              } else {
3090                log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3091                  buffer);
3092              }
3093            }
3094            uprv_delete_collIterate(s);
3095          }
3096          varTop2 = ucol_getVariableTop(coll, &status);
3097          if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3098            log_err("cannot retrieve set varTop value!\n");
3099            continue;
3100          }
3101
3102          if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3103
3104            u_strncpy(first, src.source+oldChOffset, oldChLen);
3105            u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3106            u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3107            first[2*oldChLen+chLen] = 0;
3108
3109            if(oldExLen == 0) {
3110              u_strncpy(second, src.source+chOffset, chLen);
3111              second[chLen] = 0;
3112            } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3113              u_strncpy(second, src.source+oldExOffset, oldExLen);
3114              u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3115              u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3116              second[2*oldExLen+chLen] = 0;
3117            }
3118            result = ucol_strcoll(coll, first, -1, second, -1);
3119            if(result == UCOL_EQUAL) {
3120              doTest(coll, first, second, UCOL_EQUAL);
3121            } else {
3122              log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3123            }
3124          }
3125        }
3126        if(strength != UCOL_TOK_RESET) {
3127          oldChOffset = chOffset;
3128          oldChLen = chLen;
3129          oldExOffset = exOffset;
3130          oldExLen = exLen;
3131        }
3132      }
3133      status = U_ZERO_ERROR;
3134    }
3135    else {
3136      log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3137      return;
3138    }
3139    if (U_FAILURE(status)) {
3140        log_err("Error parsing rules %s\n", u_errorName(status));
3141        return;
3142    }
3143    status = U_ZERO_ERROR;
3144  }
3145
3146  setTestOption(QUICK_OPTION, myQ);
3147
3148  log_verbose("Testing setting variable top to contractions\n");
3149  {
3150    UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3151    int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3152    while(*conts != 0) {
3153      /*
3154       * A continuation is NUL-terminated and NUL-padded
3155       * except if it has the maximum length.
3156       */
3157      int32_t contractionLength = maxUCAContractionLength;
3158      while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3159        --contractionLength;
3160      }
3161      if(*(conts+1)==0) { /* pre-context */
3162        varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3163      } else {
3164        varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3165      }
3166      if(U_FAILURE(status)) {
3167        if(status == U_PRIMARY_TOO_LONG_ERROR) {
3168          /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3169           * therefore it is not an error when it complains about them. */
3170          log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3171                      *conts, *(conts+1), *(conts+2));
3172        } else {
3173          log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3174                  *conts, *(conts+1), *(conts+2), u_errorName(status));
3175        }
3176        status = U_ZERO_ERROR;
3177      }
3178      conts+=maxUCAContractionLength;
3179    }
3180
3181    status = U_ZERO_ERROR;
3182
3183    first[0] = 0x0040;
3184    first[1] = 0x0050;
3185    first[2] = 0x0000;
3186
3187    ucol_setVariableTop(coll, first, -1, &status);
3188
3189    if(U_SUCCESS(status)) {
3190      log_err("Invalid contraction succeded in setting variable top!\n");
3191    }
3192
3193  }
3194
3195  log_verbose("Test restoring variable top\n");
3196
3197  status = U_ZERO_ERROR;
3198  ucol_restoreVariableTop(coll, varTopOriginal, &status);
3199  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3200    log_err("Couldn't restore old variable top\n");
3201  }
3202
3203  log_verbose("Testing calling with error set\n");
3204
3205  status = U_INTERNAL_PROGRAM_ERROR;
3206  varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3207  varTop2 = ucol_getVariableTop(coll, &status);
3208  ucol_restoreVariableTop(coll, varTop2, &status);
3209  varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3210  varTop2 = ucol_getVariableTop(NULL, &status);
3211  ucol_restoreVariableTop(NULL, varTop2, &status);
3212  if(status != U_INTERNAL_PROGRAM_ERROR) {
3213    log_err("Bad reaction to passed error!\n");
3214  }
3215  uprv_free(src.source);
3216  ucol_close(coll);
3217  } else {
3218    log_data_err("Couldn't open UCA collator\n");
3219  }
3220
3221}
3222
3223static void TestNonChars(void) {
3224  static const char *test[] = {
3225      "\\u0000",  /* ignorable */
3226      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3227      "\\uFDD0", "\\uFDEF",
3228      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3229      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3230      "\\U0003FFFE", "\\U0003FFFF",
3231      "\\U0004FFFE", "\\U0004FFFF",
3232      "\\U0005FFFE", "\\U0005FFFF",
3233      "\\U0006FFFE", "\\U0006FFFF",
3234      "\\U0007FFFE", "\\U0007FFFF",
3235      "\\U0008FFFE", "\\U0008FFFF",
3236      "\\U0009FFFE", "\\U0009FFFF",
3237      "\\U000AFFFE", "\\U000AFFFF",
3238      "\\U000BFFFE", "\\U000BFFFF",
3239      "\\U000CFFFE", "\\U000CFFFF",
3240      "\\U000DFFFE", "\\U000DFFFF",
3241      "\\U000EFFFE", "\\U000EFFFF",
3242      "\\U000FFFFE", "\\U000FFFFF",
3243      "\\U0010FFFE", "\\U0010FFFF",
3244      "\\uFFFF"  /* special character with maximum primary weight */
3245  };
3246  UErrorCode status = U_ZERO_ERROR;
3247  UCollator *coll = ucol_open("en_US", &status);
3248
3249  log_verbose("Test non characters\n");
3250
3251  if(U_SUCCESS(status)) {
3252    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3253  } else {
3254    log_err_status(status, "Unable to open collator\n");
3255  }
3256
3257  ucol_close(coll);
3258}
3259
/*
 * Stresses sort-key compression: for every length from 20 to 499 it builds
 * four strings made of 'a' repeated (length-1) times followed by 'a', 'b',
 * 'c' or 'd', and runs them as an ordered list through the "en_US" locale.
 *
 * NOTE(review): the malloc() results are used without a NULL check.
 */
static void TestExtremeCompression(void) {
  enum { kStringCount = 4, kCapacity = 2048 };
  static char *test[kStringCount];
  int32_t length = 0, k = 0;

  for(k = 0; k < kStringCount; ++k) {
    test[k] = (char *)malloc(kCapacity*sizeof(char));
  }

  for(length = 20; length < 500; ++length) {
    for(k = 0; k < kStringCount; ++k) {
      char *s = test[k];
      /* Common run of 'a', then the one distinguishing final letter. */
      uprv_memset(s, 'a', (length-1)*sizeof(char));
      s[length-1] = (char)('a'+k);
      s[length] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, kStringCount);
  }

  for(k = 0; k < kStringCount; ++k) {
    free(test[k]);
  }
}
3282
#if 0
/*
 * Disabled alternative to the TestExtremeCompression() above (same name, so
 * only one of the two can be compiled).
 *
 * It fills four 2048-byte strings for every length from 10 to 2047 but calls
 * genericLocaleStarter() only once, after the loop, so only the final
 * contents are compared. The second loop then refills test[0] without
 * running any further check.
 *
 * The previously declared collator was removed: it was opened with the
 * UErrorCode passed by value instead of by pointer, and was never used or
 * closed. genericLocaleStarter() is given the locale name directly.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;

  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
3312
/*
 * Tailors several supplementary code points (written as surrogate pairs),
 * alone and followed by Latin letters, after 'z' and checks that the strings
 * in test[] come out in the listed order under that tailoring.
 */
static void TestSurrogates(void) {
  static const char *rule =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *test[] = {
    "z",
    "\\ud900\\udc25",
    "\\ud805\\udc50",
    "\\ud800\\udc00y",
    "\\ud800\\udc00r",
    "\\ud800\\udc00f",
    "\\ud800\\udc00",
    "\\ud800\\udc00c",
    "\\ud800\\udc00b",
    "\\ud800\\udc00fa",
    "\\ud800\\udc00fb",
    "\\ud800\\udc00a",
    "c",
    "b"
  };
  /* All 14 strings take part in the ordering check. */
  const uint32_t count = (uint32_t)(sizeof(test) / sizeof(test[0]));

  genericRulesStarter(rule, test, count);
}
3333
/*
 * Test for the prefix ("prefix|character") rule syntax that the JIS X 4061
 * collation rules rely on. Each table entry pairs a rule string with an
 * ordered list of strings; only the first `len` strings of an entry are
 * checked (the last entry lists five strings but checks four).
 */
static void TestPrefix(void) {
  static const struct {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } tests[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const uint32_t testCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
  uint32_t t = 0;

  while(t < testCount) {
    genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
    ++t;
  }
}
3359
3360/* This test uses data suplied by Masashiko Maedera to test the implementation */
3361/* JIS X 4061 collation order implementation                                   */
3362static void TestNewJapanese(void) {
3363
3364  static const char * const test1[] = {
3365      "\\u30b7\\u30e3\\u30fc\\u30ec",
3366      "\\u30b7\\u30e3\\u30a4",
3367      "\\u30b7\\u30e4\\u30a3",
3368      "\\u30b7\\u30e3\\u30ec",
3369      "\\u3061\\u3087\\u3053",
3370      "\\u3061\\u3088\\u3053",
3371      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3372      "\\u3066\\u30fc\\u305f",
3373      "\\u30c6\\u30fc\\u30bf",
3374      "\\u30c6\\u30a7\\u30bf",
3375      "\\u3066\\u3048\\u305f",
3376      "\\u3067\\u30fc\\u305f",
3377      "\\u30c7\\u30fc\\u30bf",
3378      "\\u30c7\\u30a7\\u30bf",
3379      "\\u3067\\u3048\\u305f",
3380      "\\u3066\\u30fc\\u305f\\u30fc",
3381      "\\u30c6\\u30fc\\u30bf\\u30a1",
3382      "\\u30c6\\u30a7\\u30bf\\u30fc",
3383      "\\u3066\\u3047\\u305f\\u3041",
3384      "\\u3066\\u3048\\u305f\\u30fc",
3385      "\\u3067\\u30fc\\u305f\\u30fc",
3386      "\\u30c7\\u30fc\\u30bf\\u30a1",
3387      "\\u3067\\u30a7\\u305f\\u30a1",
3388      "\\u30c7\\u3047\\u30bf\\u3041",
3389      "\\u30c7\\u30a8\\u30bf\\u30a2",
3390      "\\u3072\\u3086",
3391      "\\u3073\\u3085\\u3042",
3392      "\\u3074\\u3085\\u3042",
3393      "\\u3073\\u3085\\u3042\\u30fc",
3394      "\\u30d3\\u30e5\\u30a2\\u30fc",
3395      "\\u3074\\u3085\\u3042\\u30fc",
3396      "\\u30d4\\u30e5\\u30a2\\u30fc",
3397      "\\u30d2\\u30e5\\u30a6",
3398      "\\u30d2\\u30e6\\u30a6",
3399      "\\u30d4\\u30e5\\u30a6\\u30a2",
3400      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3401      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3402      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3403      "\\u3072\\u3085\\u3093",
3404      "\\u3074\\u3085\\u3093",
3405      "\\u3075\\u30fc\\u308a",
3406      "\\u30d5\\u30fc\\u30ea",
3407      "\\u3075\\u3045\\u308a",
3408      "\\u3075\\u30a5\\u308a",
3409      "\\u3075\\u30a5\\u30ea",
3410      "\\u30d5\\u30a6\\u30ea",
3411      "\\u3076\\u30fc\\u308a",
3412      "\\u30d6\\u30fc\\u30ea",
3413      "\\u3076\\u3045\\u308a",
3414      "\\u30d6\\u30a5\\u308a",
3415      "\\u3077\\u3046\\u308a",
3416      "\\u30d7\\u30a6\\u30ea",
3417      "\\u3075\\u30fc\\u308a\\u30fc",
3418      "\\u30d5\\u30a5\\u30ea\\u30fc",
3419      "\\u3075\\u30a5\\u308a\\u30a3",
3420      "\\u30d5\\u3045\\u308a\\u3043",
3421      "\\u30d5\\u30a6\\u30ea\\u30fc",
3422      "\\u3075\\u3046\\u308a\\u3043",
3423      "\\u30d6\\u30a6\\u30ea\\u30a4",
3424      "\\u3077\\u30fc\\u308a\\u30fc",
3425      "\\u3077\\u30a5\\u308a\\u30a4",
3426      "\\u3077\\u3046\\u308a\\u30fc",
3427      "\\u30d7\\u30a6\\u30ea\\u30a4",
3428      "\\u30d5\\u30fd",
3429      "\\u3075\\u309e",
3430      "\\u3076\\u309d",
3431      "\\u3076\\u3075",
3432      "\\u3076\\u30d5",
3433      "\\u30d6\\u3075",
3434      "\\u30d6\\u30d5",
3435      "\\u3076\\u309e",
3436      "\\u3076\\u3077",
3437      "\\u30d6\\u3077",
3438      "\\u3077\\u309d",
3439      "\\u30d7\\u30fd",
3440      "\\u3077\\u3075",
3441};
3442
3443  static const char *test2[] = {
3444    "\\u306f\\u309d", /* H\\u309d */
3445    "\\u30cf\\u30fd", /* K\\u30fd */
3446    "\\u306f\\u306f", /* HH */
3447    "\\u306f\\u30cf", /* HK */
3448    "\\u30cf\\u30cf", /* KK */
3449    "\\u306f\\u309e", /* H\\u309e */
3450    "\\u30cf\\u30fe", /* K\\u30fe */
3451    "\\u306f\\u3070", /* HH\\u309b */
3452    "\\u30cf\\u30d0", /* KK\\u309b */
3453    "\\u306f\\u3071", /* HH\\u309c */
3454    "\\u30cf\\u3071", /* KH\\u309c */
3455    "\\u30cf\\u30d1", /* KK\\u309c */
3456    "\\u3070\\u309d", /* H\\u309b\\u309d */
3457    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3458    "\\u3070\\u306f", /* H\\u309bH */
3459    "\\u30d0\\u30cf", /* K\\u309bK */
3460    "\\u3070\\u309e", /* H\\u309b\\u309e */
3461    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3462    "\\u3070\\u3070", /* H\\u309bH\\u309b */
3463    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3464    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3465    "\\u3070\\u3071", /* H\\u309bH\\u309c */
3466    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3467    "\\u3071\\u309d", /* H\\u309c\\u309d */
3468    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3469    "\\u3071\\u306f", /* H\\u309cH */
3470    "\\u30d1\\u30cf", /* K\\u309cK */
3471    "\\u3071\\u3070", /* H\\u309cH\\u309b */
3472    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3473    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3474    "\\u3071\\u3071", /* H\\u309cH\\u309c */
3475    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3476  };
3477  /*
3478  static const char *test3[] = {
3479    "\\u221er\\u221e",
3480    "\\u221eR#",
3481    "\\u221et\\u221e",
3482    "#r\\u221e",
3483    "#R#",
3484    "#t%",
3485    "#T%",
3486    "8t\\u221e",
3487    "8T\\u221e",
3488    "8t#",
3489    "8T#",
3490    "8t%",
3491    "8T%",
3492    "8t8",
3493    "8T8",
3494    "\\u03c9r\\u221e",
3495    "\\u03a9R%",
3496    "rr\\u221e",
3497    "rR\\u221e",
3498    "Rr\\u221e",
3499    "RR\\u221e",
3500    "RT%",
3501    "rt8",
3502    "tr\\u221e",
3503    "tr8",
3504    "TR8",
3505    "tt8",
3506    "\\u30b7\\u30e3\\u30fc\\u30ec",
3507  };
3508  */
3509  static const UColAttribute att[] = { UCOL_STRENGTH };
3510  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3511
3512  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3513  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3514
3515  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3516  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3517  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3518  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3519  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3520}
3521
3522static void TestStrCollIdenticalPrefix(void) {
3523  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3524  const char* test[] = {
3525    "ab\\ud9b0\\udc70",
3526    "ab\\ud9b0\\udc71"
3527  };
3528  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3529}
3530/* Contractions should have all their canonically equivalent */
3531/* strings included */
3532static void TestContractionClosure(void) {
3533  static const struct {
3534    const char *rules;
3535    const char *data[10];
3536    const uint32_t len;
3537  } tests[] = {
3538    {   "&b=\\u00e4\\u00e4",
3539      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3540    {   "&b=\\u00C5",
3541      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3542  };
3543  uint32_t i;
3544
3545
3546  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3547    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3548  }
3549}
3550
/* This test also fails */
static void TestBeforePrefixFailure(void) {
  /*
   * Each case is a tailoring that uses [before 3] plus the strings, in the
   * order they are handed to genericRulesStarter() for checking.
   */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } beforeCases[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const uint32_t caseCount = sizeof(beforeCases)/sizeof(beforeCases[0]);
  uint32_t n = 0;

  while(n < caseCount) {
    genericRulesStarter(beforeCases[n].rules, beforeCases[n].data, beforeCases[n].len);
    ++n;
  }

  /* Everything below is compiled out: prefix-rule variants of the cases above
   * and a debugging aid that dumps collation elements in verbose mode. */
#if 0
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
/* Debugging aid: opens a collator from rule1 and logs (verbose only) the */
/* collation elements that its element iterator produces for each test    */
/* string.                                                                */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3628
3629static void TestPrefixCompose(void) {
3630  const char* rule1 =
3631        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3632  /*
3633  const char* test[] = {
3634      "\\u30c6\\u30fc\\u30bf",
3635      "\\u30c6\\u30a7\\u30bf",
3636  };
3637  */
3638  {
3639    UErrorCode status = U_ZERO_ERROR;
3640    /*uint32_t i = 0;*/
3641    /*UCollationElements *it = NULL;*/
3642/*    uint32_t CE;*/
3643    UChar string[256];
3644    uint32_t uStringLen;
3645    UCollator *coll = NULL;
3646
3647    uStringLen = u_unescape(rule1, string, 256);
3648
3649    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3650    ucol_close(coll);
3651  }
3652
3653
3654}
3655
3656/*
3657[last variable] last variable value
3658[last primary ignorable] largest CE for primary ignorable
3659[last secondary ignorable] largest CE for secondary ignorable
3660[last tertiary ignorable] largest CE for tertiary ignorable
3661[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3662*/
3663
/*
 * Runs a table of tailorings that use the special reset positions
 * ([first/last tertiary|secondary|primary ignorable], [first/last variable],
 * [first/last regular], [first/last implicit], [top]), with and without
 * [before N]. Each case hands its rule string and its list of strings to
 * genericRulesStarter().
 */
static void TestRuleOptions(void) {
  /* values here are hardcoded and are correct for the current UCA
   * when the UCA changes, one might be forced to change these
   * values.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  /* rules: tailoring; data: strings passed to genericRulesStarter();
   * len: how many entries of data are passed. */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */

    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first secondary ignorable */

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last secondary ignorable */

    /* 'normal' befores */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },

    /* NOTE(review): data lists 8 strings but len is 7, so the final "u"
     * (the one tailored after [top]) is never passed on - confirm this is
     * intentional. */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  uint32_t i;

  /* Run every table entry through the generic rule-based test driver. */
  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
  }
}
3778
3779
/*
 * Smoke test for the [optimize ...] rule option. It is not a real test of
 * the option: it only checks that building a collator which copies UCA
 * contents does not fail. The resulting ordering is the same with or without
 * the option, so nothing more specific can be verified here.
 */
static void TestOptimize(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } optimizeCases[] = {
    { "[optimize [\\uAC00-\\uD7FF]]",
      { "a", "b"}, 2 }
  };
  const uint32_t caseCount = sizeof(optimizeCases)/sizeof(optimizeCases[0]);
  uint32_t idx;

  for(idx = 0; idx != caseCount; ++idx) {
    genericRulesStarter(optimizeCases[idx].rules, optimizeCases[idx].data, optimizeCases[idx].len);
  }
}
3801
3802/*
3803cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3804weiv    ucol_strcollIter?
3805cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3806weiv    these are the input strings?
3807cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3808weiv    will check - could be a problem with utf-8 iterator
3809cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3810weiv    hmmm
3811cycheng@ca.ibm.c... note that we have a standalone high surrogate
3812weiv    that doesn't sound right
3813cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3814weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3815cycheng@ca.ibm.c... yes
3816weiv    and then do the comparison
3817cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3818weiv    utf-16 strings look like a little endian ones in the example you sent me
3819weiv    It could be a bug - let me try to test it out
3820cycheng@ca.ibm.c... ok
3821cycheng@ca.ibm.c... we can wait till the conf. call
3822cycheng@ca.ibm.c... next weke
3823weiv    that would be great
3824weiv    hmmm
3825weiv    I might be wrong
3826weiv    let me play with it some more
3827cycheng@ca.ibm.c... ok
3828cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3829cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3830cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3831weiv    ok
3832cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3833weiv    thanks
3834cycheng@ca.ibm.c... the 4 strings we sent are just samples
3835*/
/*
 * Disabled (compiled out). Compares one hard-coded pair of strings through
 * ucol_strcollIter() twice - once via UTF-16BE iterators and once via UTF-8
 * iterators - with normalization on, and reports an error if the two
 * results differ. Alexis2() below is the live variant.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* The two strings as raw UTF-16BE bytes: D800 0021 and FFFC 0062. */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* The UTF-8 byte sequences used for the same two strings. */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  /* Lengths are given in bytes (4 per string). */
  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3876
3877#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3878static void Alexis2(void) {
3879  UErrorCode status = U_ZERO_ERROR;
3880  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3881  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3882  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3883  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3884
3885  UConverter *conv = NULL;
3886
3887  UCharIterator U16BEItS, U16BEItT;
3888  UCharIterator U8ItS, U8ItT;
3889
3890  UCollationResult resU16, resU16BE, resU8;
3891
3892  static const char* const pairs[][2] = {
3893    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3894    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3895    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3896    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3897    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3898    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3899    { "\\u0020", "\\u0020\\u0000"}
3900/*
39015F20 (my result here)
39025F204E008E3F
39035F20 (your result here)
3904*/
3905  };
3906
3907  int32_t i = 0;
3908
3909  UCollator *coll = ucol_open("", &status);
3910  if(status == U_FILE_ACCESS_ERROR) {
3911    log_data_err("Is your data around?\n");
3912    return;
3913  } else if(U_FAILURE(status)) {
3914    log_err("Error opening collator\n");
3915    return;
3916  }
3917  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3918  conv = ucnv_open("UTF16BE", &status);
3919  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3920    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3921    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3922
3923    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3924
3925    log_verbose("Result of strcoll is %i\n", resU16);
3926
3927    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3928    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3929    (void)U16BELenS;    /* Suppress set but not used warnings. */
3930    (void)U16BELenT;
3931
3932    /* use the original sizes, as the result from converter is in bytes */
3933    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3934    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3935
3936    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3937
3938    log_verbose("Result of U16BE is %i\n", resU16BE);
3939
3940    if(resU16 != resU16BE) {
3941      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3942    }
3943
3944    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3945    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3946
3947    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3948    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3949
3950    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3951
3952    if(resU16 != resU8) {
3953      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3954    }
3955
3956  }
3957
3958  ucol_close(coll);
3959  ucnv_close(conv);
3960}
3961
3962static void TestHebrewUCA(void) {
3963  UErrorCode status = U_ZERO_ERROR;
3964  static const char *first[] = {
3965    "d790d6b8d79cd795d6bcd7a9",
3966    "d790d79cd79ed7a7d799d799d7a1",
3967    "d790d6b4d79ed795d6bcd7a9",
3968  };
3969
3970  char utf8String[3][256];
3971  UChar utf16String[3][256];
3972
3973  int32_t i = 0, j = 0;
3974  int32_t sizeUTF8[3];
3975  int32_t sizeUTF16[3];
3976
3977  UCollator *coll = ucol_open("", &status);
3978  if (U_FAILURE(status)) {
3979      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3980      return;
3981  }
3982  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3983
3984  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3985    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3986    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3987    log_verbose("%i: ");
3988    for(j = 0; j < sizeUTF16[i]; j++) {
3989      /*log_verbose("\\u%04X", utf16String[i][j]);*/
3990      log_verbose("%04X", utf16String[i][j]);
3991    }
3992    log_verbose("\n");
3993  }
3994  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3995    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3996      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3997    }
3998  }
3999
4000  ucol_close(coll);
4001
4002}
4003
4004static void TestPartialSortKeyTermination(void) {
4005  static const char* cases[] = {
4006    "\\u1234\\u1234\\udc00",
4007    "\\udc00\\ud800\\ud800"
4008  };
4009
4010  int32_t i = sizeof(UCollator);
4011
4012  UErrorCode status = U_ZERO_ERROR;
4013
4014  UCollator *coll = ucol_open("", &status);
4015
4016  UCharIterator iter;
4017
4018  UChar currCase[256];
4019  int32_t length = 0;
4020  int32_t pKeyLen = 0;
4021
4022  uint8_t key[256];
4023
4024  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
4025    uint32_t state[2] = {0, 0};
4026    length = u_unescape(cases[i], currCase, 256);
4027    uiter_setString(&iter, currCase, length);
4028    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
4029    (void)pKeyLen;   /* Suppress set but not used warning. */
4030
4031    log_verbose("Done\n");
4032
4033  }
4034  ucol_close(coll);
4035}
4036
4037static void TestSettings(void) {
4038  static const char* cases[] = {
4039    "apple",
4040      "Apple"
4041  };
4042
4043  static const char* locales[] = {
4044    "",
4045      "en"
4046  };
4047
4048  UErrorCode status = U_ZERO_ERROR;
4049
4050  int32_t i = 0, j = 0;
4051
4052  UChar source[256], target[256];
4053  int32_t sLen = 0, tLen = 0;
4054
4055  UCollator *collateObject = NULL;
4056  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4057    collateObject = ucol_open(locales[i], &status);
4058    ucol_setStrength(collateObject, UCOL_PRIMARY);
4059    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4060    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4061      sLen = u_unescape(cases[j-1], source, 256);
4062      source[sLen] = 0;
4063      tLen = u_unescape(cases[j], target, 256);
4064      source[tLen] = 0;
4065      doTest(collateObject, source, target, UCOL_EQUAL);
4066    }
4067    ucol_close(collateObject);
4068  }
4069}
4070
4071static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4072    UErrorCode status = U_ZERO_ERROR;
4073    int32_t errorNo = 0;
4074    const UChar *sourceRules = NULL;
4075    int32_t sourceRulesLen = 0;
4076    UParseError parseError;
4077    UColAttributeValue french = UCOL_OFF;
4078
4079    if(!ucol_equals(source, target)) {
4080        log_err("Same collators, different address not equal\n");
4081        errorNo++;
4082    }
4083    ucol_close(target);
4084    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4085        target = ucol_safeClone(source, NULL, NULL, &status);
4086        if(U_FAILURE(status)) {
4087            log_err("Error creating clone\n");
4088            errorNo++;
4089            return errorNo;
4090        }
4091        if(!ucol_equals(source, target)) {
4092            log_err("Collator different from it's clone\n");
4093            errorNo++;
4094        }
4095        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4096        if(french == UCOL_ON) {
4097            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4098        } else {
4099            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4100        }
4101        if(U_FAILURE(status)) {
4102            log_err("Error setting attributes\n");
4103            errorNo++;
4104            return errorNo;
4105        }
4106        if(ucol_equals(source, target)) {
4107            log_err("Collators same even when options changed\n");
4108            errorNo++;
4109        }
4110        ucol_close(target);
4111
4112        sourceRules = ucol_getRules(source, &sourceRulesLen);
4113        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4114        if(U_FAILURE(status)) {
4115            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
4116            errorNo++;
4117            return errorNo;
4118        }
4119        if(!ucol_equals(source, target)) {
4120            log_err("Collator different from collator that was created from the same rules\n");
4121            errorNo++;
4122        }
4123        ucol_close(target);
4124    }
4125    return errorNo;
4126}
4127
4128
4129static void TestEquals(void) {
4130    /* ucol_equals is not currently a public API. There is a chance that it will become
4131    * something like this, but currently it is only used by RuleBasedCollator::operator==
4132    */
4133    /* test whether the two collators instantiated from the same locale are equal */
4134    UErrorCode status = U_ZERO_ERROR;
4135    UParseError parseError;
4136    int32_t noOfLoc = uloc_countAvailable();
4137    const char *locName = NULL;
4138    UCollator *source = NULL, *target = NULL;
4139    int32_t i = 0;
4140
4141    const char* rules[] = {
4142        "&l < lj <<< Lj <<< LJ",
4143        "&n < nj <<< Nj <<< NJ",
4144        "&ae <<< \\u00e4",
4145        "&AE <<< \\u00c4"
4146    };
4147    /*
4148    const char* badRules[] = {
4149    "&l <<< Lj",
4150    "&n < nj <<< nJ <<< NJ",
4151    "&a <<< \\u00e4",
4152    "&AE <<< \\u00c4 <<< x"
4153    };
4154    */
4155
4156    UChar sourceRules[1024], targetRules[1024];
4157    int32_t sourceRulesSize = 0, targetRulesSize = 0;
4158    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4159
4160    for(i = 0; i < rulesSize; i++) {
4161        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4162        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4163    }
4164
4165    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4166    if(status == U_FILE_ACCESS_ERROR) {
4167        log_data_err("Is your data around?\n");
4168        return;
4169    } else if(U_FAILURE(status)) {
4170        log_err("Error opening collator\n");
4171        return;
4172    }
4173    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4174    if(!ucol_equals(source, target)) {
4175        log_err("Equivalent collators not equal!\n");
4176    }
4177    ucol_close(source);
4178    ucol_close(target);
4179
4180    source = ucol_open("root", &status);
4181    target = ucol_open("root", &status);
4182    log_verbose("Testing root\n");
4183    if(!ucol_equals(source, source)) {
4184        log_err("Same collator not equal\n");
4185    }
4186    if(TestEqualsForCollator(locName, source, target)) {
4187        log_err("Errors for root\n", locName);
4188    }
4189    ucol_close(source);
4190
4191    for(i = 0; i<noOfLoc; i++) {
4192        status = U_ZERO_ERROR;
4193        locName = uloc_getAvailable(i);
4194        /*if(hasCollationElements(locName)) {*/
4195        log_verbose("Testing equality for locale %s\n", locName);
4196        source = ucol_open(locName, &status);
4197        target = ucol_open(locName, &status);
4198        if (U_FAILURE(status)) {
4199            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4200            continue;
4201        }
4202        if(TestEqualsForCollator(locName, source, target)) {
4203            log_err("Errors for locale %s\n", locName);
4204        }
4205        ucol_close(source);
4206        /*}*/
4207    }
4208}
4209
4210static void TestJ2726(void) {
4211    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4212    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4213    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4214    UErrorCode status = U_ZERO_ERROR;
4215    UCollator *coll = ucol_open("en", &status);
4216    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4217    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4218    doTest(coll, a, aSpace, UCOL_EQUAL);
4219    doTest(coll, aSpace, a, UCOL_EQUAL);
4220    doTest(coll, a, spaceA, UCOL_EQUAL);
4221    doTest(coll, spaceA, a, UCOL_EQUAL);
4222    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4223    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4224    ucol_close(coll);
4225}
4226
4227static void NullRule(void) {
4228    UChar r[3] = {0};
4229    UErrorCode status = U_ZERO_ERROR;
4230    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4231    if(U_SUCCESS(status)) {
4232        log_err("This should have been an error!\n");
4233        ucol_close(coll);
4234    } else {
4235        status = U_ZERO_ERROR;
4236    }
4237    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4238    if(U_FAILURE(status)) {
4239        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4240    } else {
4241        ucol_close(coll);
4242    }
4243}
4244
4245/**
4246 * Test for CollationElementIterator previous and next for the whole set of
4247 * unicode characters with normalization on.
4248 */
4249static void TestNumericCollation(void)
4250{
4251    UErrorCode status = U_ZERO_ERROR;
4252
4253    const static char *basicTestStrings[]={
4254    "hello1",
4255    "hello2",
4256    "hello2002",
4257    "hello2003",
4258    "hello123456",
4259    "hello1234567",
4260    "hello10000000",
4261    "hello100000000",
4262    "hello1000000000",
4263    "hello10000000000",
4264    };
4265
4266    const static char *preZeroTestStrings[]={
4267    "avery10000",
4268    "avery010000",
4269    "avery0010000",
4270    "avery00010000",
4271    "avery000010000",
4272    "avery0000010000",
4273    "avery00000010000",
4274    "avery000000010000",
4275    };
4276
4277    const static char *thirtyTwoBitNumericStrings[]={
4278    "avery42949672960",
4279    "avery42949672961",
4280    "avery42949672962",
4281    "avery429496729610"
4282    };
4283
4284     const static char *longNumericStrings[]={
4285     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4286        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4287        are treated as multiple collation elements. */
4288    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4289    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4290    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4291    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4292    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4293    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4294    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4295    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4296    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4297    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4298    };
4299
4300    const static char *supplementaryDigits[] = {
4301      "\\uD835\\uDFCE", /* 0 */
4302      "\\uD835\\uDFCF", /* 1 */
4303      "\\uD835\\uDFD0", /* 2 */
4304      "\\uD835\\uDFD1", /* 3 */
4305      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4306      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4307      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4308      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4309      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4310      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4311    };
4312
4313    const static char *foreignDigits[] = {
4314      "\\u0661",
4315        "\\u0662",
4316        "\\u0663",
4317      "\\u0661\\u0660",
4318      "\\u0661\\u0662",
4319      "\\u0661\\u0663",
4320      "\\u0662\\u0660",
4321      "\\u0662\\u0662",
4322      "\\u0662\\u0663",
4323      "\\u0663\\u0660",
4324      "\\u0663\\u0662",
4325      "\\u0663\\u0663"
4326    };
4327
4328    const static char *evenZeroes[] = {
4329      "2000",
4330      "2001",
4331        "2002",
4332        "2003"
4333    };
4334
4335    UColAttribute att = UCOL_NUMERIC_COLLATION;
4336    UColAttributeValue val = UCOL_ON;
4337
4338    /* Open our collator. */
4339    UCollator* coll = ucol_open("root", &status);
4340    if (U_FAILURE(status)){
4341        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4342              myErrorName(status));
4343        return;
4344    }
4345    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4346    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4347    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4348    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4349    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4350    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4351
4352    /* Setting up our collator to do digits. */
4353    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4354    if (U_FAILURE(status)){
4355        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4356              myErrorName(status));
4357        return;
4358    }
4359
4360    /*
4361       Testing that prepended zeroes still yield the correct collation behavior.
4362       We expect that every element in our strings array will be equal.
4363    */
4364    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4365
4366    ucol_close(coll);
4367}
4368
4369static void TestTibetanConformance(void)
4370{
4371    const char* test[] = {
4372        "\\u0FB2\\u0591\\u0F71\\u0061",
4373        "\\u0FB2\\u0F71\\u0061"
4374    };
4375
4376    UErrorCode status = U_ZERO_ERROR;
4377    UCollator *coll = ucol_open("", &status);
4378    UChar source[100];
4379    UChar target[100];
4380    int result;
4381    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4382    if (U_SUCCESS(status)) {
4383        u_unescape(test[0], source, 100);
4384        u_unescape(test[1], target, 100);
4385        doTest(coll, source, target, UCOL_EQUAL);
4386        result = ucol_strcoll(coll, source, -1,   target, -1);
4387        log_verbose("result %d\n", result);
4388        if (UCOL_EQUAL != result) {
4389            log_err("Tibetan comparison error\n");
4390        }
4391    }
4392    ucol_close(coll);
4393
4394    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4395}
4396
/* Runs the two-string ordering check below against the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
4401
/* Inclusive upper bound of the raw values walked by TestImplicitGeneration()
 * and accepted by showImplicit(). */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to implicit values in TestImplicitGeneration().
 * NOTE: each expansion ends with its own ';', so these macros can only be
 * used as the final token of a statement (e.g. "x = y & topByte;"), never in
 * the middle of an expression.
 */
#define topByte 0xFF000000;
#define bottomByte 0xFF;
#define fourBytes 0xFFFFFFFF;
4406
4407
4408static void showImplicit(UChar32 i) {
4409    if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4410        log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4411    }
4412}
4413
4414static void TestImplicitGeneration(void) {
4415    UErrorCode status = U_ZERO_ERROR;
4416    UChar32 last = 0;
4417    UChar32 current;
4418    UChar32 i = 0, j = 0;
4419    UChar32 roundtrip = 0;
4420    UChar32 lastBottom = 0;
4421    UChar32 currentBottom = 0;
4422    UChar32 lastTop = 0;
4423    UChar32 currentTop = 0;
4424
4425    UCollator *coll = ucol_open("root", &status);
4426    if(U_FAILURE(status)) {
4427        log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4428        return;
4429    }
4430
4431    uprv_uca_getRawFromImplicit(0xE20303E7);
4432
4433    for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4434        current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4435
4436        /* check that it round-trips AND that all intervening ones are illegal*/
4437        roundtrip = uprv_uca_getRawFromImplicit(current);
4438        if (roundtrip != i) {
4439            log_err("No roundtrip %08X\n", i);
4440        }
4441        if (last != 0) {
4442            for (j = last + 1; j < current; ++j) {
4443                roundtrip = uprv_uca_getRawFromImplicit(j);
4444                /* raise an error if it *doesn't* find an error*/
4445                if (roundtrip != -1) {
4446                    log_err("Fails to recognize illegal %08X\n", j);
4447                }
4448            }
4449        }
4450        /* now do other consistency checks*/
4451        lastBottom = last & bottomByte;
4452        currentBottom = current & bottomByte;
4453        lastTop = last & topByte;
4454        currentTop = current & topByte;
4455        (void)lastBottom;     /* Suppress set but not used warnings. */
4456        (void)currentBottom;
4457
4458        /* print out some values for spot-checking*/
4459        if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4460            showImplicit(i-3);
4461            showImplicit(i-2);
4462            showImplicit(i-1);
4463            showImplicit(i);
4464            showImplicit(i+1);
4465            showImplicit(i+2);
4466        }
4467        last = current;
4468
4469        if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4470            log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4471        }
4472    }
4473    showImplicit(TST_UCOL_MAX_INPUT-2);
4474    showImplicit(TST_UCOL_MAX_INPUT-1);
4475    showImplicit(TST_UCOL_MAX_INPUT);
4476    ucol_close(coll);
4477}
4478
4479/**
4480 * Iterate through the given iterator, checking to see that all the strings
4481 * in the expected array are present.
4482 * @param expected array of strings we expect to see, or NULL
4483 * @param expectedCount number of elements of expected, or 0
4484 */
4485static int32_t checkUEnumeration(const char* msg,
4486                                 UEnumeration* iter,
4487                                 const char** expected,
4488                                 int32_t expectedCount) {
4489    UErrorCode ec = U_ZERO_ERROR;
4490    int32_t i = 0, n, j, bit;
4491    int32_t seenMask = 0;
4492
4493    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4494    n = uenum_count(iter, &ec);
4495    if (!assertSuccess("count", &ec)) return -1;
4496    log_verbose("%s = [", msg);
4497    for (;; ++i) {
4498        const char* s = uenum_next(iter, NULL, &ec);
4499        if (!assertSuccess("snext", &ec) || s == NULL) break;
4500        if (i != 0) log_verbose(",");
4501        log_verbose("%s", s);
4502        /* check expected list */
4503        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4504            if ((seenMask&bit) == 0 &&
4505                uprv_strcmp(s, expected[j]) == 0) {
4506                seenMask |= bit;
4507                break;
4508            }
4509        }
4510    }
4511    log_verbose("] (%d)\n", i);
4512    assertTrue("count verified", i==n);
4513    /* did we see all expected strings? */
4514    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4515        if ((seenMask&bit)!=0) {
4516            log_verbose("Ok: \"%s\" seen\n", expected[j]);
4517        } else {
4518            log_err("FAIL: \"%s\" not seen\n", expected[j]);
4519        }
4520    }
4521    return n;
4522}
4523
4524/**
4525 * Test new API added for separate collation tree.
4526 */
4527static void TestSeparateTrees(void) {
4528    UErrorCode ec = U_ZERO_ERROR;
4529    UEnumeration *e = NULL;
4530    int32_t n = -1;
4531    UBool isAvailable;
4532    char loc[256];
4533
4534    static const char* AVAIL[] = { "en", "de" };
4535
4536    static const char* KW[] = { "collation" };
4537
4538    static const char* KWVAL[] = { "phonebook", "stroke" };
4539
4540#if !UCONFIG_NO_SERVICE
4541    e = ucol_openAvailableLocales(&ec);
4542    if (e != NULL) {
4543        assertSuccess("ucol_openAvailableLocales", &ec);
4544        assertTrue("ucol_openAvailableLocales!=0", e!=0);
4545        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4546        (void)n;    /* Suppress set but not used warnings. */
4547        /* Don't need to check n because we check list */
4548        uenum_close(e);
4549    } else {
4550        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4551    }
4552#endif
4553
4554    e = ucol_getKeywords(&ec);
4555    if (e != NULL) {
4556        assertSuccess("ucol_getKeywords", &ec);
4557        assertTrue("ucol_getKeywords!=0", e!=0);
4558        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4559        /* Don't need to check n because we check list */
4560        uenum_close(e);
4561    } else {
4562        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4563    }
4564
4565    e = ucol_getKeywordValues(KW[0], &ec);
4566    if (e != NULL) {
4567        assertSuccess("ucol_getKeywordValues", &ec);
4568        assertTrue("ucol_getKeywordValues!=0", e!=0);
4569        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4570        /* Don't need to check n because we check list */
4571        uenum_close(e);
4572    } else {
4573        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4574    }
4575
4576    /* Try setting a warning before calling ucol_getKeywordValues */
4577    ec = U_USING_FALLBACK_WARNING;
4578    e = ucol_getKeywordValues(KW[0], &ec);
4579    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4580        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4581        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4582        /* Don't need to check n because we check list */
4583        uenum_close(e);
4584    }
4585
4586    /*
4587U_DRAFT int32_t U_EXPORT2
4588ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4589                             const char* locale, UBool* isAvailable,
4590                             UErrorCode* status);
4591}
4592*/
4593    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4594                                     &isAvailable, &ec);
4595    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4596        assertEquals("getFunctionalEquivalent(de)", "root", loc);
4597        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4598                   isAvailable == TRUE);
4599    }
4600
4601    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4602                                     &isAvailable, &ec);
4603    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4604        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
4605        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4606                   isAvailable == TRUE);
4607    }
4608}
4609
/*
 * Supersedes TestJ784.
 * Feeds two ordered string lists both to a collator built from the
 * "[before 2]" rules below and to the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    static const char rules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* first list: syllables with and without a macron vowel */
    static const char *test[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* second list: case and accent variants of 'a' after x/X */
    static const char *test2[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
4670
4671static void TestBeforeTightening(void) {
4672    static const struct {
4673        const char *rules;
4674        UErrorCode expectedStatus;
4675    } tests[] = {
4676        { "&[before 1]a<x", U_ZERO_ERROR },
4677        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4678        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4679        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4680        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4681        { "&[before 2]a<<x",U_ZERO_ERROR },
4682        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4683        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4684        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4685        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4686        { "&[before 3]a<<<x",U_ZERO_ERROR },
4687        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4688        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4689    };
4690
4691    int32_t i = 0;
4692
4693    UErrorCode status = U_ZERO_ERROR;
4694    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4695    uint32_t rlen = 0;
4696
4697    UCollator *coll = NULL;
4698
4699
4700    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4701        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4702        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4703        if(status != tests[i].expectedStatus) {
4704            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4705                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4706        }
4707        ucol_close(coll);
4708        status = U_ZERO_ERROR;
4709    }
4710
4711}
4712
4713/*
4714&m < a
4715&[before 1] a < x <<< X << q <<< Q < z
4716assert: m <<< M < x <<< X << q <<< Q < z < a < n
4717
4718&m < a
4719&[before 2] a << x <<< X << q <<< Q < z
4720assert: m <<< M < x <<< X << q <<< Q << a < z < n
4721
4722&m < a
4723&[before 3] a <<< x <<< X << q <<< Q < z
4724assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4725
4726
4727&m << a
4728&[before 1] a < x <<< X << q <<< Q < z
4729assert: x <<< X << q <<< Q < z < m <<< M << a < n
4730
4731&m << a
4732&[before 2] a << x <<< X << q <<< Q < z
4733assert: m <<< M << x <<< X << q <<< Q << a < z < n
4734
4735&m << a
4736&[before 3] a <<< x <<< X << q <<< Q < z
4737assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4738
4739
4740&m <<< a
4741&[before 1] a < x <<< X << q <<< Q < z
4742assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4743
4744&m <<< a
4745&[before 2] a << x <<< X << q <<< Q < z
4746assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4747
4748&m <<< a
4749&[before 3] a <<< x <<< X << q <<< Q < z
4750assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4751
4752
4753&[before 1] s < x <<< X << q <<< Q < z
4754assert: r <<< R < x <<< X << q <<< Q < z < s < n
4755
4756&[before 2] s << x <<< X << q <<< Q < z
4757assert: r <<< R < x <<< X << q <<< Q << s < z < n
4758
4759&[before 3] s <<< x <<< X << q <<< Q < z
4760assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4761
4762
4763&[before 1] \u24DC < x <<< X << q <<< Q < z
4764assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4765
4766&[before 2] \u24DC << x <<< X << q <<< Q < z
4767assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4768
4769&[before 3] \u24DC <<< x <<< X << q <<< Q < z
4770assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4771*/
4772
4773
#if 0
/* requires features not yet supported */
/*
 * Disabled: for each rule string, hands the listed expected order to
 * genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t t;

    for(t = 0; t < (int32_t)LEN(tests); t++) {
        genericRulesStarter(tests[t].rules, tests[t].order, tests[t].size);
    }
}
#endif
4821
4822static void TestTailorNULL( void ) {
4823    const static char* rule = "&a <<< '\\u0000'";
4824    UErrorCode status = U_ZERO_ERROR;
4825    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4826    uint32_t rlen = 0;
4827    UChar a = 1, null = 0;
4828    UCollationResult res = UCOL_EQUAL;
4829
4830    UCollator *coll = NULL;
4831
4832
4833    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4834    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4835
4836    if(U_FAILURE(status)) {
4837        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4838    } else {
4839        res = ucol_strcoll(coll, &a, 1, &null, 1);
4840
4841        if(res != UCOL_LESS) {
4842            log_err("NULL was not tailored properly!\n");
4843        }
4844    }
4845
4846    ucol_close(coll);
4847}
4848
4849static void
4850TestUpperFirstQuaternary(void)
4851{
4852  const char* tests[] = { "B", "b", "Bb", "bB" };
4853  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4854  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4855  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4856}
4857
4858static void
4859TestJ4960(void)
4860{
4861  const char* tests[] = { "\\u00e2T", "aT" };
4862  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4863  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4864  const char* tests2[] = { "a", "A" };
4865  const char* rule = "&[first tertiary ignorable]=A=a";
4866  UColAttribute att2[] = { UCOL_CASE_LEVEL };
4867  UColAttributeValue attVals2[] = { UCOL_ON };
4868  /* Test whether we correctly ignore primary ignorables on case level when */
4869  /* we have only primary & case level */
4870  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4871  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4872  /* and case level */
4873  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4874  /* Test whether completely ignorable letters have case level info (they shouldn't) */
4875  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4876}
4877
4878static void
4879TestJ5223(void)
4880{
4881  static const char *test = "this is a test string";
4882  UChar ustr[256];
4883  int32_t ustr_length = u_unescape(test, ustr, 256);
4884  unsigned char sortkey[256];
4885  int32_t sortkey_length;
4886  UErrorCode status = U_ZERO_ERROR;
4887  static UCollator *coll = NULL;
4888  coll = ucol_open("root", &status);
4889  if(U_FAILURE(status)) {
4890    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4891    return;
4892  }
4893  ucol_setStrength(coll, UCOL_PRIMARY);
4894  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4895  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4896  if (U_FAILURE(status)) {
4897    log_err("Failed setting atributes\n");
4898    return;
4899  }
4900  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4901  if (sortkey_length > 256) return;
4902
4903  /* we mark the position where the null byte should be written in advance */
4904  sortkey[sortkey_length-1] = 0xAA;
4905
4906  /* we set the buffer size one byte higher than needed */
4907  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4908    sortkey_length+1);
4909
4910  /* no error occurs (for me) */
4911  if (sortkey[sortkey_length-1] == 0xAA) {
4912    log_err("Hit bug at first try\n");
4913  }
4914
4915  /* we mark the position where the null byte should be written again */
4916  sortkey[sortkey_length-1] = 0xAA;
4917
4918  /* this time we set the buffer size to the exact amount needed */
4919  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4920    sortkey_length);
4921
4922  /* now the trailing null byte is not written */
4923  if (sortkey[sortkey_length-1] == 0xAA) {
4924    log_err("Hit bug at second try\n");
4925  }
4926
4927  ucol_close(coll);
4928}
4929
/*
 * Regression test for the Thai partial sort key problem: the two strings
 * below, which differ only in their second-to-last code point, are run
 * through the generic ordering check for the "th" locale.
 */
static void
TestJ5232(void)
{
    static const char *test[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", test, LEN(test));
}
4941
/*
 * Checks that "a" sorts before "y" under a tailoring that combines a
 * contraction reset (&Ny) with a [first secondary ignorable] reset.
 */
static void
TestJ5367(void)
{
    static const char *test[] = { "a", "y" };
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(rules, test, LEN(test));
}
4949
4950static void
4951TestVI5913(void)
4952{
4953    UErrorCode status = U_ZERO_ERROR;
4954    int32_t i, j;
4955    UCollator *coll =NULL;
4956    uint8_t  resColl[100], expColl[100];
4957    int32_t  rLen, tLen, ruleLen, sLen, kLen;
4958    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4959    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4960    UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4961    static const UChar tData[][20]={
4962        {0x1EAC, 0},
4963        {0x0041, 0x0323, 0x0302, 0},
4964        {0x1EA0, 0x0302, 0},
4965        {0x00C2, 0x0323, 0},
4966        {0x1ED8, 0},  /* O with dot and circumflex */
4967        {0x1ECC, 0x0302, 0},
4968        {0x1EB7, 0},
4969        {0x1EA1, 0x0306, 0},
4970    };
4971    static const UChar tailorData[][20]={
4972        {0x1FA2, 0},  /* Omega with 3 combining marks */
4973        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4974        {0x1FF3, 0x0313, 0x0300, 0},
4975        {0x1F60, 0x0300, 0x0345, 0},
4976        {0x1F62, 0x0345, 0},
4977        {0x1FA0, 0x0300, 0},
4978    };
4979    static const UChar tailorData2[][20]={
4980        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4981        {0x0073, 0x0323, 0x030C, 0},
4982        {0x0073, 0x030C, 0x0323, 0},
4983    };
4984    static const UChar tailorData3[][20]={
4985        {0x007a, 0},  /*  z */
4986        {0x0061, 0x0065, 0},  /*  a + e */
4987        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4988        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4989        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4990        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4991        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4992        {0x00EA, 0},  /* e with circumflex  */
4993    };
4994
4995    /* Test Vietnamese sort. */
4996    coll = ucol_open("vi", &status);
4997    if(U_FAILURE(status)) {
4998        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4999        return;
5000    }
5001    log_verbose("\n\nVI collation:");
5002    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
5003        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5004    }
5005    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
5006        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5007    }
5008    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
5009        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
5010    }
5011    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
5012        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5013    }
5014
5015    for (j=0; j<8; j++) {
5016        tLen = u_strlen(tData[j]);
5017        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
5018        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5019        for(i = 0; i<rLen; i++) {
5020            log_verbose(" %02X", resColl[i]);
5021        }
5022    }
5023
5024    ucol_close(coll);
5025
5026    /* Test Romanian sort. */
5027    coll = ucol_open("ro", &status);
5028    log_verbose("\n\nRO collation:");
5029    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
5030        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5031    }
5032    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
5033        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5034    }
5035    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5036        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5037    }
5038
5039    for (j=4; j<8; j++) {
5040        tLen = u_strlen(tData[j]);
5041        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
5042        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5043        for(i = 0; i<rLen; i++) {
5044            log_verbose(" %02X", resColl[i]);
5045        }
5046    }
5047    ucol_close(coll);
5048
5049    /* Test the precomposed Greek character with 3 combining marks. */
5050    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5051    ruleLen = u_strlen(rule);
5052    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5053    if (U_FAILURE(status)) {
5054        log_err("ucol_openRules failed with %s\n", u_errorName(status));
5055        return;
5056    }
5057    sLen = u_strlen(tailorData[0]);
5058    for (j=1; j<6; j++) {
5059        tLen = u_strlen(tailorData[j]);
5060        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5061            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5062        }
5063    }
5064    /* Test getSortKey. */
5065    tLen = u_strlen(tailorData[0]);
5066    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5067    for (j=0; j<6; j++) {
5068        tLen = u_strlen(tailorData[j]);
5069        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5070        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5071            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5072            for(i = 0; i<rLen; i++) {
5073                log_err(" %02X", resColl[i]);
5074            }
5075        }
5076    }
5077    ucol_close(coll);
5078
5079    log_verbose("\n\nTailoring test for s with caron:");
5080    ruleLen = u_strlen(rule2);
5081    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5082    tLen = u_strlen(tailorData2[0]);
5083    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5084    for (j=1; j<3; j++) {
5085        tLen = u_strlen(tailorData2[j]);
5086        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5087        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5088            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5089            for(i = 0; i<rLen; i++) {
5090                log_err(" %02X", resColl[i]);
5091            }
5092        }
5093    }
5094    ucol_close(coll);
5095
5096    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5097    ruleLen = u_strlen(rule3);
5098    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5099    tLen = u_strlen(tailorData3[3]);
5100    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5101    for (j=4; j<6; j++) {
5102        tLen = u_strlen(tailorData3[j]);
5103        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5104
5105        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5106            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5107            for(i = 0; i<rLen; i++) {
5108                log_err(" %02X", resColl[i]);
5109            }
5110        }
5111
5112        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5113         for(i = 0; i<rLen; i++) {
5114             log_verbose(" %02X", resColl[i]);
5115         }
5116    }
5117    ucol_close(coll);
5118}
5119
5120static void
5121TestTailor6179(void)
5122{
5123    UErrorCode status = U_ZERO_ERROR;
5124    int32_t i;
5125    UCollator *coll =NULL;
5126    uint8_t  resColl[100];
5127    int32_t  rLen, tLen, ruleLen;
5128    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5129    static const UChar rule1[]={
5130            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5131            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5132            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5133            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5134    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5135    static const UChar rule2[]={
5136            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5137            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5138            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5139            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5140            0x3C,0x3C,0x20,0x62,0};
5141
5142    static const UChar tData1[][4]={
5143        {0x61, 0},
5144        {0x62, 0},
5145        { 0xFDD0,0x009E, 0}
5146    };
5147    static const UChar tData2[][4]={
5148        {0x61, 0},
5149        {0x62, 0},
5150        { 0xFDD0,0x009E, 0}
5151     };
5152
5153    /*
5154     * These values from FractionalUCA.txt will change,
5155     * and need to be updated here.
5156     */
5157    static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5158    static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5159    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5160    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5161
5162    /* Test [Last Primary ignorable] */
5163
5164    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
5165    ruleLen = u_strlen(rule1);
5166    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5167    if (U_FAILURE(status)) {
5168        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5169        return;
5170    }
5171    tLen = u_strlen(tData1[0]);
5172    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5173    if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5174        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5175        for(i = 0; i<rLen; i++) {
5176            log_err(" %02X", resColl[i]);
5177        }
5178        log_err("\n");
5179    }
5180    tLen = u_strlen(tData1[1]);
5181    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5182    if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5183        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5184        for(i = 0; i<rLen; i++) {
5185            log_err(" %02X", resColl[i]);
5186        }
5187        log_err("\n");
5188    }
5189    ucol_close(coll);
5190
5191
5192    /* Test [Last Secondary ignorable] */
5193    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
5194    ruleLen = u_strlen(rule1);
5195    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5196    if (U_FAILURE(status)) {
5197        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5198        return;
5199    }
5200    tLen = u_strlen(tData2[0]);
5201    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5202    if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5203        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5204        for(i = 0; i<rLen; i++) {
5205            log_err(" %02X", resColl[i]);
5206        }
5207        log_err("\n");
5208    }
5209    if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */
5210      tLen = u_strlen(tData2[1]);
5211      rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5212      if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5213        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5214        for(i = 0; i<rLen; i++) {
5215          log_err(" %02X", resColl[i]);
5216        }
5217        log_err("\n");
5218      }
5219    }
5220    ucol_close(coll);
5221}
5222
5223static void
5224TestUCAPrecontext(void)
5225{
5226    UErrorCode status = U_ZERO_ERROR;
5227    int32_t i, j;
5228    UCollator *coll =NULL;
5229    uint8_t  resColl[100], prevColl[100];
5230    int32_t  rLen, tLen, ruleLen;
5231    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5232    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5233    /* & l middle-dot << a  a is an expansion. */
5234
5235    UChar tData1[][20]={
5236            { 0xb7, 0},  /* standalone middle dot(0xb7) */
5237            { 0x387, 0}, /* standalone middle dot(0x387) */
5238            { 0x61, 0},  /* a */
5239            { 0x6C, 0},  /* l */
5240            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5241            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5242            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5243            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5244            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5245            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5246            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5247     };
5248
5249    log_verbose("\n\nEN collation:");
5250    coll = ucol_open("en", &status);
5251    if (U_FAILURE(status)) {
5252        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5253        return;
5254    }
5255    for (j=0; j<11; j++) {
5256        tLen = u_strlen(tData1[j]);
5257        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5258        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5259            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5260                    j, tData1[j]);
5261        }
5262        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5263        for(i = 0; i<rLen; i++) {
5264            log_verbose(" %02X", resColl[i]);
5265        }
5266        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5267     }
5268     ucol_close(coll);
5269
5270
5271     log_verbose("\n\nJA collation:");
5272     coll = ucol_open("ja", &status);
5273     if (U_FAILURE(status)) {
5274         log_err("Tailoring test: &z <<a|- failed!");
5275         return;
5276     }
5277     for (j=0; j<11; j++) {
5278         tLen = u_strlen(tData1[j]);
5279         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5280         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5281             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5282                     j, tData1[j]);
5283         }
5284         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5285         for(i = 0; i<rLen; i++) {
5286             log_verbose(" %02X", resColl[i]);
5287         }
5288         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5289      }
5290      ucol_close(coll);
5291
5292
5293      log_verbose("\n\nTailoring test: & middle dot < a ");
5294      ruleLen = u_strlen(rule1);
5295      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5296      if (U_FAILURE(status)) {
5297          log_err("Tailoring test: & middle dot < a failed!");
5298          return;
5299      }
5300      for (j=0; j<11; j++) {
5301          tLen = u_strlen(tData1[j]);
5302          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5303          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5304              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5305                      j, tData1[j]);
5306          }
5307          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5308          for(i = 0; i<rLen; i++) {
5309              log_verbose(" %02X", resColl[i]);
5310          }
5311          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5312       }
5313       ucol_close(coll);
5314
5315
5316       log_verbose("\n\nTailoring test: & l middle-dot << a ");
5317       ruleLen = u_strlen(rule2);
5318       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5319       if (U_FAILURE(status)) {
5320           log_err("Tailoring test: & l middle-dot << a failed!");
5321           return;
5322       }
5323       for (j=0; j<11; j++) {
5324           tLen = u_strlen(tData1[j]);
5325           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5326           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5327               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5328                       j, tData1[j]);
5329           }
5330           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5331               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5332                       j, tData1[j]);
5333           }
5334           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5335           for(i = 0; i<rLen; i++) {
5336               log_verbose(" %02X", resColl[i]);
5337           }
5338           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5339        }
5340        ucol_close(coll);
5341}
5342
5343static void
5344TestOutOfBuffer5468(void)
5345{
5346    static const char *test = "\\u4e00";
5347    UChar ustr[256];
5348    int32_t ustr_length = u_unescape(test, ustr, 256);
5349    unsigned char shortKeyBuf[1];
5350    int32_t sortkey_length;
5351    UErrorCode status = U_ZERO_ERROR;
5352    static UCollator *coll = NULL;
5353
5354    coll = ucol_open("root", &status);
5355    if(U_FAILURE(status)) {
5356      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5357      return;
5358    }
5359    ucol_setStrength(coll, UCOL_PRIMARY);
5360    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5361    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5362    if (U_FAILURE(status)) {
5363      log_err("Failed setting atributes\n");
5364      return;
5365    }
5366
5367    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5368    if (sortkey_length != 4) {
5369        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5370    }
5371    log_verbose("length of sortKey is %d", sortkey_length);
5372    ucol_close(coll);
5373}
5374
5375#define TSKC_DATA_SIZE 5
5376#define TSKC_BUF_SIZE  50
5377static void
5378TestSortKeyConsistency(void)
5379{
5380    UErrorCode icuRC = U_ZERO_ERROR;
5381    UCollator* ucol;
5382    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5383
5384    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5385    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5386    int32_t i, j, i2;
5387
5388    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5389    if (U_FAILURE(icuRC))
5390    {
5391        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5392        return;
5393    }
5394
5395    for (i = 0; i < TSKC_DATA_SIZE; i++)
5396    {
5397        UCharIterator uiter;
5398        uint32_t state[2] = { 0, 0 };
5399        int32_t dataLen = i+1;
5400        for (j=0; j<TSKC_BUF_SIZE; j++)
5401            bufFull[i][j] = bufPart[i][j] = 0;
5402
5403        /* Full sort key */
5404        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5405
5406        /* Partial sort key */
5407        uiter_setString(&uiter, data, dataLen);
5408        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5409        if (U_FAILURE(icuRC))
5410        {
5411            log_err("ucol_nextSortKeyPart failed\n");
5412            ucol_close(ucol);
5413            return;
5414        }
5415
5416        for (i2=0; i2<i; i2++)
5417        {
5418            UBool fullMatch = TRUE;
5419            UBool partMatch = TRUE;
5420            for (j=0; j<TSKC_BUF_SIZE; j++)
5421            {
5422                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5423                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5424            }
5425            if (fullMatch != partMatch) {
5426                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5427                                  : "partial key was consistent, but full key changed\n");
5428                ucol_close(ucol);
5429                return;
5430            }
5431        }
5432    }
5433
5434    /*=============================================*/
5435   ucol_close(ucol);
5436}
5437
5438/* ticket: 6101 */
5439static void TestCroatianSortKey(void) {
5440    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5441    UErrorCode status = U_ZERO_ERROR;
5442    UCollator *ucol;
5443    UCharIterator iter;
5444
5445    static const UChar text[] = { 0x0044, 0xD81A };
5446
5447    size_t length = sizeof(text)/sizeof(*text);
5448
5449    uint8_t textSortKey[32];
5450    size_t lenSortKey = 32;
5451    size_t actualSortKeyLen;
5452    uint32_t uStateInfo[2] = { 0, 0 };
5453
5454    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5455    if (U_FAILURE(status)) {
5456        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5457        return;
5458    }
5459
5460    uiter_setString(&iter, text, length);
5461
5462    actualSortKeyLen = ucol_nextSortKeyPart(
5463        ucol, &iter, (uint32_t*)uStateInfo,
5464        textSortKey, lenSortKey, &status
5465        );
5466
5467    if (actualSortKeyLen == lenSortKey) {
5468        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5469    }
5470
5471    ucol_close(ucol);
5472}
5473
5474/* ticket: 6140 */
5475/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5476 * they are both Hiragana and Katakana
5477 */
5478#define SORTKEYLEN 50
5479static void TestHiragana(void) {
5480    UErrorCode status = U_ZERO_ERROR;
5481    UCollator* ucol;
5482    UCollationResult strcollresult;
5483    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5484    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5485    int32_t data1Len = sizeof(data1)/sizeof(*data1);
5486    int32_t data2Len = sizeof(data2)/sizeof(*data2);
5487    int32_t i, j;
5488    uint8_t sortKey1[SORTKEYLEN];
5489    uint8_t sortKey2[SORTKEYLEN];
5490
5491    UCharIterator uiter1;
5492    UCharIterator uiter2;
5493    uint32_t state1[2] = { 0, 0 };
5494    uint32_t state2[2] = { 0, 0 };
5495    int32_t keySize1;
5496    int32_t keySize2;
5497
5498    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5499            &status);
5500    if (U_FAILURE(status)) {
5501        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5502        return;
5503    }
5504
5505    /* Start of full sort keys */
5506    /* Full sort key1 */
5507    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5508    /* Full sort key2 */
5509    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5510    if (keySize1 == keySize2) {
5511        for (i = 0; i < keySize1; i++) {
5512            if (sortKey1[i] != sortKey2[i]) {
5513                log_err("Full sort keys are different. Should be equal.");
5514            }
5515        }
5516    } else {
5517        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5518    }
5519    /* End of full sort keys */
5520
5521    /* Start of partial sort keys */
5522    /* Partial sort key1 */
5523    uiter_setString(&uiter1, data1, data1Len);
5524    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5525    /* Partial sort key2 */
5526    uiter_setString(&uiter2, data2, data2Len);
5527    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5528    if (U_SUCCESS(status) && keySize1 == keySize2) {
5529        for (j = 0; j < keySize1; j++) {
5530            if (sortKey1[j] != sortKey2[j]) {
5531                log_err("Partial sort keys are different. Should be equal");
5532            }
5533        }
5534    } else {
5535        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5536    }
5537    /* End of partial sort keys */
5538
5539    /* Start of strcoll */
5540    /* Use ucol_strcoll() to determine ordering */
5541    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5542    if (strcollresult != UCOL_EQUAL) {
5543        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5544    }
5545
5546    ucol_close(ucol);
5547}
5548
5549/* Convenient struct for running collation tests */
5550typedef struct {
5551  const UChar source[MAX_TOKEN_LEN];  /* String on left */
5552  const UChar target[MAX_TOKEN_LEN];  /* String on right */
5553  UCollationResult result;            /* -1, 0 or +1, depending on collation */
5554} OneTestCase;
5555
5556/*
5557 * Utility function to test one collation test case.
5558 * @param testcases Array of test cases.
5559 * @param n_testcases Size of the array testcases.
5560 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5561 * @param n_rules Size of the array str_rules.
5562 */
5563static void doTestOneTestCase(const OneTestCase testcases[],
5564                              int n_testcases,
5565                              const char* str_rules[],
5566                              int n_rules)
5567{
5568  int rule_no, testcase_no;
5569  UChar rule[500];
5570  int32_t length = 0;
5571  UErrorCode status = U_ZERO_ERROR;
5572  UParseError parse_error;
5573  UCollator  *myCollation;
5574
5575  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5576
5577    length = u_unescape(str_rules[rule_no], rule, 500);
5578    if (length == 0) {
5579        log_err("ERROR: The rule cannot be unescaped: %s\n");
5580        return;
5581    }
5582    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5583    if(U_FAILURE(status)){
5584        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585        return;
5586    }
5587    log_verbose("Testing the <<* syntax\n");
5588    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5589    ucol_setStrength(myCollation, UCOL_TERTIARY);
5590    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5591      doTest(myCollation,
5592             testcases[testcase_no].source,
5593             testcases[testcase_no].target,
5594             testcases[testcase_no].result
5595             );
5596    }
5597    ucol_close(myCollation);
5598  }
5599}
5600
5601const static OneTestCase rangeTestcases[] = {
5602  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
5603  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
5604  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
5605
5606  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
5607  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
5608  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
5609  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
5610  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
5611
5612  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
5613  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
5614  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
5615  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
5616
5617  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
5618  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
5619  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
5620  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
5621  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
5622  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
5623  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
5624  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
5625};
5626
5627static int nRangeTestcases = LEN(rangeTestcases);
5628
5629const static OneTestCase rangeTestcasesSupplemental[] = {
5630  { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
5631  { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
5632  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
5633  { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5634  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5635  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5636  { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5637};
5638
5639static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5640
5641const static OneTestCase rangeTestcasesQwerty[] = {
5642  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
5643  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
5644
5645  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
5646  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
5647
5648  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
5649  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
5650
5651  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
5652  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
5653
5654  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5655    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
5656  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5657    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
5658};
5659
5660static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5661
5662static void TestSameStrengthList(void)
5663{
5664  const char* strRules[] = {
5665    /* Normal */
5666    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5667
5668    /* Lists */
5669    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5670  };
5671  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5672}
5673
5674static void TestSameStrengthListQuoted(void)
5675{
5676  const char* strRules[] = {
5677    /* Lists with quoted characters */
5678    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5679    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5680
5681    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5682    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5683
5684    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5685    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5686  };
5687  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5688}
5689
5690static void TestSameStrengthListSupplemental(void)
5691{
5692  const char* strRules[] = {
5693    "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5694    "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5695    "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5696    "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5697  };
5698  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5699}
5700
5701static void TestSameStrengthListQwerty(void)
5702{
5703  const char* strRules[] = {
5704    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5705    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5706    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5707    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5708    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5709
5710    /* Quoted characters also will work if two quoted characters are not consecutive.  */
5711    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5712
5713    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5714    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5715
5716 };
5717  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5718}
5719
5720static void TestSameStrengthListQuotedQwerty(void)
5721{
5722  const char* strRules[] = {
5723    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5724    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5725    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5726
5727    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5728    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5729   };
5730  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5731}
5732
5733static void TestSameStrengthListRanges(void)
5734{
5735  const char* strRules[] = {
5736    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5737  };
5738  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5739}
5740
5741static void TestSameStrengthListSupplementalRanges(void)
5742{
5743  const char* strRules[] = {
5744    "&\\ufffe<*\\uffff-\\U00010002",
5745  };
5746  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5747}
5748
5749static void TestSpecialCharacters(void)
5750{
5751  const char* strRules[] = {
5752    /* Normal */
5753    "&';'<'+'<','<'-'<'&'<'*'",
5754
5755    /* List */
5756    "&';'<*'+,-&*'",
5757
5758    /* Range */
5759    "&';'<*'+'-'-&*'",
5760  };
5761
5762  const static OneTestCase specialCharacterStrings[] = {
5763    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5764    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5765    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5766    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5767  };
5768  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5769}
5770
5771static void TestPrivateUseCharacters(void)
5772{
5773  const char* strRules[] = {
5774    /* Normal */
5775    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5776    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5777  };
5778
5779  const static OneTestCase privateUseCharacterStrings[] = {
5780    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5781    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5782    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5783    { {0xe2da}, {0xe2db}, UCOL_LESS },
5784    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5785    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5786  };
5787  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5788}
5789
5790static void TestPrivateUseCharactersInList(void)
5791{
5792  const char* strRules[] = {
5793    /* List */
5794    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5795    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5796    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5797  };
5798
5799  const static OneTestCase privateUseCharacterStrings[] = {
5800    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5801    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5802    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5803    { {0xe2da}, {0xe2db}, UCOL_LESS },
5804    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5805    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5806  };
5807  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5808}
5809
5810static void TestPrivateUseCharactersInRange(void)
5811{
5812  const char* strRules[] = {
5813    /* Range */
5814    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5815    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5816    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5817  };
5818
5819  const static OneTestCase privateUseCharacterStrings[] = {
5820    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5821    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5822    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5823    { {0xe2da}, {0xe2db}, UCOL_LESS },
5824    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5825    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5826  };
5827  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5828}
5829
5830static void TestInvalidListsAndRanges(void)
5831{
5832  const char* invalidRules[] = {
5833    /* Range not in starred expression */
5834    "&\\ufffe<\\uffff-\\U00010002",
5835
5836    /* Range without start */
5837    "&a<*-c",
5838
5839    /* Range without end */
5840    "&a<*b-",
5841
5842    /* More than one hyphen */
5843    "&a<*b-g-l",
5844
5845    /* Range in the wrong order */
5846    "&a<*k-b",
5847
5848  };
5849
5850  UChar rule[500];
5851  UErrorCode status = U_ZERO_ERROR;
5852  UParseError parse_error;
5853  int n_rules = LEN(invalidRules);
5854  int rule_no;
5855  int length;
5856  UCollator  *myCollation;
5857
5858  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5859
5860    length = u_unescape(invalidRules[rule_no], rule, 500);
5861    if (length == 0) {
5862        log_err("ERROR: The rule cannot be unescaped: %s\n");
5863        return;
5864    }
5865    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5866    (void)myCollation;      /* Suppress set but not used warning. */
5867    if(!U_FAILURE(status)){
5868      log_err("ERROR: Could not cause a failure as expected: \n");
5869    }
5870    status = U_ZERO_ERROR;
5871  }
5872}
5873
5874/*
5875 * This test ensures that characters placed before a character in a different script have the same lead byte
5876 * in their collation key before and after script reordering.
5877 */
5878static void TestBeforeRuleWithScriptReordering(void)
5879{
5880    UParseError error;
5881    UErrorCode status = U_ZERO_ERROR;
5882    UCollator  *myCollation;
5883    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5884    UChar rules[500];
5885    uint32_t rulesLength = 0;
5886    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5887    UCollationResult collResult;
5888
5889    uint8_t baseKey[256];
5890    uint32_t baseKeyLength;
5891    uint8_t beforeKey[256];
5892    uint32_t beforeKeyLength;
5893
5894    UChar base[] = { 0x03b1 }; /* base */
5895    int32_t baseLen = sizeof(base)/sizeof(*base);
5896
5897    UChar before[] = { 0x0e01 }; /* ko kai */
5898    int32_t beforeLen = sizeof(before)/sizeof(*before);
5899
5900    /*UChar *data[] = { before, base };
5901    genericRulesStarter(srules, data, 2);*/
5902
5903    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5904
5905    (void)beforeKeyLength;   /* Suppress set but not used warnings. */
5906    (void)baseKeyLength;
5907
5908    /* build collator */
5909    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5910
5911    rulesLength = u_unescape(srules, rules, LEN(rules));
5912    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5913    if(U_FAILURE(status)) {
5914        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5915        return;
5916    }
5917
5918    /* check collation results - before rule applied but not script reordering */
5919    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5920    if (collResult != UCOL_GREATER) {
5921        log_err("Collation result not correct before script reordering = %d\n", collResult);
5922    }
5923
5924    /* check the lead byte of the collation keys before script reordering */
5925    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5926    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5927    if (baseKey[0] != beforeKey[0]) {
5928      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5929   }
5930
5931    /* reorder the scripts */
5932    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5933    if(U_FAILURE(status)) {
5934        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5935        return;
5936    }
5937
5938    /* check collation results - before rule applied and after script reordering */
5939    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5940    if (collResult != UCOL_GREATER) {
5941        log_err("Collation result not correct after script reordering = %d\n", collResult);
5942    }
5943
5944    /* check the lead byte of the collation keys after script reordering */
5945    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5946    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5947    if (baseKey[0] != beforeKey[0]) {
5948        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5949    }
5950
5951    ucol_close(myCollation);
5952}
5953
5954/*
5955 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5956 */
5957static void TestNonLeadBytesDuringCollationReordering(void)
5958{
5959    UErrorCode status = U_ZERO_ERROR;
5960    UCollator  *myCollation;
5961    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5962
5963    uint8_t baseKey[256];
5964    uint32_t baseKeyLength;
5965    uint8_t reorderKey[256];
5966    uint32_t reorderKeyLength;
5967
5968    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5969
5970    uint32_t i;
5971
5972
5973    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5974
5975    /* build collator tertiary */
5976    myCollation = ucol_open("", &status);
5977    ucol_setStrength(myCollation, UCOL_TERTIARY);
5978    if(U_FAILURE(status)) {
5979        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5980        return;
5981    }
5982    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5983
5984    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5985    if(U_FAILURE(status)) {
5986        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5987        return;
5988    }
5989    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5990
5991    if (baseKeyLength != reorderKeyLength) {
5992        log_err("Key lengths not the same during reordering.\n");
5993        return;
5994    }
5995
5996    for (i = 1; i < baseKeyLength; i++) {
5997        if (baseKey[i] != reorderKey[i]) {
5998            log_err("Collation key bytes not the same at position %d.\n", i);
5999            return;
6000        }
6001    }
6002    ucol_close(myCollation);
6003
6004    /* build collator quaternary */
6005    myCollation = ucol_open("", &status);
6006    ucol_setStrength(myCollation, UCOL_QUATERNARY);
6007    if(U_FAILURE(status)) {
6008        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6009        return;
6010    }
6011    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
6012
6013    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6014    if(U_FAILURE(status)) {
6015        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6016        return;
6017    }
6018    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
6019
6020    if (baseKeyLength != reorderKeyLength) {
6021        log_err("Key lengths not the same during reordering.\n");
6022        return;
6023    }
6024
6025    for (i = 1; i < baseKeyLength; i++) {
6026        if (baseKey[i] != reorderKey[i]) {
6027            log_err("Collation key bytes not the same at position %d.\n", i);
6028            return;
6029        }
6030    }
6031    ucol_close(myCollation);
6032}
6033
6034/*
6035 * Test reordering API.
6036 */
6037static void TestReorderingAPI(void)
6038{
6039    UErrorCode status = U_ZERO_ERROR;
6040    UCollator  *myCollation;
6041    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6042    int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
6043    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6044    UCollationResult collResult;
6045    int32_t retrievedReorderCodesLength;
6046    int32_t retrievedReorderCodes[10];
6047    UChar greekString[] = { 0x03b1 };
6048    UChar punctuationString[] = { 0x203e };
6049    int loopIndex;
6050
6051    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6052
6053    /* build collator tertiary */
6054    myCollation = ucol_open("", &status);
6055    ucol_setStrength(myCollation, UCOL_TERTIARY);
6056    if(U_FAILURE(status)) {
6057        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6058        return;
6059    }
6060
6061    /* set the reorderding */
6062    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6063    if (U_FAILURE(status)) {
6064        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6065        return;
6066    }
6067
6068    /* get the reordering */
6069    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6070    if (status != U_BUFFER_OVERFLOW_ERROR) {
6071        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6072        return;
6073    }
6074    status = U_ZERO_ERROR;
6075    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6076        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6077        return;
6078    }
6079    /* now let's really get it */
6080    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6081    if (U_FAILURE(status)) {
6082        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6083        return;
6084    }
6085    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6086        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6087        return;
6088    }
6089    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6090        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6091            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6092            return;
6093        }
6094    }
6095    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6096    if (collResult != UCOL_LESS) {
6097        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6098        return;
6099    }
6100
6101    /* clear the reordering */
6102    ucol_setReorderCodes(myCollation, NULL, 0, &status);
6103    if (U_FAILURE(status)) {
6104        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6105        return;
6106    }
6107
6108    /* get the reordering again */
6109    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6110    if (retrievedReorderCodesLength != 0) {
6111        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6112        return;
6113    }
6114
6115    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6116    if (collResult != UCOL_GREATER) {
6117        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6118        return;
6119    }
6120
6121    /* test for error condition on duplicate reorder codes */
6122    ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6123    if (!U_FAILURE(status)) {
6124        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6125        return;
6126    }
6127
6128    status = U_ZERO_ERROR;
6129    /* test for reorder codes after a reset code */
6130    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6131    if (!U_FAILURE(status)) {
6132        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6133        return;
6134    }
6135
6136    ucol_close(myCollation);
6137}
6138
6139/*
6140 * Test reordering API.
6141 */
6142static void TestReorderingAPIWithRuleCreatedCollator(void)
6143{
6144    UErrorCode status = U_ZERO_ERROR;
6145    UCollator  *myCollation;
6146    UChar rules[90];
6147    static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6148    static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6149    static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};
6150    UCollationResult collResult;
6151    int32_t retrievedReorderCodesLength;
6152    int32_t retrievedReorderCodes[10];
6153    static const UChar greekString[] = { 0x03b1 };
6154    static const UChar punctuationString[] = { 0x203e };
6155    static const UChar hanString[] = { 0x65E5, 0x672C };
6156    int loopIndex;
6157
6158    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6159
6160    /* build collator from rules */
6161    u_uastrcpy(rules, "[reorder Hani Grek]");
6162    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6163    if(U_FAILURE(status)) {
6164        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6165        return;
6166    }
6167
6168    /* get the reordering */
6169    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6170    if (U_FAILURE(status)) {
6171        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6172        return;
6173    }
6174    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6175        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6176        return;
6177    }
6178    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6179        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6180            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6181            return;
6182        }
6183    }
6184    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6185    if (collResult != UCOL_GREATER) {
6186        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6187        return;
6188    }
6189
6190    /* set the reordering */
6191    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6192    if (U_FAILURE(status)) {
6193        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6194        return;
6195    }
6196
6197    /* get the reordering */
6198    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6199    if (status != U_BUFFER_OVERFLOW_ERROR) {
6200        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6201        return;
6202    }
6203    status = U_ZERO_ERROR;
6204    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6205        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6206        return;
6207    }
6208    /* now let's really get it */
6209    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6210    if (U_FAILURE(status)) {
6211        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6212        return;
6213    }
6214    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6215        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6216        return;
6217    }
6218    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6219        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6220            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6221            return;
6222        }
6223    }
6224    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6225    if (collResult != UCOL_LESS) {
6226        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6227        return;
6228    }
6229
6230    /* clear the reordering */
6231    ucol_setReorderCodes(myCollation, NULL, 0, &status);
6232    if (U_FAILURE(status)) {
6233        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6234        return;
6235    }
6236
6237    /* get the reordering again */
6238    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6239    if (retrievedReorderCodesLength != 0) {
6240        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6241        return;
6242    }
6243
6244    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6245    if (collResult != UCOL_GREATER) {
6246        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6247        return;
6248    }
6249
6250    /* reset the reordering */
6251    ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);
6252    if (U_FAILURE(status)) {
6253        log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n", myErrorName(status));
6254        return;
6255    }
6256    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6257    if (U_FAILURE(status)) {
6258        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6259        return;
6260    }
6261    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6262        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6263        return;
6264    }
6265    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6266        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6267            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6268            return;
6269        }
6270    }
6271
6272    ucol_close(myCollation);
6273}
6274
/*
 * qsort() comparator that orders int32_t codes ascending.
 * The values are compared rather than subtracted, so operands that are far
 * apart cannot overflow a signed int and produce the wrong sign.
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return -1, 0 or 1 when *a is less than, equal to or greater than *b.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
  const int32_t lhs = *(const int32_t*)a;
  const int32_t rhs = *(const int32_t*)b;

  return (lhs > rhs) - (lhs < rhs);
}
6279
6280static void TestEquivalentReorderingScripts(void) {
6281    UErrorCode status = U_ZERO_ERROR;
6282    int32_t equivalentScripts[50];
6283    int32_t equivalentScriptsLength;
6284    int loopIndex;
6285    int32_t equivalentScriptsResult[] = {
6286        USCRIPT_BOPOMOFO,
6287        USCRIPT_LISU,
6288        USCRIPT_LYCIAN,
6289        USCRIPT_CARIAN,
6290        USCRIPT_LYDIAN,
6291        USCRIPT_YI,
6292        USCRIPT_OLD_ITALIC,
6293        USCRIPT_GOTHIC,
6294        USCRIPT_DESERET,
6295        USCRIPT_SHAVIAN,
6296        USCRIPT_OSMANYA,
6297        USCRIPT_LINEAR_B,
6298        USCRIPT_CYPRIOT,
6299        USCRIPT_OLD_SOUTH_ARABIAN,
6300        USCRIPT_AVESTAN,
6301        USCRIPT_IMPERIAL_ARAMAIC,
6302        USCRIPT_INSCRIPTIONAL_PARTHIAN,
6303        USCRIPT_INSCRIPTIONAL_PAHLAVI,
6304        USCRIPT_UGARITIC,
6305        USCRIPT_OLD_PERSIAN,
6306        USCRIPT_CUNEIFORM,
6307        USCRIPT_EGYPTIAN_HIEROGLYPHS,
6308        USCRIPT_PHONETIC_POLLARD,
6309        USCRIPT_SORA_SOMPENG,
6310        USCRIPT_MEROITIC_CURSIVE,
6311        USCRIPT_MEROITIC_HIEROGLYPHS
6312    };
6313
6314    qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6315
6316    /* UScript.GOTHIC */
6317    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6318    if (U_FAILURE(status)) {
6319        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6320        return;
6321    }
6322    /*
6323    fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6324    fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6325    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6326        fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6327    }
6328    */
6329    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6330        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6331        return;
6332    }
6333    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6334        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6335            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6336            return;
6337        }
6338    }
6339
6340    /* UScript.SHAVIAN */
6341    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6342    if (U_FAILURE(status)) {
6343        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6344        return;
6345    }
6346    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6347        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6348        return;
6349    }
6350    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6351        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6352            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6353            return;
6354        }
6355    }
6356}
6357
6358static void TestReorderingAcrossCloning(void)
6359{
6360    UErrorCode status = U_ZERO_ERROR;
6361    UCollator  *myCollation;
6362    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6363    UCollator *clonedCollation;
6364    int32_t retrievedReorderCodesLength;
6365    int32_t retrievedReorderCodes[10];
6366    int loopIndex;
6367
6368    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6369
6370    /* build collator tertiary */
6371    myCollation = ucol_open("", &status);
6372    ucol_setStrength(myCollation, UCOL_TERTIARY);
6373    if(U_FAILURE(status)) {
6374        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6375        return;
6376    }
6377
6378    /* set the reorderding */
6379    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6380    if (U_FAILURE(status)) {
6381        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6382        return;
6383    }
6384
6385    /* clone the collator */
6386    clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);
6387    if (U_FAILURE(status)) {
6388        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6389        return;
6390    }
6391
6392    /* get the reordering */
6393    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6394    if (U_FAILURE(status)) {
6395        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6396        return;
6397    }
6398    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6399        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6400        return;
6401    }
6402    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6403        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6404            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6405            return;
6406        }
6407    }
6408
6409    /*uprv_free(buffer);*/
6410    ucol_close(myCollation);
6411    ucol_close(clonedCollation);
6412}
6413
6414/*
6415 * Utility function to test one collation reordering test case set.
6416 * @param testcases Array of test cases.
6417 * @param n_testcases Size of the array testcases.
6418 * @param reorderTokens Array of reordering codes.
6419 * @param reorderTokensLen Size of the array reorderTokens.
6420 */
6421static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6422{
6423    uint32_t testCaseNum;
6424    UErrorCode status = U_ZERO_ERROR;
6425    UCollator  *myCollation;
6426
6427    myCollation = ucol_open("", &status);
6428    if (U_FAILURE(status)) {
6429        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6430        return;
6431    }
6432    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6433    if(U_FAILURE(status)) {
6434        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6435        return;
6436    }
6437
6438    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6439        doTest(myCollation,
6440            testCases[testCaseNum].source,
6441            testCases[testCaseNum].target,
6442            testCases[testCaseNum].result
6443        );
6444    }
6445    ucol_close(myCollation);
6446}
6447
6448static void TestGreekFirstReorder(void)
6449{
6450    const char* strRules[] = {
6451        "[reorder Grek]"
6452    };
6453
6454    const int32_t apiRules[] = {
6455        USCRIPT_GREEK
6456    };
6457
6458    const static OneTestCase privateUseCharacterStrings[] = {
6459        { {0x0391}, {0x0391}, UCOL_EQUAL },
6460        { {0x0041}, {0x0391}, UCOL_GREATER },
6461        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6462        { {0x0060}, {0x0391}, UCOL_LESS },
6463        { {0x0391}, {0xe2dc}, UCOL_LESS },
6464        { {0x0391}, {0x0060}, UCOL_GREATER },
6465    };
6466
6467    /* Test rules creation */
6468    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6469
6470    /* Test collation reordering API */
6471    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6472}
6473
6474static void TestGreekLastReorder(void)
6475{
6476    const char* strRules[] = {
6477        "[reorder Zzzz Grek]"
6478    };
6479
6480    const int32_t apiRules[] = {
6481        USCRIPT_UNKNOWN, USCRIPT_GREEK
6482    };
6483
6484    const static OneTestCase privateUseCharacterStrings[] = {
6485        { {0x0391}, {0x0391}, UCOL_EQUAL },
6486        { {0x0041}, {0x0391}, UCOL_LESS },
6487        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6488        { {0x0060}, {0x0391}, UCOL_LESS },
6489        { {0x0391}, {0xe2dc}, UCOL_GREATER },
6490    };
6491
6492    /* Test rules creation */
6493    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6494
6495    /* Test collation reordering API */
6496    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6497}
6498
6499static void TestNonScriptReorder(void)
6500{
6501    const char* strRules[] = {
6502        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6503    };
6504
6505    const int32_t apiRules[] = {
6506        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6507        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6508        UCOL_REORDER_CODE_CURRENCY
6509    };
6510
6511    const static OneTestCase privateUseCharacterStrings[] = {
6512        { {0x0391}, {0x0041}, UCOL_LESS },
6513        { {0x0041}, {0x0391}, UCOL_GREATER },
6514        { {0x0060}, {0x0041}, UCOL_LESS },
6515        { {0x0060}, {0x0391}, UCOL_GREATER },
6516        { {0x0024}, {0x0041}, UCOL_GREATER },
6517    };
6518
6519    /* Test rules creation */
6520    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6521
6522    /* Test collation reordering API */
6523    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6524}
6525
6526static void TestHaniReorder(void)
6527{
6528    const char* strRules[] = {
6529        "[reorder Hani]"
6530    };
6531    const int32_t apiRules[] = {
6532        USCRIPT_HAN
6533    };
6534
6535    const static OneTestCase privateUseCharacterStrings[] = {
6536        { {0x4e00}, {0x0041}, UCOL_LESS },
6537        { {0x4e00}, {0x0060}, UCOL_GREATER },
6538        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6539        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6540        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6541        { {0xfa27}, {0x0041}, UCOL_LESS },
6542        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6543    };
6544
6545    /* Test rules creation */
6546    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6547
6548    /* Test collation reordering API */
6549    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6550}
6551
6552static void TestHaniReorderWithOtherRules(void)
6553{
6554    const char* strRules[] = {
6555        "[reorder Hani] &b<a"
6556    };
6557    /*const int32_t apiRules[] = {
6558        USCRIPT_HAN
6559    };*/
6560
6561    const static OneTestCase privateUseCharacterStrings[] = {
6562        { {0x4e00}, {0x0041}, UCOL_LESS },
6563        { {0x4e00}, {0x0060}, UCOL_GREATER },
6564        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6565        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6566        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6567        { {0xfa27}, {0x0041}, UCOL_LESS },
6568        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6569        { {0x0062}, {0x0061}, UCOL_LESS },
6570    };
6571
6572    /* Test rules creation */
6573    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6574}
6575
6576static void TestMultipleReorder(void)
6577{
6578    const char* strRules[] = {
6579        "[reorder Grek Zzzz DIGIT Latn Hani]"
6580    };
6581
6582    const int32_t apiRules[] = {
6583        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6584    };
6585
6586    const static OneTestCase collationTestCases[] = {
6587        { {0x0391}, {0x0041}, UCOL_LESS},
6588        { {0x0031}, {0x0041}, UCOL_LESS},
6589        { {0x0041}, {0x4e00}, UCOL_LESS},
6590    };
6591
6592    /* Test rules creation */
6593    doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6594
6595    /* Test collation reordering API */
6596    doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6597}
6598
6599/*
6600 * Test that covers issue reported in ticket 8814
6601 */
6602static void TestReorderWithNumericCollation(void)
6603{
6604    UErrorCode status = U_ZERO_ERROR;
6605    UCollator  *myCollation;
6606    UCollator  *myReorderCollation;
6607    int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_LATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};
6608    /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };
6609    UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */
6610    UChar fortyS[] = { 0x0053 };
6611    UChar fortyThreeP[] = { 0x0050 };
6612    uint8_t fortyS_sortKey[128];
6613    int32_t fortyS_sortKey_Length;
6614    uint8_t fortyThreeP_sortKey[128];
6615    int32_t fortyThreeP_sortKey_Length;
6616    uint8_t fortyS_sortKey_reorder[128];
6617    int32_t fortyS_sortKey_reorder_Length;
6618    uint8_t fortyThreeP_sortKey_reorder[128];
6619    int32_t fortyThreeP_sortKey_reorder_Length;
6620    UCollationResult collResult;
6621    UCollationResult collResultReorder;
6622
6623    log_verbose("Testing reordering with and without numeric collation\n");
6624
6625    /* build collator tertiary with numeric */
6626    myCollation = ucol_open("", &status);
6627    /*
6628    ucol_setStrength(myCollation, UCOL_TERTIARY);
6629    */
6630    ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6631    if(U_FAILURE(status)) {
6632        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6633        return;
6634    }
6635
6636    /* build collator tertiary with numeric and reordering */
6637    myReorderCollation = ucol_open("", &status);
6638    /*
6639    ucol_setStrength(myReorderCollation, UCOL_TERTIARY);
6640    */
6641    ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
6642    ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &status);
6643    if(U_FAILURE(status)) {
6644        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6645        return;
6646    }
6647
6648    fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fortyS_sortKey, 128);
6649    fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey, 128);
6650    fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);
6651    fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);
6652
6653    if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_sortKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) {
6654        log_err_status(status, "ERROR: couldn't generate sort keys\n");
6655        return;
6656    }
6657    collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6658    collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fortyThreeP, LEN(fortyThreeP));
6659    /*
6660    fprintf(stderr, "\tcollResult = %x\n", collResult);
6661    fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);
6662    fprintf(stderr, "\nfortyS\n");
6663    for (i = 0; i < fortyS_sortKey_Length; i++) {
6664        fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder[i]);
6665    }
6666    fprintf(stderr, "\nfortyThreeP\n");
6667    for (i = 0; i < fortyThreeP_sortKey_Length; i++) {
6668        fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortKey_reorder[i]);
6669    }
6670    */
6671    if (collResult != collResultReorder) {
6672        log_err_status(status, "ERROR: collation results should have been the same.\n");
6673        return;
6674    }
6675
6676    ucol_close(myCollation);
6677    ucol_close(myReorderCollation);
6678}
6679
/*
 * Compares two zero-terminated byte sequences.
 *
 * Returns 0 when both sequences are identical up to and including the
 * terminating zero byte; otherwise returns -1 or 1 according to the unsigned
 * comparison of the first pair of bytes that differ.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
  const uint8_t *left = a;
  const uint8_t *right = b;

  while (*left == *right) {
    if (*left == 0) {
      /* Reached the terminator of both sequences without a difference. */
      return 0;
    }
    ++left;
    ++right;
  }

  if (*left < *right) {
    return -1;
  }
  return 1;
}
6689
6690static void TestImportRulesDeWithPhonebook(void)
6691{
6692  const char* normalRules[] = {
6693    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",
6694    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",
6695    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",
6696  };
6697  const OneTestCase normalTests[] = {
6698    { {0x00e6}, {0x00c6}, UCOL_LESS},
6699    { {0x00fc}, {0x00dc}, UCOL_GREATER},
6700  };
6701
6702  const char* importRules[] = {
6703    "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",
6704    "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6705    "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",
6706  };
6707  const OneTestCase importTests[] = {
6708    { {0x00e6}, {0x00c6}, UCOL_LESS},
6709    { {0x00fc}, {0x00dc}, UCOL_LESS},
6710  };
6711
6712  doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules));
6713  doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules));
6714}
6715
#if 0
/*
 * Disabled test: imports of the European Ordering Rules ("eor") and of the
 * Finnish standard rules, separately and combined.  Each rule set is checked
 * against three comparisons: U+0110 vs U+00F0, U+00A3 vs U+00A5, and
 * "a" vs "a" followed by U+00A3.
 */
static void TestImportRulesFiWithEor(void)
{
  /* DUCET, reached through a dummy rule. */
  const char* ducetRules[] = {
    "&a<b",
  };
  const OneTestCase ducetExpectations[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules: ignore currency characters. */
  const char* europeanRules[] = {
    "[import root-u-co-eor]",
  };
  const OneTestCase europeanExpectations[] = {
    { {0x0110}, {0x00F0}, UCOL_LESS},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  /* Finnish standard rules on their own. */
  const char* finnishRules[] = {
    "[import fi-u-co-standard]",
  };
  const OneTestCase finnishExpectations[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_LESS},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
  };

  /* European Ordering rules followed by the Finnish standard rules. */
  const char* europeanThenFinnishRules[] = {
    "[import root-u-co-eor][import fi-u-co-standard]",
  };

  /* Essentially the same as the combination above once fi.txt is updated with the import. */
  const char* finnishEorRules[] = {
    "[import fi-u-co-eor]",
  };

  /* Shared expectations for both combined forms. */
  const OneTestCase combinedExpectations[] = {
    { {0x0110}, {0x00F0}, UCOL_GREATER},
    { {0x00a3}, {0x00a5}, UCOL_EQUAL},
    { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
  };

  doTestOneTestCase(ducetExpectations, LEN(ducetExpectations),
                    ducetRules, LEN(ducetRules));
  doTestOneTestCase(europeanExpectations, LEN(europeanExpectations),
                    europeanRules, LEN(europeanRules));
  doTestOneTestCase(finnishExpectations, LEN(finnishExpectations),
                    finnishRules, LEN(finnishRules));
  doTestOneTestCase(combinedExpectations, LEN(combinedExpectations),
                    europeanThenFinnishRules, LEN(europeanThenFinnishRules));

  log_knownIssue("8962", NULL);
  /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
  */
  /* doTestOneTestCase(combinedExpectations, LEN(combinedExpectations), finnishEorRules, LEN(finnishEorRules)); */

}
#endif
6785
#if 0
/*
 * This test case tests inclusion with the unihan rules, but it cannot be
 * enabled unless the resource files are built with the -includeUnihanColl
 * option.
 * TODO: Enable this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
  /* DUCET, reached through a dummy rule: U+3402 is expected after U+4E1E. */
  const char* ducetRules[] = {
    "&a<b",
  };
  const OneTestCase ducetExpectations[] = {
    { {0x3402}, {0x4e1e}, UCOL_GREATER},
  };

  /* Imported "ko" unihan rules: the same pair is expected in the opposite order. */
  const char* unihanImportRules[] = {
    "[import ko-u-co-unihan]",
  };
  const OneTestCase unihanExpectations[] = {
    { {0x3402}, {0x4e1e}, UCOL_LESS},
  };

  doTestOneTestCase(ducetExpectations, LEN(ducetExpectations),
                    ducetRules, LEN(ducetRules));
  doTestOneTestCase(unihanExpectations, LEN(unihanExpectations),
                    unihanImportRules, LEN(unihanImportRules));

}
#endif
6817
6818static void TestImport(void)
6819{
6820    UCollator* vicoll;
6821    UCollator* escoll;
6822    UCollator* viescoll;
6823    UCollator* importviescoll;
6824    UParseError error;
6825    UErrorCode status = U_ZERO_ERROR;
6826    UChar* virules;
6827    int32_t viruleslength;
6828    UChar* esrules;
6829    int32_t esruleslength;
6830    UChar* viesrules;
6831    int32_t viesruleslength;
6832    char srules[500] = "[import vi][import es]";
6833    UChar rules[500];
6834    uint32_t length = 0;
6835    int32_t itemCount;
6836    int32_t i, k;
6837    UChar32 start;
6838    UChar32 end;
6839    UChar str[500];
6840    int32_t strLength;
6841
6842    uint8_t sk1[500];
6843    uint8_t sk2[500];
6844
6845    UBool b;
6846    USet* tailoredSet;
6847    USet* importTailoredSet;
6848
6849
6850    vicoll = ucol_open("vi", &status);
6851    if(U_FAILURE(status)){
6852        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6853        return;
6854    }
6855
6856    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6857    escoll = ucol_open("es", &status);
6858    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6859    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6860    viesrules[0] = 0;
6861    u_strcat(viesrules, virules);
6862    u_strcat(viesrules, esrules);
6863    viesruleslength = viruleslength + esruleslength;
6864    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6865
6866    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6867    length = u_unescape(srules, rules, 500);
6868    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6869    if(U_FAILURE(status)){
6870        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6871        return;
6872    }
6873
6874    tailoredSet = ucol_getTailoredSet(viescoll, &status);
6875    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6876
6877    if(!uset_equals(tailoredSet, importTailoredSet)){
6878        log_err("Tailored sets not equal");
6879    }
6880
6881    uset_close(importTailoredSet);
6882
6883    itemCount = uset_getItemCount(tailoredSet);
6884
6885    for( i = 0; i < itemCount; i++){
6886        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6887        if(strLength < 2){
6888            for (; start <= end; start++){
6889                k = 0;
6890                U16_APPEND(str, k, 500, start, b);
6891                (void)b;    /* Suppress set but not used warning. */
6892                ucol_getSortKey(viescoll, str, 1, sk1, 500);
6893                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6894                if(compare_uint8_t_arrays(sk1, sk2) != 0){
6895                    log_err("Sort key for %s not equal\n", str);
6896                    break;
6897                }
6898            }
6899        }else{
6900            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6901            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6902            if(compare_uint8_t_arrays(sk1, sk2) != 0){
6903                log_err("ZZSort key for %s not equal\n", str);
6904                break;
6905            }
6906
6907        }
6908    }
6909
6910    uset_close(tailoredSet);
6911
6912    uprv_free(viesrules);
6913
6914    ucol_close(vicoll);
6915    ucol_close(escoll);
6916    ucol_close(viescoll);
6917    ucol_close(importviescoll);
6918}
6919
6920static void TestImportWithType(void)
6921{
6922    UCollator* vicoll;
6923    UCollator* decoll;
6924    UCollator* videcoll;
6925    UCollator* importvidecoll;
6926    UParseError error;
6927    UErrorCode status = U_ZERO_ERROR;
6928    const UChar* virules;
6929    int32_t viruleslength;
6930    const UChar* derules;
6931    int32_t deruleslength;
6932    UChar* viderules;
6933    int32_t videruleslength;
6934    const char srules[500] = "[import vi][import de-u-co-phonebk]";
6935    UChar rules[500];
6936    uint32_t length = 0;
6937    int32_t itemCount;
6938    int32_t i, k;
6939    UChar32 start;
6940    UChar32 end;
6941    UChar str[500];
6942    int32_t strLength;
6943
6944    uint8_t sk1[500];
6945    uint8_t sk2[500];
6946
6947    USet* tailoredSet;
6948    USet* importTailoredSet;
6949
6950    vicoll = ucol_open("vi", &status);
6951    if(U_FAILURE(status)){
6952        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6953        return;
6954    }
6955    virules = ucol_getRules(vicoll, &viruleslength);
6956    /* decoll = ucol_open("de@collation=phonebook", &status); */
6957    decoll = ucol_open("de-u-co-phonebk", &status);
6958    if(U_FAILURE(status)){
6959        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6960        return;
6961    }
6962
6963
6964    derules = ucol_getRules(decoll, &deruleslength);
6965    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6966    viderules[0] = 0;
6967    u_strcat(viderules, virules);
6968    u_strcat(viderules, derules);
6969    videruleslength = viruleslength + deruleslength;
6970    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6971
6972    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6973    length = u_unescape(srules, rules, 500);
6974    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6975    if(U_FAILURE(status)){
6976        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6977        return;
6978    }
6979
6980    tailoredSet = ucol_getTailoredSet(videcoll, &status);
6981    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6982
6983    if(!uset_equals(tailoredSet, importTailoredSet)){
6984        log_err("Tailored sets not equal");
6985    }
6986
6987    uset_close(importTailoredSet);
6988
6989    itemCount = uset_getItemCount(tailoredSet);
6990
6991    for( i = 0; i < itemCount; i++){
6992        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6993        if(strLength < 2){
6994            for (; start <= end; start++){
6995                k = 0;
6996                U16_APPEND_UNSAFE(str, k, start);
6997                ucol_getSortKey(videcoll, str, 1, sk1, 500);
6998                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6999                if(compare_uint8_t_arrays(sk1, sk2) != 0){
7000                    log_err("Sort key for %s not equal\n", str);
7001                    break;
7002                }
7003            }
7004        }else{
7005            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
7006            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
7007            if(compare_uint8_t_arrays(sk1, sk2) != 0){
7008                log_err("Sort key for %s not equal\n", str);
7009                break;
7010            }
7011
7012        }
7013    }
7014
7015    uset_close(tailoredSet);
7016
7017    uprv_free(viderules);
7018
7019    ucol_close(videcoll);
7020    ucol_close(importvidecoll);
7021    ucol_close(vicoll);
7022    ucol_close(decoll);
7023}
7024
/*
 * First entry of longUpperStrItems (input for TestCaseLevelBufferOverflow).
 * Text, one UChar per character:
 * 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia'
 */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
7038
/*
 * Second entry of longUpperStrItems (input for TestCaseLevelBufferOverflow).
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times;
 * each row below is one 25-character repetition.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
7047
/*
 * Third entry of longUpperStrItems (input for TestCaseLevelBufferOverflow).
 * 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times; each row below is one
 * 27-character repetition.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
7063
/* Number of elements in an array; the argument must be a real array, not a pointer. */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))

/* One long test string: pointer to its UChars plus its length in UChars. */
typedef struct {
    const UChar * longUpperStrPtr;
    int32_t       longUpperStrLen;
} LongUpperStrItem;

/*
 * String pointers must be in reverse collation order of the corresponding strings:
 * TestCaseLevelBufferOverflow expects each entry's sort key to compare lower than
 * the sort key of the entry before it.  The entry with a NULL pointer ends the list.
 */
static const LongUpperStrItem longUpperStrItems[] = {
    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
    { NULL,          0                           }
};

/* Size in bytes of the sort key buffers used by TestCaseLevelBufferOverflow. */
enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
7080
7081/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
7082static void TestCaseLevelBufferOverflow(void)
7083{
7084    UErrorCode status = U_ZERO_ERROR;
7085    UCollator * ucol = ucol_open("root", &status);
7086    if ( U_SUCCESS(status) ) {
7087        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
7088        if ( U_SUCCESS(status) ) {
7089            const LongUpperStrItem * itemPtr;
7090            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
7091            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
7092                int32_t sortKeyLen;
7093                if (itemPtr > longUpperStrItems) {
7094                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
7095                }
7096                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
7097                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
7098                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
7099                    break;
7100                }
7101                if ( itemPtr > longUpperStrItems ) {
7102                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
7103                    if (compareResult >= 0) {
7104                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
7105                    }
7106                }
7107            }
7108        } else {
7109            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
7110        }
7111        ucol_close(ucol);
7112    } else {
7113        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
7114    }
7115}
7116
7117
/*
 * Registers test function x through addTest() under the path
 * "tscoll/cmsccoll/" followed by the function's own name.  The macro expects a
 * variable named `root` to be in scope where it is used.
 */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/*
 * Registers the miscellaneous collation tests of this file with the test
 * tree passed in as root (forwarded to addTest() by the TEST macro).
 * Entries that are commented out are disabled; where a reason is known it is
 * noted next to the entry.
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    TEST(TestVariableTopSetting);
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    /* Same-strength lists/ranges, special and private-use characters, rule imports. */
    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    TEST(TestImportRulesDeWithPhonebook);
    /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */
    /* TEST(TestImportRulesCJKWithUnihan); */
    TEST(TestImport);
    TEST(TestImportWithType);

    /* Script and reorder-code reordering tests. */
    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestHaniReorderWithOtherRules);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);
    TEST(TestReorderWithNumericCollation);

    TEST(TestCaseLevelBufferOverflow);
}
7228
7229#endif /* #if !UCONFIG_NO_COLLATION */
7230