1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2013, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "ucol_tok.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
38#include "unicode/ures.h"
39#include "unicode/uscript.h"
40#include "unicode/utf16.h"
41#include "uparse.h"
42#include "putilimp.h"
43
44
/* Number of elements in a true array (not a pointer). The argument is fully
   parenthesized so that any array-valued expression can be passed safely. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row capacity, in UChars, of the fixed-size UChar test tables in this file. */
#define MAX_TOKEN_LEN 16
48
/*
 * Signature of a string-comparison callback used by the probing helpers in
 * this file (swampEarlier, swampLater, probeStrength, testSwitch).
 * collator is an opaque handle interpreted by the callback; source/sLen and
 * target/tLen are the two strings with their lengths in UChars.
 * NOTE(review): "object" receives the callers' "opts" value; the only
 * implementation visible here (ucaTest) ignores it.
 */
typedef UCollationResult tst_strcoll(void *collator, const int object,
                        const UChar *source, const int sLen,
                        const UChar *target, const int tLen);
52
53
54
/* Strings for the first rule set of IncompleteCntTest, listed in the order
   in which each entry is expected to sort before every later entry. */
static const char cnt1[][10] = {
  "AA",
  "AC",
  "AZ",
  "AQ",
  "AB",
  "ABZ",
  "ABQ",
  "Z",
  "ABC",
  "Q",
  "B"
};

/* Strings for the second rule set of IncompleteCntTest, same ordering
   convention as cnt1. */
static const char cnt2[][10] = {
  "DA",
  "DAD",
  "DAZ",
  "MAR",
  "Z",
  "DAVIS",
  "MARK",
  "DAV",
  "DAVI"
};
81
82static void IncompleteCntTest(void)
83{
84  UErrorCode status = U_ZERO_ERROR;
85  UChar temp[90];
86  UChar t1[90];
87  UChar t2[90];
88
89  UCollator *coll =  NULL;
90  uint32_t i = 0, j = 0;
91  uint32_t size = 0;
92
93  u_uastrcpy(temp, " & Z < ABC < Q < B");
94
95  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
96
97  if(U_SUCCESS(status)) {
98    size = sizeof(cnt1)/sizeof(cnt1[0]);
99    for(i = 0; i < size-1; i++) {
100      for(j = i+1; j < size; j++) {
101        UCollationElements *iter;
102        u_uastrcpy(t1, cnt1[i]);
103        u_uastrcpy(t2, cnt1[j]);
104        doTest(coll, t1, t2, UCOL_LESS);
105        /* synwee : added collation element iterator test */
106        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
107        if (U_FAILURE(status)) {
108          log_err("Creation of iterator failed\n");
109          break;
110        }
111        backAndForth(iter);
112        ucol_closeElements(iter);
113      }
114    }
115  }
116
117  ucol_close(coll);
118
119
120  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
121  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
122
123  if(U_SUCCESS(status)) {
124    size = sizeof(cnt2)/sizeof(cnt2[0]);
125    for(i = 0; i < size-1; i++) {
126      for(j = i+1; j < size; j++) {
127        UCollationElements *iter;
128        u_uastrcpy(t1, cnt2[i]);
129        u_uastrcpy(t2, cnt2[j]);
130        doTest(coll, t1, t2, UCOL_LESS);
131
132        /* synwee : added collation element iterator test */
133        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
134        if (U_FAILURE(status)) {
135          log_err("Creation of iterator failed\n");
136          break;
137        }
138        backAndForth(iter);
139        ucol_closeElements(iter);
140      }
141    }
142  }
143
144  ucol_close(coll);
145
146
147}
148
149const static char shifted[][20] = {
150  "black bird",
151  "black-bird",
152  "blackbird",
153  "black Bird",
154  "black-Bird",
155  "blackBird",
156  "black birds",
157  "black-birds",
158  "blackbirds"
159};
160
161const static UCollationResult shiftedTert[] = {
162  UCOL_EQUAL,
163  UCOL_EQUAL,
164  UCOL_EQUAL,
165  UCOL_LESS,
166  UCOL_EQUAL,
167  UCOL_EQUAL,
168  UCOL_LESS,
169  UCOL_EQUAL,
170  UCOL_EQUAL
171};
172
173const static char nonignorable[][20] = {
174  "black bird",
175  "black Bird",
176  "black birds",
177  "black-bird",
178  "black-Bird",
179  "black-birds",
180  "blackbird",
181  "blackBird",
182  "blackbirds"
183};
184
185static void BlackBirdTest(void) {
186  UErrorCode status = U_ZERO_ERROR;
187  UChar t1[90];
188  UChar t2[90];
189
190  uint32_t i = 0, j = 0;
191  uint32_t size = 0;
192  UCollator *coll = ucol_open("en_US", &status);
193
194  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
195  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
196
197  if(U_SUCCESS(status)) {
198    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
199    for(i = 0; i < size-1; i++) {
200      for(j = i+1; j < size; j++) {
201        u_uastrcpy(t1, nonignorable[i]);
202        u_uastrcpy(t2, nonignorable[j]);
203        doTest(coll, t1, t2, UCOL_LESS);
204      }
205    }
206  }
207
208  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
209  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
210
211  if(U_SUCCESS(status)) {
212    size = sizeof(shifted)/sizeof(shifted[0]);
213    for(i = 0; i < size-1; i++) {
214      for(j = i+1; j < size; j++) {
215        u_uastrcpy(t1, shifted[i]);
216        u_uastrcpy(t2, shifted[j]);
217        doTest(coll, t1, t2, UCOL_LESS);
218      }
219    }
220  }
221
222  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
223  if(U_SUCCESS(status)) {
224    size = sizeof(shifted)/sizeof(shifted[0]);
225    for(i = 1; i < size; i++) {
226      u_uastrcpy(t1, shifted[i-1]);
227      u_uastrcpy(t2, shifted[i]);
228      doTest(coll, t1, t2, shiftedTert[i]);
229    }
230  }
231
232  ucol_close(coll);
233}
234
235const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
236    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
237    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
238    {0x0041/*'A'*/, 0x0300, 0x0000},
239    {0x00C0, 0x0301, 0x0000},
240    /* this would work with forced normalization */
241    {0x00C0, 0x0316, 0x0000}
242};
243
244const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
245    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
246    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
247    {0x00C0, 0},
248    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
249    /* this would work with forced normalization */
250    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
251};
252
253const static UCollationResult results[] = {
254    UCOL_GREATER,
255    UCOL_EQUAL,
256    UCOL_EQUAL,
257    UCOL_GREATER,
258    UCOL_EQUAL
259};
260
261static void FunkyATest(void)
262{
263
264    int32_t i;
265    UErrorCode status = U_ZERO_ERROR;
266    UCollator  *myCollation;
267    myCollation = ucol_open("en_US", &status);
268    if(U_FAILURE(status)){
269        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
270        return;
271    }
272    log_verbose("Testing some A letters, for some reason\n");
273    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
274    ucol_setStrength(myCollation, UCOL_TERTIARY);
275    for (i = 0; i < 4 ; i++)
276    {
277        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
278    }
279    ucol_close(myCollation);
280}
281
/* Values of the UCOL_CASE_FIRST attribute to iterate over. */
UColAttributeValue caseFirst[] = {
    UCOL_OFF,
    UCOL_LOWER_FIRST,
    UCOL_UPPER_FIRST
};


/* Values of the UCOL_ALTERNATE_HANDLING attribute to iterate over. */
UColAttributeValue alternateHandling[] = {
    UCOL_NON_IGNORABLE,
    UCOL_SHIFTED
};

/* Values of the UCOL_CASE_LEVEL attribute to iterate over. */
UColAttributeValue caseLevel[] = {
    UCOL_OFF,
    UCOL_ON
};

/* Values of the UCOL_STRENGTH attribute to iterate over, weakest first. */
UColAttributeValue strengths[] = {
    UCOL_PRIMARY,
    UCOL_SECONDARY,
    UCOL_TERTIARY,
    UCOL_QUATERNARY,
    UCOL_IDENTICAL
};
306
#if 0
/* Printable names parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names parallel to caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names parallel to caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Prints to stderr the sort key of "M\u00E4rk Davis" for every combination
 * of the caseFirst, alternateHandling, caseLevel and strengths values.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k, sortkeysize;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    /* Index with h: i still holds a stale value from an earlier loop here
       and would read past the end of caseFirst[]. */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  /* Release the collator opened above; it was previously leaked. */
  ucol_close(coll);
}
#endif
384
385static void BillFairmanTest(void) {
386/*
387** check for actual locale via ICU resource bundles
388**
389** lp points to the original locale ("fr_FR_....")
390*/
391
392    UResourceBundle *lr,*cr;
393    UErrorCode              lec = U_ZERO_ERROR;
394    const char *lp = "fr_FR_you_ll_never_find_this_locale";
395
396    log_verbose("BillFairmanTest\n");
397
398    lr = ures_open(NULL,lp,&lec);
399    if (lr) {
400        cr = ures_getByKey(lr,"collations",0,&lec);
401        if (cr) {
402            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
403            if (lp) {
404                if (U_SUCCESS(lec)) {
405                    if(strcmp(lp, "fr") != 0) {
406                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
407                    }
408                }
409            }
410            ures_close(cr);
411        }
412        ures_close(lr);
413    }
414}
415
416static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
417    UChar source[256] = { '\0'};
418    UChar target[256] = { '\0'};
419    UChar preP = 0x31a3;
420    UChar preQ = 0x310d;
421/*
422    UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
423    UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
424*/
425    /*log_verbose("Testing primary\n");*/
426
427    doTest(col, p, q, UCOL_LESS);
428/*
429    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
430
431    if(result!=UCOL_LESS){
432       aescstrdup(p,utfSource,256);
433       aescstrdup(q,utfTarget,256);
434       fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
435    }
436*/
437    source[0] = preP;
438    u_strcpy(source+1,p);
439    target[0] = preQ;
440    u_strcpy(target+1,q);
441    doTest(col, source, target, UCOL_LESS);
442/*
443    fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
444*/
445}
446
447static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
448    UChar source[256] = { '\0'};
449    UChar target[256] = { '\0'};
450
451    /*log_verbose("Testing secondary\n");*/
452
453    doTest(col, p, q, UCOL_LESS);
454/*
455    fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
456*/
457    source[0] = 0x0053;
458    u_strcpy(source+1,p);
459    target[0]= 0x0073;
460    u_strcpy(target+1,q);
461
462    doTest(col, source, target, UCOL_LESS);
463/*
464    fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
465*/
466
467
468    u_strcpy(source,p);
469    source[u_strlen(p)] = 0x62;
470    source[u_strlen(p)+1] = 0;
471
472
473    u_strcpy(target,q);
474    target[u_strlen(q)] = 0x61;
475    target[u_strlen(q)+1] = 0;
476
477    doTest(col, source, target, UCOL_GREATER);
478
479/*
480    fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
481*/
482}
483
484static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
485    UChar source[256] = { '\0'};
486    UChar target[256] = { '\0'};
487
488    /*log_verbose("Testing tertiary\n");*/
489
490    doTest(col, p, q, UCOL_LESS);
491/*
492    fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
493*/
494    source[0] = 0x0020;
495    u_strcpy(source+1,p);
496    target[0]= 0x002D;
497    u_strcpy(target+1,q);
498
499    doTest(col, source, target, UCOL_LESS);
500/*
501    fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
502*/
503
504    u_strcpy(source,p);
505    source[u_strlen(p)] = 0xE0;
506    source[u_strlen(p)+1] = 0;
507
508    u_strcpy(target,q);
509    target[u_strlen(q)] = 0x61;
510    target[u_strlen(q)+1] = 0;
511
512    doTest(col, source, target, UCOL_GREATER);
513
514/*
515    fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
516*/
517}
518
519static void testEquality(UCollator* col, const UChar* p,const UChar* q){
520/*
521    UChar source[256] = { '\0'};
522    UChar target[256] = { '\0'};
523*/
524
525    doTest(col, p, q, UCOL_EQUAL);
526/*
527    fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
528*/
529}
530
531static void testCollator(UCollator *coll, UErrorCode *status) {
532  const UChar *rules = NULL, *current = NULL;
533  int32_t ruleLen = 0;
534  uint32_t strength = 0;
535  uint32_t chOffset = 0; uint32_t chLen = 0;
536  uint32_t exOffset = 0; uint32_t exLen = 0;
537  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
538  uint32_t firstEx = 0;
539/*  uint32_t rExpsLen = 0; */
540  uint32_t firstLen = 0;
541  UBool varT = FALSE; UBool top_ = TRUE;
542  uint16_t specs = 0;
543  UBool startOfRules = TRUE;
544  UBool lastReset = FALSE;
545  UBool before = FALSE;
546  uint32_t beforeStrength = 0;
547  UColTokenParser src;
548  UColOptionSet opts;
549
550  UChar first[256];
551  UChar second[256];
552  UChar tempB[256];
553  uint32_t tempLen;
554  UChar *rulesCopy = NULL;
555  UParseError parseError;
556
557  uprv_memset(&src, 0, sizeof(UColTokenParser));
558
559  src.opts = &opts;
560
561  rules = ucol_getRules(coll, &ruleLen);
562  if(U_SUCCESS(*status) && ruleLen > 0) {
563    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
564    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
565    src.current = src.source = rulesCopy;
566    src.end = rulesCopy+ruleLen;
567    src.extraCurrent = src.end;
568    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
569    *first = *second = 0;
570
571	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
572	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
573    while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
574      strength = src.parsedToken.strength;
575      chOffset = src.parsedToken.charsOffset;
576      chLen = src.parsedToken.charsLen;
577      exOffset = src.parsedToken.extensionOffset;
578      exLen = src.parsedToken.extensionLen;
579      prefixOffset = src.parsedToken.prefixOffset;
580      prefixLen = src.parsedToken.prefixLen;
581      specs = src.parsedToken.flags;
582
583      startOfRules = FALSE;
584      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
585      (void)varT;    /* Suppress set but not used warning. */
586      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
587      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
588        second[0] = 0;
589      } else {
590        u_strncpy(second,src.source+chOffset, chLen);
591        second[chLen] = 0;
592
593        if(exLen > 0 && firstEx == 0) {
594          u_strncat(first, src.source+exOffset, exLen);
595          first[firstLen+exLen] = 0;
596        }
597
598        if(lastReset == TRUE && prefixLen != 0) {
599          u_strncpy(first+prefixLen, first, firstLen);
600          u_strncpy(first, src.source+prefixOffset, prefixLen);
601          first[firstLen+prefixLen] = 0;
602          firstLen = firstLen+prefixLen;
603        }
604
605        if(before == TRUE) { /* swap first and second */
606          u_strcpy(tempB, first);
607          u_strcpy(first, second);
608          u_strcpy(second, tempB);
609
610          tempLen = firstLen;
611          firstLen = chLen;
612          chLen = tempLen;
613
614          tempLen = firstEx;
615          firstEx = exLen;
616          exLen = tempLen;
617          if(beforeStrength < strength) {
618            strength = beforeStrength;
619          }
620        }
621      }
622      lastReset = FALSE;
623
624      switch(strength){
625      case UCOL_IDENTICAL:
626          testEquality(coll,first,second);
627          break;
628      case UCOL_PRIMARY:
629          testPrimary(coll,first,second);
630          break;
631      case UCOL_SECONDARY:
632          testSecondary(coll,first,second);
633          break;
634      case UCOL_TERTIARY:
635          testTertiary(coll,first,second);
636          break;
637      case UCOL_TOK_RESET:
638        lastReset = TRUE;
639        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
640        if(before) {
641          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
642        }
643        break;
644      default:
645          break;
646      }
647
648      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
649        before = FALSE;
650      } else {
651        firstLen = chLen;
652        firstEx = exLen;
653        u_strcpy(first, second);
654      }
655    }
656    uprv_free(src.source);
657    uprv_free(src.reorderCodes);
658  }
659}
660
661static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
662  UCollator *UCA = (UCollator *)collator;
663  return ucol_strcoll(UCA, source, sLen, target, tLen);
664}
665
666/*
667static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
668#if U_PLATFORM_HAS_WIN32_API
669  LCID lcid = (LCID)collator;
670  return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
671#else
672  return 0;
673#endif
674}
675*/
676
677static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
678                                     UChar s1, UChar s2,
679                                     const UChar *s, const uint32_t sLen,
680                                     const UChar *t, const uint32_t tLen) {
681  UChar source[256] = {0};
682  UChar target[256] = {0};
683
684  source[0] = s1;
685  u_strcpy(source+1, s);
686  target[0] = s2;
687  u_strcpy(target+1, t);
688
689  return func(collator, opts, source, sLen+1, target, tLen+1);
690}
691
692static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
693                                   UChar s1, UChar s2,
694                                   const UChar *s, const uint32_t sLen,
695                                   const UChar *t, const uint32_t tLen) {
696  UChar source[256] = {0};
697  UChar target[256] = {0};
698
699  u_strcpy(source, s);
700  source[sLen] = s1;
701  u_strcpy(target, t);
702  target[tLen] = s2;
703
704  return func(collator, opts, source, sLen+1, target, tLen+1);
705}
706
707static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
708                              const UChar *s, const uint32_t sLen,
709                              const UChar *t, const uint32_t tLen,
710                              UCollationResult result) {
711  /*UChar fPrimary = 0x6d;*/
712  /*UChar sPrimary = 0x6e;*/
713  UChar fSecondary = 0x310d;
714  UChar sSecondary = 0x31a3;
715  UChar fTertiary = 0x310f;
716  UChar sTertiary = 0x31b7;
717
718  UCollationResult oposite;
719  if(result == UCOL_EQUAL) {
720    return UCOL_IDENTICAL;
721  } else if(result == UCOL_GREATER) {
722    oposite = UCOL_LESS;
723  } else {
724    oposite = UCOL_GREATER;
725  }
726
727  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
728    return UCOL_PRIMARY;
729  } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
730    (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
731    return UCOL_SECONDARY;
732  } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
733    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
734    return UCOL_TERTIARY;
735  } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
736    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
737    return UCOL_QUATERNARY;
738  } else {
739    return UCOL_IDENTICAL;
740  }
741}
742
743static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
744  uint32_t i = 0;
745
746  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
747    buffer[0] = '=';
748    buffer[1] = '=';
749    buffer[2] = '\0';
750  } else if(res == UCOL_GREATER) {
751    for(i = 0; i<strength+1; i++) {
752      buffer[i] = '>';
753    }
754    buffer[strength+1] = '\0';
755  } else {
756    for(i = 0; i<strength+1; i++) {
757      buffer[i] = '<';
758    }
759    buffer[strength+1] = '\0';
760  }
761
762  return buffer;
763}
764
765
766
767static void logFailure (const char *platform, const char *test,
768                        const UChar *source, const uint32_t sLen,
769                        const UChar *target, const uint32_t tLen,
770                        UCollationResult realRes, uint32_t realStrength,
771                        UCollationResult expRes, uint32_t expStrength, UBool error) {
772
773  uint32_t i = 0;
774
775  char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
776  static int32_t maxOutputLength = 0;
777  int32_t outputLength;
778
779  *sEsc = *tEsc = *s = *t = 0;
780  if(error == TRUE) {
781    log_err("Difference between expected and generated order. Run test with -v for more info\n");
782  } else if(getTestOption(VERBOSITY_OPTION) == 0) {
783    return;
784  }
785  for(i = 0; i<sLen; i++) {
786    sprintf(b, "%04X", source[i]);
787    strcat(sEsc, "\\u");
788    strcat(sEsc, b);
789    strcat(s, b);
790    strcat(s, " ");
791    if(source[i] < 0x80) {
792      sprintf(b, "(%c)", source[i]);
793      strcat(sEsc, b);
794    }
795  }
796  for(i = 0; i<tLen; i++) {
797    sprintf(b, "%04X", target[i]);
798    strcat(tEsc, "\\u");
799    strcat(tEsc, b);
800    strcat(t, b);
801    strcat(t, " ");
802    if(target[i] < 0x80) {
803      sprintf(b, "(%c)", target[i]);
804      strcat(tEsc, b);
805    }
806  }
807/*
808  strcpy(output, "[[ ");
809  strcat(output, sEsc);
810  strcat(output, getRelationSymbol(expRes, expStrength, relation));
811  strcat(output, tEsc);
812
813  strcat(output, " : ");
814
815  strcat(output, sEsc);
816  strcat(output, getRelationSymbol(realRes, realStrength, relation));
817  strcat(output, tEsc);
818  strcat(output, " ]] ");
819
820  log_verbose("%s", output);
821*/
822
823
824  strcpy(output, "DIFF: ");
825
826  strcat(output, s);
827  strcat(output, " : ");
828  strcat(output, t);
829
830  strcat(output, test);
831  strcat(output, ": ");
832
833  strcat(output, sEsc);
834  strcat(output, getRelationSymbol(expRes, expStrength, relation));
835  strcat(output, tEsc);
836
837  strcat(output, " ");
838
839  strcat(output, platform);
840  strcat(output, ": ");
841
842  strcat(output, sEsc);
843  strcat(output, getRelationSymbol(realRes, realStrength, relation));
844  strcat(output, tEsc);
845
846  outputLength = (int32_t)strlen(output);
847  if(outputLength > maxOutputLength) {
848    maxOutputLength = outputLength;
849    U_ASSERT(outputLength < sizeof(output));
850  }
851
852  log_verbose("%s\n", output);
853
854}
855
856/*
857static void printOutRules(const UChar *rules) {
858  uint32_t len = u_strlen(rules);
859  uint32_t i = 0;
860  char toPrint;
861  uint32_t line = 0;
862
863  fprintf(stdout, "Rules:");
864
865  for(i = 0; i<len; i++) {
866    if(rules[i]<0x7f && rules[i]>=0x20) {
867      toPrint = (char)rules[i];
868      if(toPrint == '&') {
869        line = 1;
870        fprintf(stdout, "\n&");
871      } else if(toPrint == ';') {
872        fprintf(stdout, "<<");
873        line+=2;
874      } else if(toPrint == ',') {
875        fprintf(stdout, "<<<");
876        line+=3;
877      } else {
878        fprintf(stdout, "%c", toPrint);
879        line++;
880      }
881    } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
882      fprintf(stdout, "\\u%04X", rules[i]);
883      line+=6;
884    }
885    if(line>72) {
886      fprintf(stdout, "\n");
887      line = 0;
888    }
889  }
890
891  log_verbose("\n");
892
893}
894*/
895
896static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
897  uint32_t diffs = 0;
898  UCollationResult realResult;
899  uint32_t realStrength;
900
901  uint32_t sLen = u_strlen(first);
902  uint32_t tLen = u_strlen(second);
903
904  realResult = func(collator, opts, first, sLen, second, tLen);
905  realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
906
907  if(strength == UCOL_IDENTICAL && realResult != UCOL_EQUAL) {
908    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
909    diffs++;
910  } else if(realResult != UCOL_LESS || realStrength != strength) {
911    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
912    diffs++;
913  }
914  return diffs;
915}
916
917
918static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
919  const UChar *rules = NULL, *current = NULL;
920  int32_t ruleLen = 0;
921  uint32_t strength = 0;
922  uint32_t chOffset = 0; uint32_t chLen = 0;
923  uint32_t exOffset = 0; uint32_t exLen = 0;
924  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
925/*  uint32_t rExpsLen = 0; */
926  uint32_t firstLen = 0, secondLen = 0;
927  UBool varT = FALSE; UBool top_ = TRUE;
928  uint16_t specs = 0;
929  UBool startOfRules = TRUE;
930  UColTokenParser src;
931  UColOptionSet opts;
932
933  UChar first[256];
934  UChar second[256];
935  UChar *rulesCopy = NULL;
936
937  uint32_t UCAdiff = 0;
938  uint32_t Windiff = 1;
939  UParseError parseError;
940
941  (void)top_;      /* Suppress set but not used warnings. */
942  (void)varT;
943  (void)secondLen;
944  (void)prefixLen;
945  (void)prefixOffset;
946
947  uprv_memset(&src, 0, sizeof(UColTokenParser));
948  src.opts = &opts;
949
950  rules = ucol_getRules(coll, &ruleLen);
951
952  /*printOutRules(rules);*/
953
954  if(U_SUCCESS(*status) && ruleLen > 0) {
955    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
956    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
957    src.current = src.source = rulesCopy;
958    src.end = rulesCopy+ruleLen;
959    src.extraCurrent = src.end;
960    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
961    *first = *second = 0;
962
963    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
964       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
965    while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
966      strength = src.parsedToken.strength;
967      chOffset = src.parsedToken.charsOffset;
968      chLen = src.parsedToken.charsLen;
969      exOffset = src.parsedToken.extensionOffset;
970      exLen = src.parsedToken.extensionLen;
971      prefixOffset = src.parsedToken.prefixOffset;
972      prefixLen = src.parsedToken.prefixLen;
973      specs = src.parsedToken.flags;
974
975      startOfRules = FALSE;
976      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
977      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
978
979      u_strncpy(second,src.source+chOffset, chLen);
980      second[chLen] = 0;
981      secondLen = chLen;
982
983      if(exLen > 0) {
984        u_strncat(first, src.source+exOffset, exLen);
985        first[firstLen+exLen] = 0;
986        firstLen += exLen;
987      }
988
989      if(strength != UCOL_TOK_RESET) {
990        if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
991          UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
992          /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
993        }
994      }
995
996
997      firstLen = chLen;
998      u_strcpy(first, second);
999
1000    }
1001    if(UCAdiff != 0 && Windiff != 0) {
1002      log_verbose("\n");
1003    }
1004    if(UCAdiff == 0) {
1005      log_verbose("No immediate difference with %s!\n", refName);
1006    }
1007    if(Windiff == 0) {
1008      log_verbose("No immediate difference with Win32!\n");
1009    }
1010    uprv_free(src.source);
1011    uprv_free(src.reorderCodes);
1012  }
1013}
1014
/*
 * Compares two collation elements, each given as a lead CE plus its
 * continuation CE, level by level:
 *   primary   - upper 16 bits of the lead, then upper 16 bits of the continuation
 *   secondary - bits 15..8 of the lead, then bits 15..8 of the continuation
 *   tertiary  - bits 7..0 of the lead, then bits 7..0 of the continuation
 * Returns 0 if both pairs are identical, -1 if (s1, s2) orders before
 * (t1, t2) and 1 otherwise.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t sKey = 0, tKey = 0;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* primary weights */
  sKey = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
  tKey = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
  if(sKey != tKey) {
    return (sKey < tKey) ? -1 : 1;
  }

  /* secondary weights */
  sKey = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00)>>8);
  tKey = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00)>>8);
  if(sKey != tKey) {
    return (sKey < tKey) ? -1 : 1;
  }

  /* tertiary weights: the pairs are known to differ, so no "equal" case is left */
  sKey = ((s1 & 0x000000FF)<<8) | (s2 & 0x000000FF);
  tKey = ((t1 & 0x000000FF)<<8) | (t2 & 0x000000FF);
  return (sKey < tKey) ? -1 : 1;
}
1051
/*
 * Boundary CEs recorded for one indirect position; filled in by
 * setIndirectBoundaries.
 */
typedef struct {
  uint32_t startCE;      /* lead CE of the start of the position (start[0]) */
  uint32_t startContCE;  /* continuation CE of the start (start[1]) */
  uint32_t limitCE;      /* lead CE of the limit (end[0]), or 0 if no limit was given */
  uint32_t limitContCE;  /* continuation CE of the limit (end[1]), or 0 if no limit was given */
} indirectBoundaries;
1058
/* These values are used for finding CE values for indirect positioning. */
/* Indirect positioning is a mechanism for allowing resets on symbolic   */
/* values. It only works for resets, and indirect names cannot be        */
/* tailored. An indirect name can define either an anchor point or a     */
/* range. An anchor point behaves in exactly the same way as a code      */
/* point in a reset would, except that it cannot be tailored. A range    */
/* (currently only the [top] range is known) explicitly sets the upper   */
/* bound for generated CEs, thus allowing better control over how many   */
/* CEs can be squeezed into the range without a performance penalty.     */
/* In that respect, we use [top] for tailoring of locales that use CJK   */
/* characters. Other indirect values are currently a pure convenience:   */
/* they can be used to ensure that the CEs will always be positioned in  */
/* the same place relative to a point with known properties (e.g. first  */
/* primary ignorable).                                                   */
1073static indirectBoundaries ucolIndirectBoundaries[15];
1074static UBool indirectBoundariesSet = FALSE;
1075static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1076    /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1077    /* to initalize here. */
1078    ucolIndirectBoundaries[indexR].startCE = start[0];
1079    ucolIndirectBoundaries[indexR].startContCE = start[1];
1080    if(end) {
1081        ucolIndirectBoundaries[indexR].limitCE = end[0];
1082        ucolIndirectBoundaries[indexR].limitContCE = end[1];
1083    } else {
1084        ucolIndirectBoundaries[indexR].limitCE = 0;
1085        ucolIndirectBoundaries[indexR].limitContCE = 0;
1086    }
1087}
1088
1089static void testCEs(UCollator *coll, UErrorCode *status) {
1090    const UChar *rules = NULL, *current = NULL;
1091    int32_t ruleLen = 0;
1092
1093    uint32_t strength = 0;
1094    uint32_t maxStrength = UCOL_IDENTICAL;
1095    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1096    uint32_t lastCE;
1097    uint32_t lastContCE;
1098
1099    int32_t result = 0;
1100    uint32_t chOffset = 0; uint32_t chLen = 0;
1101    uint32_t exOffset = 0; uint32_t exLen = 0;
1102    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1103    uint32_t oldOffset = 0;
1104
1105    /* uint32_t rExpsLen = 0; */
1106    /* uint32_t firstLen = 0; */
1107    uint16_t specs = 0;
1108    UBool varT = FALSE; UBool top_ = TRUE;
1109    UBool startOfRules = TRUE;
1110    UBool before = FALSE;
1111    UColTokenParser src;
1112    UColOptionSet opts;
1113    UParseError parseError;
1114    UChar *rulesCopy = NULL;
1115    collIterate *c = uprv_new_collIterate(status);
1116    UCAConstants *consts = NULL;
1117    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1118        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1119    const char *colLoc;
1120    UCollator *UCA = ucol_open("root", status);
1121
1122    (void)varT;             /* Suppress set but not used warnings. */
1123    (void)prefixLen;
1124    (void)prefixOffset;
1125    (void)exLen;
1126    (void)exOffset;
1127
1128    if (U_FAILURE(*status)) {
1129        log_err("Could not open root collator %s\n", u_errorName(*status));
1130        uprv_delete_collIterate(c);
1131        return;
1132    }
1133
1134    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1135    if (U_FAILURE(*status)) {
1136        log_err("Could not get collator name: %s\n", u_errorName(*status));
1137        ucol_close(UCA);
1138        uprv_delete_collIterate(c);
1139        return;
1140    }
1141
1142    uprv_memset(&src, 0, sizeof(UColTokenParser));
1143
1144    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1145    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1146    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1147    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1148    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1149
1150    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1151
1152    src.opts = &opts;
1153
1154    rules = ucol_getRules(coll, &ruleLen);
1155
1156    src.invUCA = ucol_initInverseUCA(status);
1157
1158    if(indirectBoundariesSet == FALSE) {
1159        /* UCOL_RESET_TOP_VALUE */
1160        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1161        /* UCOL_FIRST_PRIMARY_IGNORABLE */
1162        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1163        /* UCOL_LAST_PRIMARY_IGNORABLE */
1164        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1165        /* UCOL_FIRST_SECONDARY_IGNORABLE */
1166        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1167        /* UCOL_LAST_SECONDARY_IGNORABLE */
1168        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1169        /* UCOL_FIRST_TERTIARY_IGNORABLE */
1170        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1171        /* UCOL_LAST_TERTIARY_IGNORABLE */
1172        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1173        /* UCOL_FIRST_VARIABLE */
1174        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1175        /* UCOL_LAST_VARIABLE */
1176        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1177        /* UCOL_FIRST_NON_VARIABLE */
1178        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1179        /* UCOL_LAST_NON_VARIABLE */
1180        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1181        /* UCOL_FIRST_IMPLICIT */
1182        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1183        /* UCOL_LAST_IMPLICIT */
1184        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1185        /* UCOL_FIRST_TRAILING */
1186        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1187        /* UCOL_LAST_TRAILING */
1188        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1189        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1190        indirectBoundariesSet = TRUE;
1191    }
1192
1193
1194    if(U_SUCCESS(*status) && ruleLen > 0) {
1195        rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1196        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1197        src.current = src.source = rulesCopy;
1198        src.end = rulesCopy+ruleLen;
1199        src.extraCurrent = src.end;
1200        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1201
1202	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1203	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1204        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1205            strength = src.parsedToken.strength;
1206            chOffset = src.parsedToken.charsOffset;
1207            chLen = src.parsedToken.charsLen;
1208            exOffset = src.parsedToken.extensionOffset;
1209            exLen = src.parsedToken.extensionLen;
1210            prefixOffset = src.parsedToken.prefixOffset;
1211            prefixLen = src.parsedToken.prefixLen;
1212            specs = src.parsedToken.flags;
1213
1214            startOfRules = FALSE;
1215            varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1216            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1217
1218            uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1219
1220            currCE = ucol_getNextCE(coll, c, status);
1221            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1222                log_verbose("Thai prevowel detected. Will pick next CE\n");
1223                currCE = ucol_getNextCE(coll, c, status);
1224            }
1225
1226            currContCE = ucol_getNextCE(coll, c, status);
1227            if(!isContinuation(currContCE)) {
1228                currContCE = 0;
1229            }
1230
1231            /* we need to repack CEs here */
1232
1233            if(strength == UCOL_TOK_RESET) {
1234                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1235                if(top_ == TRUE) {
1236                    int32_t tokenIndex = src.parsedToken.indirectIndex;
1237
1238                    nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1239                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1240                } else {
1241                    nextCE = baseCE = currCE;
1242                    nextContCE = baseContCE = currContCE;
1243                }
1244                maxStrength = UCOL_IDENTICAL;
1245            } else {
1246                if(strength < maxStrength) {
1247                    maxStrength = strength;
1248                    if(baseCE == UCOL_RESET_TOP_VALUE) {
1249                        log_verbose("Resetting to [top]\n");
1250                        nextCE = UCOL_NEXT_TOP_VALUE;
1251                        nextContCE = UCOL_NEXT_TOP_CONT;
1252                    } else {
1253                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1254                    }
1255                    if(result < 0) {
1256                        if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1257                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1258                            return;
1259                        } else {
1260                            log_err("%s: couldn't find the CE\n", colLoc);
1261                            return;
1262                        }
1263                    }
1264                }
1265
1266                currCE &= 0xFFFFFF3F;
1267                currContCE &= 0xFFFFFFBF;
1268
1269                if(maxStrength == UCOL_IDENTICAL) {
1270                    if(baseCE != currCE || baseContCE != currContCE) {
1271                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1272                    }
1273                } else {
1274                    if(strength == UCOL_IDENTICAL) {
1275                        if(lastCE != currCE || lastContCE != currContCE) {
1276                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1277                        }
1278                    } else {
1279                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1280                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1281                            log_err("%s: current CE is not less than base CE\n", colLoc);
1282                        }
1283                        if(!before) {
1284                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1285                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1286                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1287                            }
1288                        } else {
1289                            before = FALSE;
1290                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1291                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1292                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1293                            }
1294                        }
1295                    }
1296                }
1297
1298            }
1299
1300            oldOffset = chOffset;
1301            lastCE = currCE & 0xFFFFFF3F;
1302            lastContCE = currContCE & 0xFFFFFFBF;
1303        }
1304        uprv_free(src.source);
1305        uprv_free(src.reorderCodes);
1306    }
1307    ucol_close(UCA);
1308    uprv_delete_collIterate(c);
1309}
1310
#if 0
/* Disabled with #if 0: this hard-coded locale list is no longer compiled. */
/* these locales are now picked from index RB */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1325
/*
 * Hand-written tailoring rules that RamsRulesTest() runs through
 * testCollator() and testCEs() in addition to the locale collators.
 * Each string is converted with u_unescape() before use, which is why the
 * \uXXXX escapes are written with a doubled backslash.
 * The comment following a rule holds a disabled alternative rule string.
 */
static const char* rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
  /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1339
1340
1341static void TestCollations(void) {
1342    int32_t noOfLoc = uloc_countAvailable();
1343    int32_t i = 0, j = 0;
1344
1345    UErrorCode status = U_ZERO_ERROR;
1346    char cName[256];
1347    UChar name[256];
1348    int32_t nameSize;
1349
1350
1351    const char *locName = NULL;
1352    UCollator *coll = NULL;
1353    UCollator *UCA = ucol_open("", &status);
1354    UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1355    if (U_FAILURE(status)) {
1356        log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1357        return;
1358    }
1359    ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1360
1361    for(i = 0; i<noOfLoc; i++) {
1362        status = U_ZERO_ERROR;
1363        locName = uloc_getAvailable(i);
1364        if(uprv_strcmp("ja", locName) == 0) {
1365            log_verbose("Don't know how to test prefixes\n");
1366            continue;
1367        }
1368        if(hasCollationElements(locName)) {
1369            nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1370            for(j = 0; j<nameSize; j++) {
1371                cName[j] = (char)name[j];
1372            }
1373            cName[nameSize] = 0;
1374            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1375            coll = ucol_open(locName, &status);
1376            if(U_SUCCESS(status)) {
1377                testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1378                ucol_close(coll);
1379            } else {
1380                log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1381                status = U_ZERO_ERROR;
1382            }
1383        }
1384    }
1385    ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1386    ucol_close(UCA);
1387}
1388
1389static void RamsRulesTest(void) {
1390    UErrorCode status = U_ZERO_ERROR;
1391    int32_t i = 0;
1392    UCollator *coll = NULL;
1393    UChar rule[2048];
1394    uint32_t ruleLen;
1395    int32_t noOfLoc = uloc_countAvailable();
1396    const char *locName = NULL;
1397
1398    log_verbose("RamsRulesTest\n");
1399
1400    if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1401        /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1402        return;
1403    }
1404
1405    for(i = 0; i<noOfLoc; i++) {
1406        locName = uloc_getAvailable(i);
1407        if(hasCollationElements(locName)) {
1408            if (uprv_strcmp("ja", locName)==0) {
1409                log_verbose("Don't know how to test Japanese because of prefixes\n");
1410                continue;
1411            }
1412            if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1413                log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1414                continue;
1415            }
1416            if (uprv_strcmp("bn", locName)==0 ||
1417                uprv_strcmp("bs", locName)==0 ||            /* Add due to import per cldrbug 5647 */
1418                uprv_strcmp("bs_Cyrl", locName)==0 ||       /* Add due to import per cldrbug 5647 */
1419                uprv_strcmp("en_US_POSIX", locName)==0 ||
1420                uprv_strcmp("fa", locName)==0 ||            /* Add in #10222 with CLDR 24 integration */
1421                uprv_strcmp("fa_AF", locName)==0 ||         /* Add due to import per cldrbug 5647 */
1422                uprv_strcmp("gl", locName)==0 ||            /* Add due to import per cldrbug 5647 */
1423                uprv_strcmp("gl_ES", locName)==0 ||         /* Add due to import per cldrbug 5647 */
1424                uprv_strcmp("he", locName)==0 ||            /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1425                uprv_strcmp("he_IL", locName)==0 ||         /* Add due to new tailoring of \u05F3 vs \u0027 per cldrbug 5576 */
1426                uprv_strcmp("km", locName)==0 ||
1427                uprv_strcmp("km_KH", locName)==0 ||
1428                uprv_strcmp("my", locName)==0 ||
1429                uprv_strcmp("ps", locName)==0 ||            /* Add in #10222 with CLDR 24 integration */
1430                uprv_strcmp("si", locName)==0 ||
1431                uprv_strcmp("si_LK", locName)==0 ||
1432                uprv_strcmp("sr_Latn", locName)==0 ||       /* Add due to import per cldrbug 5647 */
1433                uprv_strcmp("th", locName)==0 ||
1434                uprv_strcmp("th_TH", locName)==0 ||
1435                uprv_strcmp("zh", locName)==0 ||
1436                uprv_strcmp("zh_Hant", locName)==0
1437            ) {
1438              if(log_knownIssue("6040", NULL)) {
1439                log_verbose("Can't test %s - TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1440                continue;
1441              }
1442            }
1443            log_verbose("Testing locale %s\n", locName);
1444            status = U_ZERO_ERROR;
1445            coll = ucol_open(locName, &status);
1446            if(U_SUCCESS(status)) {
1447              if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1448                if(coll->image->jamoSpecial == TRUE) {
1449                  log_err("%s has special JAMOs\n", locName);
1450                }
1451                ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1452                testCollator(coll, &status);
1453                testCEs(coll, &status);
1454              } else {
1455                log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1456              }
1457              ucol_close(coll);
1458            } else {
1459              log_err("Could not open %s: %s\n", locName, u_errorName(status));
1460            }
1461        }
1462    }
1463
1464    for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1465        log_verbose("Testing rule: %s\n", rulesToTest[i]);
1466        ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1467        status = U_ZERO_ERROR;
1468        coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1469        if(U_SUCCESS(status)) {
1470            testCollator(coll, &status);
1471            testCEs(coll, &status);
1472            ucol_close(coll);
1473        } else {
1474          log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1475        }
1476    }
1477
1478}
1479
1480static void IsTailoredTest(void) {
1481    UErrorCode status = U_ZERO_ERROR;
1482    uint32_t i = 0;
1483    UCollator *coll = NULL;
1484    UChar rule[2048];
1485    UChar tailored[2048];
1486    UChar notTailored[2048];
1487    uint32_t ruleLen, tailoredLen, notTailoredLen;
1488
1489    log_verbose("IsTailoredTest\n");
1490
1491    u_uastrcpy(rule, "&Z < A, B, C;c < d");
1492    ruleLen = u_strlen(rule);
1493
1494    u_uastrcpy(tailored, "ABCcd");
1495    tailoredLen = u_strlen(tailored);
1496
1497    u_uastrcpy(notTailored, "ZabD");
1498    notTailoredLen = u_strlen(notTailored);
1499
1500    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1501    if(U_SUCCESS(status)) {
1502        for(i = 0; i<tailoredLen; i++) {
1503            if(!ucol_isTailored(coll, tailored[i], &status)) {
1504                log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1505            }
1506        }
1507        for(i = 0; i<notTailoredLen; i++) {
1508            if(ucol_isTailored(coll, notTailored[i], &status)) {
1509                log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1510            }
1511        }
1512        ucol_close(coll);
1513    }
1514    else {
1515        log_err_status(status, "Can't tailor rules\n");
1516    }
1517    /* Code coverage */
1518    status = U_ZERO_ERROR;
1519    coll = ucol_open("ja", &status);
1520    if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1521        log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1522    }
1523    ucol_close(coll);
1524}
1525
1526
/*
 * Test strings for TestChMove(), listed in the order the "cs" collator is
 * expected to sort them: TestChMove() requires every entry to compare as
 * UCOL_LESS against every later entry. The entries are converted with
 * u_unescape(), hence the doubled backslash in the \uXXXX escapes.
 */
const static char chTest[][20] = {
  "c",
  "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  "h",
  "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  "i", "I", "iarly",
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s",
  "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1546
1547static void TestChMove(void) {
1548    UChar t1[256] = {0};
1549    UChar t2[256] = {0};
1550
1551    uint32_t i = 0, j = 0;
1552    uint32_t size = 0;
1553    UErrorCode status = U_ZERO_ERROR;
1554
1555    UCollator *coll = ucol_open("cs", &status);
1556
1557    if(U_SUCCESS(status)) {
1558        size = sizeof(chTest)/sizeof(chTest[0]);
1559        for(i = 0; i < size-1; i++) {
1560            for(j = i+1; j < size; j++) {
1561                u_unescape(chTest[i], t1, 256);
1562                u_unescape(chTest[j], t2, 256);
1563                doTest(coll, t1, t2, UCOL_LESS);
1564            }
1565        }
1566    }
1567    else {
1568        log_data_err("Can't open collator");
1569    }
1570    ucol_close(coll);
1571}
1572
1573
1574
1575
/*
 * Strings in the order expected under the rule "&\u4e00 < a <<< A < b <<< B".
 * NOTE(review): in the part of the file visible here this table is only
 * referenced from the commented-out code inside TestImplicitTailoring().
 */
const static char impTest[][20] = {
  "\\u4e00",
    "a",
    "A",
    "b",
    "B",
    "\\u4e01"
};
1584
1585
/*
 * Feeds a handful of rule sets that tailor relative to CJK ideographs
 * to genericRulesStarter(), each together with
 * the strings listed in their expected order.
 */
static void TestImplicitTailoring(void) {
  static const struct {
    const char *rules;      /* tailoring rule, still in escaped form */
    const char *data[10];   /* strings in expected order, escaped form */
    const uint32_t len;     /* number of used entries in data */
  } tests[] = {
      { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };

  const int32_t testCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t idx;

  for(idx = 0; idx < testCount; ++idx) {
      genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
  }

/* Earlier hand-rolled version of the second case, kept disabled:
  UChar t1[256] = {0};
  UChar t2[256] = {0};

  const char *rule = "&\\u4e00 < a <<< A < b <<< B";

  uint32_t i = 0, j = 0;
  uint32_t size = 0;
  uint32_t ruleLen = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = NULL;
  ruleLen = u_unescape(rule, t1, 256);

  coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

  if(U_SUCCESS(status)) {
    size = sizeof(impTest)/sizeof(impTest[0]);
    for(i = 0; i < size-1; i++) {
      for(j = i+1; j < size; j++) {
        u_unescape(impTest[i], t1, 256);
        u_unescape(impTest[j], t2, 256);
        doTest(coll, t1, t2, UCOL_LESS);
      }
    }
  }
  else {
    log_err("Can't open collator");
  }
  ucol_close(coll);
  */
}
1635
1636static void TestFCDProblem(void) {
1637  UChar t1[256] = {0};
1638  UChar t2[256] = {0};
1639
1640  const char *s1 = "\\u0430\\u0306\\u0325";
1641  const char *s2 = "\\u04D1\\u0325";
1642
1643  UErrorCode status = U_ZERO_ERROR;
1644  UCollator *coll = ucol_open("", &status);
1645  u_unescape(s1, t1, 256);
1646  u_unescape(s2, t2, 256);
1647
1648  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1649  doTest(coll, t1, t2, UCOL_EQUAL);
1650
1651  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1652  doTest(coll, t1, t2, UCOL_EQUAL);
1653
1654  ucol_close(coll);
1655}
1656
/*
The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
We're only using NFC/NFD in this test.
*/
#define NORM_BUFFER_TEST_LEN 18
/* One test case of TestComposeDecompose(): a code point and two forms of it. */
typedef struct {
  UChar32 u;                        /* the code point under test */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* NFC form; TestComposeDecompose() may overwrite it with the raw code point */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* NFD form */
} tester;
1667
1668static void TestComposeDecompose(void) {
1669    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1670    static const UChar UNICODESET_STR[] = {
1671        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1672        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1673        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1674    };
1675    int32_t noOfLoc;
1676    int32_t i = 0, j = 0;
1677
1678    UErrorCode status = U_ZERO_ERROR;
1679    const char *locName = NULL;
1680    uint32_t nfcSize;
1681    uint32_t nfdSize;
1682    tester **t;
1683    uint32_t noCases = 0;
1684    UCollator *coll = NULL;
1685    UChar32 u = 0;
1686    UChar comp[NORM_BUFFER_TEST_LEN];
1687    uint32_t len = 0;
1688    UCollationElements *iter;
1689    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1690    int32_t charsToTestSize;
1691
1692    noOfLoc = uloc_countAvailable();
1693
1694    coll = ucol_open("", &status);
1695    if (U_FAILURE(status)) {
1696        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1697        return;
1698    }
1699    charsToTestSize = uset_size(charsToTest);
1700    if (charsToTestSize <= 0) {
1701        log_err("Set was zero. Missing data?\n");
1702        return;
1703    }
1704    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1705    t[0] = (tester *)malloc(sizeof(tester));
1706    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1707
1708    for(u = 0; u < charsToTestSize; u++) {
1709        UChar32 ch = uset_charAt(charsToTest, u);
1710        len = 0;
1711        U16_APPEND_UNSAFE(comp, len, ch);
1712        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1713        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1714
1715        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1716          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1717            t[noCases]->u = ch;
1718            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1719                u_strncpy(t[noCases]->NFC, comp, len);
1720                t[noCases]->NFC[len] = 0;
1721            }
1722            noCases++;
1723            t[noCases] = (tester *)malloc(sizeof(tester));
1724            uprv_memset(t[noCases], 0, sizeof(tester));
1725        }
1726    }
1727    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1728    uset_close(charsToTest);
1729    charsToTest = NULL;
1730
1731    for(u=0; u<(UChar32)noCases; u++) {
1732        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1733            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1734            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1735        }
1736    }
1737    /*
1738    for(u = 0; u < charsToTestSize; u++) {
1739      if(!(u&0xFFFF)) {
1740        log_verbose("%08X ", u);
1741      }
1742      uprv_memset(t[noCases], 0, sizeof(tester));
1743      t[noCases]->u = u;
1744      len = 0;
1745      U16_APPEND_UNSAFE(comp, len, u);
1746      comp[len] = 0;
1747      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1748      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1749      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1750      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1751    }
1752    */
1753
1754    ucol_close(coll);
1755
1756    log_verbose("Testing locales, number of cases = %i\n", noCases);
1757    for(i = 0; i<noOfLoc; i++) {
1758        status = U_ZERO_ERROR;
1759        locName = uloc_getAvailable(i);
1760        if(hasCollationElements(locName)) {
1761            char cName[256];
1762            UChar name[256];
1763            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1764
1765            for(j = 0; j<nameSize; j++) {
1766                cName[j] = (char)name[j];
1767            }
1768            cName[nameSize] = 0;
1769            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1770
1771            coll = ucol_open(locName, &status);
1772            ucol_setStrength(coll, UCOL_IDENTICAL);
1773            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1774
1775            for(u=0; u<(UChar32)noCases; u++) {
1776                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1777                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1778                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1779                    log_verbose("Testing NFC\n");
1780                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1781                    backAndForth(iter);
1782                    log_verbose("Testing NFD\n");
1783                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1784                    backAndForth(iter);
1785                }
1786            }
1787            ucol_closeElements(iter);
1788            ucol_close(coll);
1789        }
1790    }
1791    for(u = 0; u <= (UChar32)noCases; u++) {
1792        free(t[u]);
1793    }
1794    free(t);
1795}
1796
1797static void TestEmptyRule(void) {
1798  UErrorCode status = U_ZERO_ERROR;
1799  UChar rulez[] = { 0 };
1800  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1801
1802  ucol_close(coll);
1803}
1804
1805static void TestUCARules(void) {
1806  UErrorCode status = U_ZERO_ERROR;
1807  UChar b[256];
1808  UChar *rules = b;
1809  uint32_t ruleLen = 0;
1810  UCollator *UCAfromRules = NULL;
1811  UCollator *coll = ucol_open("", &status);
1812  if(status == U_FILE_ACCESS_ERROR) {
1813    log_data_err("Is your data around?\n");
1814    return;
1815  } else if(U_FAILURE(status)) {
1816    log_err("Error opening collator\n");
1817    return;
1818  }
1819  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1820
1821  log_verbose("TestUCARules\n");
1822  if(ruleLen > 256) {
1823    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1824    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1825  }
1826  log_verbose("Rules length is %d\n", ruleLen);
1827  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1828  if(U_SUCCESS(status)) {
1829    ucol_close(UCAfromRules);
1830  } else {
1831    log_verbose("Unable to create a collator from UCARules!\n");
1832  }
1833/*
1834  u_unescape(blah, b, 256);
1835  ucol_getSortKey(coll, b, 1, res, 256);
1836*/
1837  ucol_close(coll);
1838  if(rules != b) {
1839    free(rules);
1840  }
1841}
1842
1843
1844/* Pinyin tonal order */
1845/*
1846    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1847          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1848    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1849    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1850    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1851    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1852      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1853.. (\u00fc)
1854
1855However, in testing we got the following order:
1856    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1857          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1858    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1859.. (\u0113)
1860    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1861    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1862    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1863.. (\u01d8)
1864      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1865*/
1866
/*
 * Hands a [before 1] tailoring for the tone-marked Latin vowels, together
 * with a list of test strings, to genericRulesStarter.
 *
 * The list holds, for each of A/E/I/O/U, the four tone-marked forms followed
 * by the capital base letter, and finally the five u-diaeresis forms.
 * (Presumably genericRulesStarter verifies that the strings sort in the
 * listed order -- confirm against its definition.)
 */
static void TestBefore(void) {
  static const char pinyinBeforeRules[] =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  static const char *orderedVowels[] = {
      /* A */ "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      /* E */ "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      /* I */ "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      /* O */ "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      /* U */ "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      /* u with diaeresis */
              "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };

  genericRulesStarter(pinyinBeforeRules, orderedVowels, LEN(orderedVowels));
}
1885
#if 0
/* superseded by TestBeforePinyin */
/*
 * Disabled (compiled out by the enclosing #if 0).
 * Passes the base vowels, each followed by its four tone-marked forms, plus
 * the u-diaeresis forms, to genericLocaleStarter for the "zh" locale.
 */
static void TestJ784(void) {
  const static char *data[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };
  genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
}
#endif
1901
#if 0
/* superseded by the changes to the lv locale */
/*
 * Disabled (compiled out by the enclosing #if 0).
 * Passes the strings "I", "i", "Y", "y" to genericLocaleStarter for the
 * "lv" locale.
 */
static void TestJ831(void) {
  const static char *data[] = {
    "I",
      "i",
      "Y",
      "y"
  };
  genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
}
#endif
1914
/*
 * Runs one list of "a?"/ae-ligature strings through two configurations:
 * first the "fr" locale via genericLocaleStarter, then a custom rule string
 * ([backwards 2] with the ae ligatures tailored as expansions after A) via
 * genericRulesStarter.  Both calls receive the same strings in the same
 * order.
 */
static void TestJ815(void) {
  static const char *aeStrings[] = {
    "aa", "Aa",
    "ab", "Ab",
    "ad", "Ad",
    "ae", "Ae",
    "\\u00e6", "\\u00c6",
    "af", "Af",
    "b", "B"
  };
  static const char backwardsRules[] = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  genericLocaleStarter("fr", aeStrings, LEN(aeStrings));
  genericRulesStarter(backwardsRules, aeStrings, LEN(aeStrings));
}
1935
1936
1937/*
1938"& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1939"& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1940"& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1941"& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1942"& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1943"& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1944"& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1945"& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1946"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1947*/
1948static void TestRedundantRules(void) {
1949  int32_t i;
1950
1951  static const struct {
1952      const char *rules;
1953      const char *expectedRules;
1954      const char *testdata[8];
1955      uint32_t testdatalen;
1956  } tests[] = {
1957    /* this test conflicts with positioning of CODAN placeholder */
1958       /*{
1959        "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1960        "&\\u2089<<<x",
1961        {"\\u2089", "x"}, 2
1962       }, */
1963    /* this test conflicts with the [before x] syntax tightening */
1964      /*{
1965        "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1966        "&\\u0252<<<x",
1967        {"\\u0252", "x"}, 2
1968      }, */
1969    /* this test conflicts with the [before x] syntax tightening */
1970      /*{
1971         "& a < b <<< c << d <<< e& [before 1] e <<< x",
1972         "& a <<< x < b <<< c << d <<< e",
1973        {"a", "x", "b", "c", "d", "e"}, 6
1974      }, */
1975      {
1976        "& a < b < c < d& [before 1] c < m",
1977        "& a < b < m < c < d",
1978        {"a", "b", "m", "c", "d"}, 5
1979      },
1980      {
1981        "& a < b <<< c << d <<< e& [before 3] e <<< x",
1982        "& a < b <<< c << d <<< x <<< e",
1983        {"a", "b", "c", "d", "x", "e"}, 6
1984      },
1985    /* this test conflicts with the [before x] syntax tightening */
1986      /* {
1987        "& a < b <<< c << d <<< e& [before 2] e <<< x",
1988        "& a < b <<< c <<< x << d <<< e",
1989        {"a", "b", "c", "x", "d", "e"},, 6
1990      }, */
1991      {
1992        "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1993        "& a < b <<< c << d <<< e <<< f < x < g",
1994        {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1995      },
1996      {
1997        "& a <<< b << c < d& a < m",
1998        "& a <<< b << c < m < d",
1999        {"a", "b", "c", "m", "d"}, 5
2000      },
2001      {
2002        "&a<b<<b\\u0301 &z<b",
2003        "&a<b\\u0301 &z<b",
2004        {"a", "b\\u0301", "z", "b"}, 4
2005      },
2006      {
2007        "&z<m<<<q<<<m",
2008        "&z<q<<<m",
2009        {"z", "q", "m"},3
2010      },
2011      {
2012        "&z<<<m<q<<<m",
2013        "&z<q<<<m",
2014        {"z", "q", "m"}, 3
2015      },
2016      {
2017        "& a < b < c < d& r < c",
2018        "& a < b < d& r < c",
2019        {"a", "b", "d"}, 3
2020      },
2021      {
2022        "& a < b < c < d& r < c",
2023        "& a < b < d& r < c",
2024        {"r", "c"}, 2
2025      },
2026      {
2027        "& a < b < c < d& c < m",
2028        "& a < b < c < m < d",
2029        {"a", "b", "c", "m", "d"}, 5
2030      },
2031      {
2032        "& a < b < c < d& a < m",
2033        "& a < m < b < c < d",
2034        {"a", "m", "b", "c", "d"}, 5
2035      }
2036  };
2037
2038
2039  UCollator *credundant = NULL;
2040  UCollator *cresulting = NULL;
2041  UErrorCode status = U_ZERO_ERROR;
2042  UChar rlz[2048] = { 0 };
2043  uint32_t rlen = 0;
2044
2045  for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2046    log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2047    rlen = u_unescape(tests[i].rules, rlz, 2048);
2048
2049    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2050    if(status == U_FILE_ACCESS_ERROR) {
2051      log_data_err("Is your data around?\n");
2052      return;
2053    } else if(U_FAILURE(status)) {
2054      log_err("Error opening collator\n");
2055      return;
2056    }
2057
2058    rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2059    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2060
2061    testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2062
2063    ucol_close(credundant);
2064    ucol_close(cresulting);
2065
2066    log_verbose("testing using data\n");
2067
2068    genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2069  }
2070
2071}
2072
2073static void TestExpansionSyntax(void) {
2074  int32_t i;
2075
2076  const static char *rules[] = {
2077    "&AE <<< a << b <<< c &d <<< f",
2078    "&AE <<< a <<< b << c << d < e < f <<< g",
2079    "&AE <<< B <<< C / D <<< F"
2080  };
2081
2082  const static char *expectedRules[] = {
2083    "&A <<< a / E << b / E <<< c /E  &d <<< f",
2084    "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2085    "&A <<< B / E <<< C / ED <<< F / E"
2086  };
2087
2088  const static char *testdata[][8] = {
2089    {"AE", "a", "b", "c"},
2090    {"AE", "a", "b", "c", "d", "e", "f", "g"},
2091    {"AE", "B", "C"} /* / ED <<< F / E"},*/
2092  };
2093
2094  const static uint32_t testdatalen[] = {
2095      4,
2096      8,
2097      3
2098  };
2099
2100
2101
2102  UCollator *credundant = NULL;
2103  UCollator *cresulting = NULL;
2104  UErrorCode status = U_ZERO_ERROR;
2105  UChar rlz[2048] = { 0 };
2106  uint32_t rlen = 0;
2107
2108  for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2109    log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2110    rlen = u_unescape(rules[i], rlz, 2048);
2111
2112    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2113    if(status == U_FILE_ACCESS_ERROR) {
2114      log_data_err("Is your data around?\n");
2115      return;
2116    } else if(U_FAILURE(status)) {
2117      log_err("Error opening collator\n");
2118      return;
2119    }
2120    rlen = u_unescape(expectedRules[i], rlz, 2048);
2121    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2122
2123    /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2124    /* as a hard error test, but only in information mode */
2125    testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2126
2127    ucol_close(credundant);
2128    ucol_close(cresulting);
2129
2130    log_verbose("testing using data\n");
2131
2132    genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2133  }
2134}
2135
2136static void TestCase(void)
2137{
2138    const static UChar gRules[MAX_TOKEN_LEN] =
2139    /*" & 0 < 1,\u2461<a,A"*/
2140    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2141
2142    const static UChar testCase[][MAX_TOKEN_LEN] =
2143    {
2144        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2145        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2146        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2147        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2148    };
2149
2150    const static UCollationResult caseTestResults[][9] =
2151    {
2152        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2153        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2154        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2155        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2156    };
2157
2158    const static UColAttributeValue caseTestAttributes[][2] =
2159    {
2160        { UCOL_LOWER_FIRST, UCOL_OFF},
2161        { UCOL_UPPER_FIRST, UCOL_OFF},
2162        { UCOL_LOWER_FIRST, UCOL_ON},
2163        { UCOL_UPPER_FIRST, UCOL_ON}
2164    };
2165    int32_t i,j,k;
2166    UErrorCode status = U_ZERO_ERROR;
2167    UCollationElements *iter;
2168    UCollator  *myCollation;
2169    myCollation = ucol_open("en_US", &status);
2170
2171    if(U_FAILURE(status)){
2172        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2173        return;
2174    }
2175    log_verbose("Testing different case settings\n");
2176    ucol_setStrength(myCollation, UCOL_TERTIARY);
2177
2178    for(k = 0; k<4; k++) {
2179      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2180      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2181      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2182      for (i = 0; i < 3 ; i++) {
2183        for(j = i+1; j<4; j++) {
2184          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2185        }
2186      }
2187    }
2188    ucol_close(myCollation);
2189
2190    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2191    if(U_FAILURE(status)){
2192        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2193        return;
2194    }
2195    log_verbose("Testing different case settings with custom rules\n");
2196    ucol_setStrength(myCollation, UCOL_TERTIARY);
2197
2198    for(k = 0; k<4; k++) {
2199      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2200      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2201      for (i = 0; i < 3 ; i++) {
2202        for(j = i+1; j<4; j++) {
2203          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2204          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2205          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2206          backAndForth(iter);
2207          ucol_closeElements(iter);
2208          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2209          backAndForth(iter);
2210          ucol_closeElements(iter);
2211        }
2212      }
2213    }
2214    ucol_close(myCollation);
2215    {
2216      const static char *lowerFirst[] = {
2217        "h",
2218        "H",
2219        "ch",
2220        "Ch",
2221        "CH",
2222        "cha",
2223        "chA",
2224        "Cha",
2225        "ChA",
2226        "CHa",
2227        "CHA",
2228        "i",
2229        "I"
2230      };
2231
2232      const static char *upperFirst[] = {
2233        "H",
2234        "h",
2235        "CH",
2236        "Ch",
2237        "ch",
2238        "CHA",
2239        "CHa",
2240        "ChA",
2241        "Cha",
2242        "chA",
2243        "cha",
2244        "I",
2245        "i"
2246      };
2247      log_verbose("mixed case test\n");
2248      log_verbose("lower first, case level off\n");
2249      genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2250      log_verbose("upper first, case level off\n");
2251      genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2252      log_verbose("lower first, case level on\n");
2253      genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2254      log_verbose("upper first, case level on\n");
2255      genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2256    }
2257
2258}
2259
2260static void TestIncrementalNormalize(void) {
2261
2262    /*UChar baseA     =0x61;*/
2263    UChar baseA     =0x41;
2264/*    UChar baseB     = 0x42;*/
2265    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2266    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2267    /*
2268        0x316 is combining grave accent below, cc=220
2269        0x321 is combining palatalized hook below, cc=202
2270        0x300 is combining grave accent, cc=230
2271    */
2272
2273#define MAXSLEN 2000
2274    /*int          maxSLen   = 64000;*/
2275    int          sLen;
2276    int          i;
2277
2278    UCollator        *coll;
2279    UErrorCode       status = U_ZERO_ERROR;
2280    UCollationResult result;
2281
2282    int32_t myQ = getTestOption(QUICK_OPTION);
2283
2284    if(getTestOption(QUICK_OPTION) < 0) {
2285        setTestOption(QUICK_OPTION, 1);
2286    }
2287
2288    {
2289        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2290        /*          most buffers along the way.*/
2291        UChar            strA[MAXSLEN+1];
2292        UChar            strB[MAXSLEN+1];
2293
2294        coll = ucol_open("en_US", &status);
2295        if(status == U_FILE_ACCESS_ERROR) {
2296          log_data_err("Is your data around?\n");
2297          return;
2298        } else if(U_FAILURE(status)) {
2299          log_err("Error opening collator\n");
2300          return;
2301        }
2302        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2303
2304        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2305        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2306        /*for (sLen = 1000; sLen<1001; sLen++) {*/
2307        for (sLen = 500; sLen<501; sLen++) {
2308        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2309            strA[0] = baseA;
2310            strB[0] = baseA;
2311            for (i=1; i<=sLen-1; i++) {
2312                strA[i] = ccMix[i % 3];
2313                strB[sLen-i] = ccMix[i % 3];
2314            }
2315            strA[sLen]   = 0;
2316            strB[sLen]   = 0;
2317
2318            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2319            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2320            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2321            doTest(coll, strA, strB, UCOL_EQUAL);
2322        }
2323    }
2324
2325    setTestOption(QUICK_OPTION, myQ);
2326
2327
2328    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2329    /*         of the string.  Checks a couple of edge cases.*/
2330
2331    {
2332        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2333        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2334        ucol_setStrength(coll, UCOL_TERTIARY);
2335        doTest(coll, strA, strB, UCOL_EQUAL);
2336    }
2337
2338    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2339
2340    {
2341      /* New UCA  3.1.1.
2342       * test below used a code point from Desseret, which sorts differently
2343       * than d800 dc00
2344       */
2345        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2346        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2347        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2348        ucol_setStrength(coll, UCOL_TERTIARY);
2349        doTest(coll, strA, strB, UCOL_GREATER);
2350    }
2351
2352    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2353
2354    {
2355        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2356        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2357        char  sortKeyA[50];
2358        char  sortKeyAz[50];
2359        char  sortKeyB[50];
2360        char  sortKeyBz[50];
2361        int   r;
2362
2363        /* there used to be -3 here. Hmmmm.... */
2364        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2365        result = ucol_strcoll(coll, strA, 3, strB, 3);
2366        if (result != UCOL_GREATER) {
2367            log_err("ERROR 1 in test 4\n");
2368        }
2369        result = ucol_strcoll(coll, strA, -1, strB, -1);
2370        if (result != UCOL_EQUAL) {
2371            log_err("ERROR 2 in test 4\n");
2372        }
2373
2374        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2375        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2376        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2377        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2378
2379        r = strcmp(sortKeyA, sortKeyAz);
2380        if (r <= 0) {
2381            log_err("Error 3 in test 4\n");
2382        }
2383        r = strcmp(sortKeyA, sortKeyB);
2384        if (r <= 0) {
2385            log_err("Error 4 in test 4\n");
2386        }
2387        r = strcmp(sortKeyAz, sortKeyBz);
2388        if (r != 0) {
2389            log_err("Error 5 in test 4\n");
2390        }
2391
2392        ucol_setStrength(coll, UCOL_IDENTICAL);
2393        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2394        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2395        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2396        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2397
2398        r = strcmp(sortKeyA, sortKeyAz);
2399        if (r <= 0) {
2400            log_err("Error 6 in test 4\n");
2401        }
2402        r = strcmp(sortKeyA, sortKeyB);
2403        if (r <= 0) {
2404            log_err("Error 7 in test 4\n");
2405        }
2406        r = strcmp(sortKeyAz, sortKeyBz);
2407        if (r != 0) {
2408            log_err("Error 8 in test 4\n");
2409        }
2410        ucol_setStrength(coll, UCOL_TERTIARY);
2411    }
2412
2413
2414    /*  Test 5:  Null characters in non-normal source strings.*/
2415
2416    {
2417        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2418        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2419        char  sortKeyA[50];
2420        char  sortKeyAz[50];
2421        char  sortKeyB[50];
2422        char  sortKeyBz[50];
2423        int   r;
2424
2425        result = ucol_strcoll(coll, strA, 6, strB, 6);
2426        if (result != UCOL_GREATER) {
2427            log_err("ERROR 1 in test 5\n");
2428        }
2429        result = ucol_strcoll(coll, strA, -1, strB, -1);
2430        if (result != UCOL_EQUAL) {
2431            log_err("ERROR 2 in test 5\n");
2432        }
2433
2434        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2435        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2436        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2437        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2438
2439        r = strcmp(sortKeyA, sortKeyAz);
2440        if (r <= 0) {
2441            log_err("Error 3 in test 5\n");
2442        }
2443        r = strcmp(sortKeyA, sortKeyB);
2444        if (r <= 0) {
2445            log_err("Error 4 in test 5\n");
2446        }
2447        r = strcmp(sortKeyAz, sortKeyBz);
2448        if (r != 0) {
2449            log_err("Error 5 in test 5\n");
2450        }
2451
2452        ucol_setStrength(coll, UCOL_IDENTICAL);
2453        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2454        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2455        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2456        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2457
2458        r = strcmp(sortKeyA, sortKeyAz);
2459        if (r <= 0) {
2460            log_err("Error 6 in test 5\n");
2461        }
2462        r = strcmp(sortKeyA, sortKeyB);
2463        if (r <= 0) {
2464            log_err("Error 7 in test 5\n");
2465        }
2466        r = strcmp(sortKeyAz, sortKeyBz);
2467        if (r != 0) {
2468            log_err("Error 8 in test 5\n");
2469        }
2470        ucol_setStrength(coll, UCOL_TERTIARY);
2471    }
2472
2473
2474    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2475
2476    {
2477        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2478        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2479
2480        result = ucol_strcoll(coll, strA, 5, strB, 5);
2481        if (result != UCOL_LESS) {
2482            log_err("Error 1 in test 6\n");
2483        }
2484        result = ucol_strcoll(coll, strA, -1, strB, -1);
2485        if (result != UCOL_EQUAL) {
2486            log_err("Error 2 in test 6\n");
2487        }
2488    }
2489
2490    ucol_close(coll);
2491}
2492
2493
2494
#if 0
/*
 * Disabled (compiled out by the enclosing #if 0).
 * For each entry of caseBitData: unescapes the string, asks
 * ucol_uprv_getCaseBits for its case bits using the root ("") collator, and
 * logs an error when the value differs from the matching entry of results.
 *
 * NOTE(review): the collator opened here is never closed and the status from
 * ucol_open is not checked -- address both before re-enabling this test.
 */
static void TestGetCaseBit(void) {
  /* Input strings, in u_unescape syntax. */
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case bits, parallel to caseBitData. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
}
#endif
2522
2523static void TestHangulTailoring(void) {
2524    static const char *koreanData[] = {
2525        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2526            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2527            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2528            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2529            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2530            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2531    };
2532
2533    const char *rules =
2534        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2535        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2536        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2537        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2538        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2539        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2540
2541
2542  UErrorCode status = U_ZERO_ERROR;
2543  UChar rlz[2048] = { 0 };
2544  uint32_t rlen = u_unescape(rules, rlz, 2048);
2545
2546  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2547  if(status == U_FILE_ACCESS_ERROR) {
2548    log_data_err("Is your data around?\n");
2549    return;
2550  } else if(U_FAILURE(status)) {
2551    log_err("Error opening collator\n");
2552    return;
2553  }
2554
2555  log_verbose("Using start of korean rules\n");
2556
2557  if(U_SUCCESS(status)) {
2558    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2559  } else {
2560    log_err("Unable to open collator with rules %s\n", rules);
2561  }
2562
2563  log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2564  ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2565  genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2566
2567  ucol_close(coll);
2568
2569  log_verbose("Using ko__LOTUS locale\n");
2570  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2571}
2572
2573static void TestCompressOverlap(void) {
2574    UChar       secstr[150];
2575    UChar       tertstr[150];
2576    UErrorCode  status = U_ZERO_ERROR;
2577    UCollator  *coll;
2578    char        result[200];
2579    uint32_t    resultlen;
2580    int         count = 0;
2581    char       *tempptr;
2582
2583    coll = ucol_open("", &status);
2584
2585    if (U_FAILURE(status)) {
2586        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2587        return;
2588    }
2589    while (count < 149) {
2590        secstr[count] = 0x0020; /* [06, 05, 05] */
2591        tertstr[count] = 0x0020;
2592        count ++;
2593    }
2594
2595    /* top down compression ----------------------------------- */
2596    secstr[count] = 0x0332; /* [, 87, 05] */
2597    tertstr[count] = 0x3000; /* [06, 05, 07] */
2598
2599    /* no compression secstr should have 150 secondary bytes, tertstr should
2600    have 150 tertiary bytes.
2601    with correct overlapping compression, secstr should have 4 secondary
2602    bytes, tertstr should have > 2 tertiary bytes */
2603    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2604    (void)resultlen;    /* Suppress set but not used warning. */
2605    tempptr = uprv_strchr(result, 1) + 1;
2606    while (*(tempptr + 1) != 1) {
2607        /* the last secondary collation element is not checked since it is not
2608        part of the compression */
2609        if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2610            log_err("Secondary compression overlapped\n");
2611        }
2612        tempptr ++;
2613    }
2614
2615    /* tertiary top/bottom/common for en_US is similar to the secondary
2616    top/bottom/common */
2617    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2618    tempptr = uprv_strrchr(result, 1) + 1;
2619    while (*(tempptr + 1) != 0) {
2620        /* the last secondary collation element is not checked since it is not
2621        part of the compression */
2622        if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2623            log_err("Tertiary compression overlapped\n");
2624        }
2625        tempptr ++;
2626    }
2627
2628    /* bottom up compression ------------------------------------- */
2629    secstr[count] = 0;
2630    tertstr[count] = 0;
2631    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2632    tempptr = uprv_strchr(result, 1) + 1;
2633    while (*(tempptr + 1) != 1) {
2634        /* the last secondary collation element is not checked since it is not
2635        part of the compression */
2636        if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2637            log_err("Secondary compression overlapped\n");
2638        }
2639        tempptr ++;
2640    }
2641
2642    /* tertiary top/bottom/common for en_US is similar to the secondary
2643    top/bottom/common */
2644    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2645    tempptr = uprv_strrchr(result, 1) + 1;
2646    while (*(tempptr + 1) != 0) {
2647        /* the last secondary collation element is not checked since it is not
2648        part of the compression */
2649        if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2650            log_err("Tertiary compression overlapped\n");
2651        }
2652        tempptr ++;
2653    }
2654
2655    ucol_close(coll);
2656}
2657
/*
 * Verifies that three strings built around U+0410 and U+04D0 keep the same
 * relative order under the root locale and under a series of tailorings
 * that reset on, or re-insert, U+0410.
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings that must all leave the order of test[] untouched. */
    static const char *const tailorings[] = {
        "&\\u0410 = \\u0410",
        "&Z < \\u0410",
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0",
        "&\\u0410 = \\u0410 < \\u0410\\u0301",
        "&Z < \\u0410 < \\u0410\\u0301"
    };
    const size_t stringCount = sizeof(test) / sizeof(test[0]);
    const size_t tailoringCount = sizeof(tailorings) / sizeof(tailorings[0]);
    size_t t;

    /* Russian overrides contractions, so this test is not valid anymore */
    /*genericLocaleStarter("ru", test, 3);*/

    genericLocaleStarter("root", test, stringCount);

    for (t = 0; t < tailoringCount; ++t) {
        genericRulesStarter(tailorings[t], test, stringCount);
    }
}
2676
/*
 * Runs two ordered string lists through a collator built from a rule that
 * suppresses contractions for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* Single rule shared by both runs. */
    static const char suppressCyrillic[] = "[suppressContractions [\\u0400-\\u047f]]";

    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressCyrillic, testNoCont,
                        sizeof(testNoCont) / sizeof(testNoCont[0]));
    genericRulesStarter(suppressCyrillic, testNoCont2,
                        sizeof(testNoCont2) / sizeof(testNoCont2[0]));
}
2693
2694static void TestContraction(void) {
2695    const static char *testrules[] = {
2696        "&A = AB / B",
2697        "&A = A\\u0306/\\u0306",
2698        "&c = ch / h"
2699    };
2700    const static UChar testdata[][2] = {
2701        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2702        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2703        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2704    };
2705    const static UChar testdata2[][2] = {
2706        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2707        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2708        {0x0063 /* 'c' */, 0x006C /* 'l' */}
2709    };
2710    const static char *testrules3[] = {
2711        "&z < xyz &xyzw << B",
2712        "&z < xyz &xyz << B / w",
2713        "&z < ch &achm << B",
2714        "&z < ch &a << B / chm",
2715        "&\\ud800\\udc00w << B",
2716        "&\\ud800\\udc00 << B / w",
2717        "&a\\ud800\\udc00m << B",
2718        "&a << B / \\ud800\\udc00m",
2719    };
2720
2721    UErrorCode  status   = U_ZERO_ERROR;
2722    UCollator  *coll;
2723    UChar       rule[256] = {0};
2724    uint32_t    rlen     = 0;
2725    int         i;
2726
2727    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2728        UCollationElements *iter1;
2729        int j = 0;
2730        log_verbose("Rule %s for testing\n", testrules[i]);
2731        rlen = u_unescape(testrules[i], rule, 32);
2732        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2733        if (U_FAILURE(status)) {
2734            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2735            return;
2736        }
2737        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2738        if (U_FAILURE(status)) {
2739            log_err("Collation iterator creation failed\n");
2740            return;
2741        }
2742        while (j < 2) {
2743            UCollationElements *iter2 = ucol_openElements(coll,
2744                                                         &(testdata[i][j]),
2745                                                         1, &status);
2746            uint32_t ce;
2747            if (U_FAILURE(status)) {
2748                log_err("Collation iterator creation failed\n");
2749                return;
2750            }
2751            ce = ucol_next(iter2, &status);
2752            while (ce != UCOL_NULLORDER) {
2753                if ((uint32_t)ucol_next(iter1, &status) != ce) {
2754                    log_err("Collation elements in contraction split does not match\n");
2755                    return;
2756                }
2757                ce = ucol_next(iter2, &status);
2758            }
2759            j ++;
2760            ucol_closeElements(iter2);
2761        }
2762        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2763            log_err("Collation elements not exhausted\n");
2764            return;
2765        }
2766        ucol_closeElements(iter1);
2767        ucol_close(coll);
2768    }
2769
2770    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2771    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2772    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2773        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2774                testdata2[0][0], testdata2[0][1], testdata2[1][0],
2775                testdata2[1][1]);
2776        return;
2777    }
2778    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2779        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2780                testdata2[1][0], testdata2[1][1], testdata2[2][0],
2781                testdata2[2][1]);
2782        return;
2783    }
2784    ucol_close(coll);
2785
2786    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2787        UCollator          *coll1,
2788                           *coll2;
2789        UCollationElements *iter1,
2790                           *iter2;
2791        UChar               ch = 0x0042 /* 'B' */;
2792        uint32_t            ce;
2793        rlen = u_unescape(testrules3[i], rule, 32);
2794        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2795        rlen = u_unescape(testrules3[i + 1], rule, 32);
2796        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2797        if (U_FAILURE(status)) {
2798            log_err("Collator creation failed %s\n", testrules[i]);
2799            return;
2800        }
2801        iter1 = ucol_openElements(coll1, &ch, 1, &status);
2802        iter2 = ucol_openElements(coll2, &ch, 1, &status);
2803        if (U_FAILURE(status)) {
2804            log_err("Collation iterator creation failed\n");
2805            return;
2806        }
2807        ce = ucol_next(iter1, &status);
2808        if (U_FAILURE(status)) {
2809            log_err("Retrieving ces failed\n");
2810            return;
2811        }
2812        while (ce != UCOL_NULLORDER) {
2813            if (ce != (uint32_t)ucol_next(iter2, &status)) {
2814                log_err("CEs does not match\n");
2815                return;
2816            }
2817            ce = ucol_next(iter1, &status);
2818            if (U_FAILURE(status)) {
2819                log_err("Retrieving ces failed\n");
2820                return;
2821            }
2822        }
2823        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2824            log_err("CEs not exhausted\n");
2825            return;
2826        }
2827        ucol_closeElements(iter1);
2828        ucol_closeElements(iter2);
2829        ucol_close(coll1);
2830        ucol_close(coll2);
2831    }
2832}
2833
2834static void TestExpansion(void) {
2835    const static char *testrules[] = {
2836        "&J << K / B & K << M",
2837        "&J << K / B << M"
2838    };
2839    const static UChar testdata[][3] = {
2840        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2841        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2842        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2843        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2844        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2845        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2846    };
2847
2848    UErrorCode  status   = U_ZERO_ERROR;
2849    UCollator  *coll;
2850    UChar       rule[256] = {0};
2851    uint32_t    rlen     = 0;
2852    int         i;
2853
2854    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2855        int j = 0;
2856        log_verbose("Rule %s for testing\n", testrules[i]);
2857        rlen = u_unescape(testrules[i], rule, 32);
2858        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2859        if (U_FAILURE(status)) {
2860            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2861            return;
2862        }
2863
2864        for (j = 0; j < 5; j ++) {
2865            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2866        }
2867        ucol_close(coll);
2868    }
2869}
2870
2871#if 0
/* This test exercises the current limitations of the collation engine. */
/* It always fails, so it is disabled by default. */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 *
 * Each inner block sets up one rule string plus ordered string lists and
 * feeds them to genericRulesStarter() or genericRulesStarterWithOptions().
 * The sections are: recursive expansions, contractions spanning expansions,
 * NUL in contractions, contractions spanning normalization, variable top,
 * and case level.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both orderings are run with decomposition mode on and then off. */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  /* NOTE(review): the rule and data in this block are identical to the
     "nulls in contractions" block above; only the log message differs.
     Confirm whether a different rule was intended. */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top: three string lists run with shifted/quaternary options,
     two of them again with non-ignorable/tertiary options */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level: only the upper-first variant is run; the UCOL_OFF variant
     is commented out */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
2961#endif
2962
2963static void TestBocsuCoverage(void) {
2964  UErrorCode status = U_ZERO_ERROR;
2965  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2966  UChar       test[256] = {0};
2967  uint32_t    tlen     = u_unescape(testString, test, 32);
2968  uint8_t key[256]     = {0};
2969  uint32_t klen         = 0;
2970
2971  UCollator *coll = ucol_open("", &status);
2972  if(U_SUCCESS(status)) {
2973  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2974
2975  klen = ucol_getSortKey(coll, test, tlen, key, 256);
2976  (void)klen;    /* Suppress set but not used warning. */
2977
2978  ucol_close(coll);
2979  } else {
2980    log_data_err("Couldn't open UCA\n");
2981  }
2982}
2983
2984static void TestVariableTopSetting(void) {
2985  UErrorCode status = U_ZERO_ERROR;
2986  const UChar *current = NULL;
2987  uint32_t varTopOriginal = 0, varTop1, varTop2;
2988  UCollator *coll = ucol_open("", &status);
2989  if(U_SUCCESS(status)) {
2990
2991  uint32_t strength = 0;
2992  uint16_t specs = 0;
2993  uint32_t chOffset = 0;
2994  uint32_t chLen = 0;
2995  uint32_t exOffset = 0;
2996  uint32_t exLen = 0;
2997  uint32_t oldChOffset = 0;
2998  uint32_t oldChLen = 0;
2999  uint32_t oldExOffset = 0;
3000  uint32_t oldExLen = 0;
3001  uint32_t prefixOffset = 0;
3002  uint32_t prefixLen = 0;
3003
3004  UBool startOfRules = TRUE;
3005  UColTokenParser src;
3006  UColOptionSet opts;
3007
3008  UChar *rulesCopy = NULL;
3009  uint32_t rulesLen;
3010
3011  UCollationResult result;
3012
3013  UChar first[256] = { 0 };
3014  UChar second[256] = { 0 };
3015  UParseError parseError;
3016  int32_t myQ = getTestOption(QUICK_OPTION);
3017
3018  (void)prefixLen;        /* Suppress set but not used warnings. */
3019  (void)prefixOffset;
3020  (void)specs;
3021
3022  uprv_memset(&src, 0, sizeof(UColTokenParser));
3023
3024  src.opts = &opts;
3025
3026  if(getTestOption(QUICK_OPTION) <= 0) {
3027    setTestOption(QUICK_OPTION, 1);
3028  }
3029
3030  /* this test will fail when normalization is turned on */
3031  /* therefore we always turn off exhaustive mode for it */
3032  { /* QUICK > 0*/
3033    log_verbose("Slide variable top over UCARules\n");
3034    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
3035    rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3036    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3037
3038    if(U_SUCCESS(status) && rulesLen > 0) {
3039      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3040      src.current = src.source = rulesCopy;
3041      src.end = rulesCopy+rulesLen;
3042      src.extraCurrent = src.end;
3043      src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3044
3045	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3046	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3047      while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3048        strength = src.parsedToken.strength;
3049        chOffset = src.parsedToken.charsOffset;
3050        chLen = src.parsedToken.charsLen;
3051        exOffset = src.parsedToken.extensionOffset;
3052        exLen = src.parsedToken.extensionLen;
3053        prefixOffset = src.parsedToken.prefixOffset;
3054        prefixLen = src.parsedToken.prefixLen;
3055        specs = src.parsedToken.flags;
3056
3057        startOfRules = FALSE;
3058        {
3059          log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3060        }
3061        if(strength == UCOL_PRIMARY) {
3062          status = U_ZERO_ERROR;
3063          varTopOriginal = ucol_getVariableTop(coll, &status);
3064          varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3065          if(U_FAILURE(status)) {
3066            char buffer[256];
3067            char *buf = buffer;
3068            uint32_t i = 0, j;
3069            uint32_t CE = UCOL_NO_MORE_CES;
3070
3071            /* before we start screaming, let's see if there is a problem with the rules */
3072            UErrorCode collIterateStatus = U_ZERO_ERROR;
3073            collIterate *s = uprv_new_collIterate(&collIterateStatus);
3074            uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3075
3076            CE = ucol_getNextCE(coll, s, &status);
3077            (void)CE;    /* Suppress set but not used warning. */
3078
3079            for(i = 0; i < oldChLen; i++) {
3080              j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3081              buf += j;
3082            }
3083            if(status == U_PRIMARY_TOO_LONG_ERROR) {
3084              log_verbose("= Expected failure for %s =", buffer);
3085            } else {
3086              if(uprv_collIterateAtEnd(s)) {
3087                log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3088                  oldChOffset, u_errorName(status), buffer);
3089              } else {
3090                log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3091                  buffer);
3092              }
3093            }
3094            uprv_delete_collIterate(s);
3095          }
3096          varTop2 = ucol_getVariableTop(coll, &status);
3097          if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3098            log_err("cannot retrieve set varTop value!\n");
3099            continue;
3100          }
3101
3102          if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3103
3104            u_strncpy(first, src.source+oldChOffset, oldChLen);
3105            u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3106            u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3107            first[2*oldChLen+chLen] = 0;
3108
3109            if(oldExLen == 0) {
3110              u_strncpy(second, src.source+chOffset, chLen);
3111              second[chLen] = 0;
3112            } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3113              u_strncpy(second, src.source+oldExOffset, oldExLen);
3114              u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3115              u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3116              second[2*oldExLen+chLen] = 0;
3117            }
3118            result = ucol_strcoll(coll, first, -1, second, -1);
3119            if(result == UCOL_EQUAL) {
3120              doTest(coll, first, second, UCOL_EQUAL);
3121            } else {
3122              log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3123            }
3124          }
3125        }
3126        if(strength != UCOL_TOK_RESET) {
3127          oldChOffset = chOffset;
3128          oldChLen = chLen;
3129          oldExOffset = exOffset;
3130          oldExLen = exLen;
3131        }
3132      }
3133      status = U_ZERO_ERROR;
3134    }
3135    else {
3136      log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3137      return;
3138    }
3139    if (U_FAILURE(status)) {
3140        log_err("Error parsing rules %s\n", u_errorName(status));
3141        return;
3142    }
3143    status = U_ZERO_ERROR;
3144  }
3145
3146  setTestOption(QUICK_OPTION, myQ);
3147
3148  log_verbose("Testing setting variable top to contractions\n");
3149  {
3150    UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3151    int32_t maxUCAContractionLength = coll->image->contractionUCACombosWidth;
3152    while(*conts != 0) {
3153      /*
3154       * A continuation is NUL-terminated and NUL-padded
3155       * except if it has the maximum length.
3156       */
3157      int32_t contractionLength = maxUCAContractionLength;
3158      while(contractionLength > 0 && conts[contractionLength - 1] == 0) {
3159        --contractionLength;
3160      }
3161      if(*(conts+1)==0) { /* pre-context */
3162        varTop1 = ucol_setVariableTop(coll, conts, 1, &status);
3163      } else {
3164        varTop1 = ucol_setVariableTop(coll, conts, contractionLength, &status);
3165      }
3166      if(U_FAILURE(status)) {
3167        if(status == U_PRIMARY_TOO_LONG_ERROR) {
3168          /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3169           * therefore it is not an error when it complains about them. */
3170          log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3171                      *conts, *(conts+1), *(conts+2));
3172        } else {
3173          log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3174                  *conts, *(conts+1), *(conts+2), u_errorName(status));
3175        }
3176        status = U_ZERO_ERROR;
3177      }
3178      conts+=maxUCAContractionLength;
3179    }
3180
3181    status = U_ZERO_ERROR;
3182
3183    first[0] = 0x0040;
3184    first[1] = 0x0050;
3185    first[2] = 0x0000;
3186
3187    ucol_setVariableTop(coll, first, -1, &status);
3188
3189    if(U_SUCCESS(status)) {
3190      log_err("Invalid contraction succeded in setting variable top!\n");
3191    }
3192
3193  }
3194
3195  log_verbose("Test restoring variable top\n");
3196
3197  status = U_ZERO_ERROR;
3198  ucol_restoreVariableTop(coll, varTopOriginal, &status);
3199  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3200    log_err("Couldn't restore old variable top\n");
3201  }
3202
3203  log_verbose("Testing calling with error set\n");
3204
3205  status = U_INTERNAL_PROGRAM_ERROR;
3206  varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3207  varTop2 = ucol_getVariableTop(coll, &status);
3208  ucol_restoreVariableTop(coll, varTop2, &status);
3209  varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3210  varTop2 = ucol_getVariableTop(NULL, &status);
3211  ucol_restoreVariableTop(NULL, varTop2, &status);
3212  if(status != U_INTERNAL_PROGRAM_ERROR) {
3213    log_err("Bad reaction to passed error!\n");
3214  }
3215  uprv_free(src.source);
3216  ucol_close(coll);
3217  } else {
3218    log_data_err("Couldn't open UCA collator\n");
3219  }
3220
3221}
3222
3223static void TestNonChars(void) {
3224  static const char *test[] = {
3225      "\\u0000",  /* ignorable */
3226      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3227      "\\uFDD0", "\\uFDEF",
3228      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3229      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3230      "\\U0003FFFE", "\\U0003FFFF",
3231      "\\U0004FFFE", "\\U0004FFFF",
3232      "\\U0005FFFE", "\\U0005FFFF",
3233      "\\U0006FFFE", "\\U0006FFFF",
3234      "\\U0007FFFE", "\\U0007FFFF",
3235      "\\U0008FFFE", "\\U0008FFFF",
3236      "\\U0009FFFE", "\\U0009FFFF",
3237      "\\U000AFFFE", "\\U000AFFFF",
3238      "\\U000BFFFE", "\\U000BFFFF",
3239      "\\U000CFFFE", "\\U000CFFFF",
3240      "\\U000DFFFE", "\\U000DFFFF",
3241      "\\U000EFFFE", "\\U000EFFFF",
3242      "\\U000FFFFE", "\\U000FFFFF",
3243      "\\U0010FFFE", "\\U0010FFFF",
3244      "\\uFFFF"  /* special character with maximum primary weight */
3245  };
3246  UErrorCode status = U_ZERO_ERROR;
3247  UCollator *coll = ucol_open("en_US", &status);
3248
3249  log_verbose("Test non characters\n");
3250
3251  if(U_SUCCESS(status)) {
3252    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3253  } else {
3254    log_err_status(status, "Unable to open collator\n");
3255  }
3256
3257  ucol_close(coll);
3258}
3259
/*
 * Stress test for sort key compression: for every length j in [20, 500)
 * builds four strings of j-1 'a' characters followed by 'a', 'b', 'c' and
 * 'd' respectively, and checks their order with the en_US collator.
 *
 * The four work buffers are heap-allocated. The test logs an error and
 * returns early, releasing what was already allocated, if an allocation
 * fails.
 */
static void TestExtremeCompression(void) {
  char *test[4] = { NULL, NULL, NULL, NULL };
  int32_t j = 0, i = 0;

  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
    if(test[i] == NULL) {
      log_err("Out of memory in TestExtremeCompression\n");
      /* Release the buffers obtained so far. */
      while(--i >= 0) {
        free(test[i]);
      }
      return;
    }
  }

  for(j = 20; j < 500; j++) {
    for(i = 0; i<4; i++) {
      /* j-1 copies of 'a', then the distinguishing last letter. */
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
    genericLocaleStarter("en_US", (const char **)test, 4);
  }


  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
3282
3283#if 0
/*
 * Disabled variant of TestExtremeCompression (compiled out by the
 * surrounding #if 0). It fills four 2048-byte buffers for every length in
 * [10, 2048) but calls genericLocaleStarter() only once, after the fill
 * loop has finished.
 *
 * NOTE(review): ucol_open() is given `status` where other tests in this
 * file pass `&status`, and `coll` is never used or closed; both would need
 * attention before re-enabling this code.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      /* NOTE(review): only j-2 bytes are filled, so index j-2 keeps
         whatever an earlier iteration (or malloc) left there. */
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Refills only test[0]; the result is not used afterwards. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
3311#endif
3312
/*
 * Tailors a sequence of strings containing surrogate pairs after 'z' and
 * checks that the strings in test[] sort in the listed order under that
 * tailoring.
 */
static void TestSurrogates(void) {
  /* Tailoring rule; adjacent literals are concatenated by the compiler. */
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  /* Expected ascending order under the tailoring above. */
  static const char *test[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };
  const size_t stringCount = sizeof(test)/sizeof(test[0]);   /* 14 entries */

  genericRulesStarter(tailoring, test, stringCount);
}
3333
3334/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
static void TestPrefix(void) {
  /* One test case: a rule string plus the first `len` strings of `data`,
     which are handed to genericRulesStarter(). */
  static const struct PrefixCase {
    const char *rules;
    const char *data[50];
    const uint32_t len;
  } tests[] = {
    { "&z <<< z|a",
      {"zz", "za"}, 2 },

    { "&z <<< z|   a",
      {"zz", "za"}, 2 },
    /* NOTE(review): five strings are listed here but len is 4, so "zz"
       is not part of the run. Confirm whether that is intended. */
    { "[strength I]"
      "&a=\\ud900\\udc25"
      "&z<<<\\ud900\\udc25|a",
      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
  };
  const struct PrefixCase *const limit = tests + sizeof(tests)/sizeof(tests[0]);
  const struct PrefixCase *tc;

  for(tc = tests; tc != limit; ++tc) {
    genericRulesStarter(tc->rules, tc->data, tc->len);
  }
}
3359
/* This test uses data supplied by Masashiko Maedera to test the */
/* JIS X 4061 collation order implementation.                    */
3362static void TestNewJapanese(void) {
3363
3364  static const char * const test1[] = {
3365      "\\u30b7\\u30e3\\u30fc\\u30ec",
3366      "\\u30b7\\u30e3\\u30a4",
3367      "\\u30b7\\u30e4\\u30a3",
3368      "\\u30b7\\u30e3\\u30ec",
3369      "\\u3061\\u3087\\u3053",
3370      "\\u3061\\u3088\\u3053",
3371      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3372      "\\u3066\\u30fc\\u305f",
3373      "\\u30c6\\u30fc\\u30bf",
3374      "\\u30c6\\u30a7\\u30bf",
3375      "\\u3066\\u3048\\u305f",
3376      "\\u3067\\u30fc\\u305f",
3377      "\\u30c7\\u30fc\\u30bf",
3378      "\\u30c7\\u30a7\\u30bf",
3379      "\\u3067\\u3048\\u305f",
3380      "\\u3066\\u30fc\\u305f\\u30fc",
3381      "\\u30c6\\u30fc\\u30bf\\u30a1",
3382      "\\u30c6\\u30a7\\u30bf\\u30fc",
3383      "\\u3066\\u3047\\u305f\\u3041",
3384      "\\u3066\\u3048\\u305f\\u30fc",
3385      "\\u3067\\u30fc\\u305f\\u30fc",
3386      "\\u30c7\\u30fc\\u30bf\\u30a1",
3387      "\\u3067\\u30a7\\u305f\\u30a1",
3388      "\\u30c7\\u3047\\u30bf\\u3041",
3389      "\\u30c7\\u30a8\\u30bf\\u30a2",
3390      "\\u3072\\u3086",
3391      "\\u3073\\u3085\\u3042",
3392      "\\u3074\\u3085\\u3042",
3393      "\\u3073\\u3085\\u3042\\u30fc",
3394      "\\u30d3\\u30e5\\u30a2\\u30fc",
3395      "\\u3074\\u3085\\u3042\\u30fc",
3396      "\\u30d4\\u30e5\\u30a2\\u30fc",
3397      "\\u30d2\\u30e5\\u30a6",
3398      "\\u30d2\\u30e6\\u30a6",
3399      "\\u30d4\\u30e5\\u30a6\\u30a2",
3400      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3401      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3402      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3403      "\\u3072\\u3085\\u3093",
3404      "\\u3074\\u3085\\u3093",
3405      "\\u3075\\u30fc\\u308a",
3406      "\\u30d5\\u30fc\\u30ea",
3407      "\\u3075\\u3045\\u308a",
3408      "\\u3075\\u30a5\\u308a",
3409      "\\u3075\\u30a5\\u30ea",
3410      "\\u30d5\\u30a6\\u30ea",
3411      "\\u3076\\u30fc\\u308a",
3412      "\\u30d6\\u30fc\\u30ea",
3413      "\\u3076\\u3045\\u308a",
3414      "\\u30d6\\u30a5\\u308a",
3415      "\\u3077\\u3046\\u308a",
3416      "\\u30d7\\u30a6\\u30ea",
3417      "\\u3075\\u30fc\\u308a\\u30fc",
3418      "\\u30d5\\u30a5\\u30ea\\u30fc",
3419      "\\u3075\\u30a5\\u308a\\u30a3",
3420      "\\u30d5\\u3045\\u308a\\u3043",
3421      "\\u30d5\\u30a6\\u30ea\\u30fc",
3422      "\\u3075\\u3046\\u308a\\u3043",
3423      "\\u30d6\\u30a6\\u30ea\\u30a4",
3424      "\\u3077\\u30fc\\u308a\\u30fc",
3425      "\\u3077\\u30a5\\u308a\\u30a4",
3426      "\\u3077\\u3046\\u308a\\u30fc",
3427      "\\u30d7\\u30a6\\u30ea\\u30a4",
3428      "\\u30d5\\u30fd",
3429      "\\u3075\\u309e",
3430      "\\u3076\\u309d",
3431      "\\u3076\\u3075",
3432      "\\u3076\\u30d5",
3433      "\\u30d6\\u3075",
3434      "\\u30d6\\u30d5",
3435      "\\u3076\\u309e",
3436      "\\u3076\\u3077",
3437      "\\u30d6\\u3077",
3438      "\\u3077\\u309d",
3439      "\\u30d7\\u30fd",
3440      "\\u3077\\u3075",
3441};
3442
3443  static const char *test2[] = {
3444    "\\u306f\\u309d", /* H\\u309d */
3445    "\\u30cf\\u30fd", /* K\\u30fd */
3446    "\\u306f\\u306f", /* HH */
3447    "\\u306f\\u30cf", /* HK */
3448    "\\u30cf\\u30cf", /* KK */
3449    "\\u306f\\u309e", /* H\\u309e */
3450    "\\u30cf\\u30fe", /* K\\u30fe */
3451    "\\u306f\\u3070", /* HH\\u309b */
3452    "\\u30cf\\u30d0", /* KK\\u309b */
3453    "\\u306f\\u3071", /* HH\\u309c */
3454    "\\u30cf\\u3071", /* KH\\u309c */
3455    "\\u30cf\\u30d1", /* KK\\u309c */
3456    "\\u3070\\u309d", /* H\\u309b\\u309d */
3457    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3458    "\\u3070\\u306f", /* H\\u309bH */
3459    "\\u30d0\\u30cf", /* K\\u309bK */
3460    "\\u3070\\u309e", /* H\\u309b\\u309e */
3461    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3462    "\\u3070\\u3070", /* H\\u309bH\\u309b */
3463    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3464    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3465    "\\u3070\\u3071", /* H\\u309bH\\u309c */
3466    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3467    "\\u3071\\u309d", /* H\\u309c\\u309d */
3468    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3469    "\\u3071\\u306f", /* H\\u309cH */
3470    "\\u30d1\\u30cf", /* K\\u309cK */
3471    "\\u3071\\u3070", /* H\\u309cH\\u309b */
3472    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3473    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3474    "\\u3071\\u3071", /* H\\u309cH\\u309c */
3475    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3476  };
3477  /*
3478  static const char *test3[] = {
3479    "\\u221er\\u221e",
3480    "\\u221eR#",
3481    "\\u221et\\u221e",
3482    "#r\\u221e",
3483    "#R#",
3484    "#t%",
3485    "#T%",
3486    "8t\\u221e",
3487    "8T\\u221e",
3488    "8t#",
3489    "8T#",
3490    "8t%",
3491    "8T%",
3492    "8t8",
3493    "8T8",
3494    "\\u03c9r\\u221e",
3495    "\\u03a9R%",
3496    "rr\\u221e",
3497    "rR\\u221e",
3498    "Rr\\u221e",
3499    "RR\\u221e",
3500    "RT%",
3501    "rt8",
3502    "tr\\u221e",
3503    "tr8",
3504    "TR8",
3505    "tt8",
3506    "\\u30b7\\u30e3\\u30fc\\u30ec",
3507  };
3508  */
3509  static const UColAttribute att[] = { UCOL_STRENGTH };
3510  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3511
3512  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3513  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3514
3515  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3516  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3517  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3518  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3519  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3520}
3521
3522static void TestStrCollIdenticalPrefix(void) {
3523  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3524  const char* test[] = {
3525    "ab\\ud9b0\\udc70",
3526    "ab\\ud9b0\\udc71"
3527  };
3528  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3529}
3530/* Contractions should have all their canonically equivalent */
3531/* strings included */
3532static void TestContractionClosure(void) {
3533  static const struct {
3534    const char *rules;
3535    const char *data[10];
3536    const uint32_t len;
3537  } tests[] = {
3538    {   "&b=\\u00e4\\u00e4",
3539      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3540    {   "&b=\\u00C5",
3541      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3542  };
3543  uint32_t i;
3544
3545
3546  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3547    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3548  }
3549}
3550
3551/* This tests also fails*/
3552static void TestBeforePrefixFailure(void) {
3553  static const struct {
3554    const char *rules;
3555    const char *data[10];
3556    const uint32_t len;
3557  } tests[] = {
3558    { "&g <<< a"
3559      "&[before 3]\\uff41 <<< x",
3560      {"x", "\\uff41"}, 2 },
3561    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3562        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3563        "&[before 3]\\u30a7<<<\\u30a9",
3564      {"\\u30a9", "\\u30a7"}, 2 },
3565    {   "&[before 3]\\u30a7<<<\\u30a9"
3566        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3567        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
3568      {"\\u30a9", "\\u30a7"}, 2 },
3569  };
3570  uint32_t i;
3571
3572
3573  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3574    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3575  }
3576
3577#if 0
3578  const char* rule1 =
3579        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3580        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
3581        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
3582  const char* rule2 =
3583        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
3584        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
3585        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
3586  const char* test[] = {
3587      "\\u30c6\\u30fc\\u30bf",
3588      "\\u30c6\\u30a7\\u30bf",
3589  };
3590  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
3591  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
3592/* this piece of code should be in some sort of verbose mode     */
3593/* it gets the collation elements for elements and prints them   */
3594/* This is useful when trying to see whether the problem is      */
3595  {
3596    UErrorCode status = U_ZERO_ERROR;
3597    uint32_t i = 0;
3598    UCollationElements *it = NULL;
3599    uint32_t CE;
3600    UChar string[256];
3601    uint32_t uStringLen;
3602    UCollator *coll = NULL;
3603
3604    uStringLen = u_unescape(rule1, string, 256);
3605
3606    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3607
3608    /*coll = ucol_open("ja_JP_JIS", &status);*/
3609    it = ucol_openElements(coll, string, 0, &status);
3610
3611    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
3612      log_verbose("%s\n", test[i]);
3613      uStringLen = u_unescape(test[i], string, 256);
3614      ucol_setText(it, string, uStringLen, &status);
3615
3616      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
3617        log_verbose("%08X\n", CE);
3618      }
3619      log_verbose("\n");
3620
3621    }
3622
3623    ucol_closeElements(it);
3624    ucol_close(coll);
3625  }
3626#endif
3627}
3628
3629static void TestPrefixCompose(void) {
3630  const char* rule1 =
3631        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3632  /*
3633  const char* test[] = {
3634      "\\u30c6\\u30fc\\u30bf",
3635      "\\u30c6\\u30a7\\u30bf",
3636  };
3637  */
3638  {
3639    UErrorCode status = U_ZERO_ERROR;
3640    /*uint32_t i = 0;*/
3641    /*UCollationElements *it = NULL;*/
3642/*    uint32_t CE;*/
3643    UChar string[256];
3644    uint32_t uStringLen;
3645    UCollator *coll = NULL;
3646
3647    uStringLen = u_unescape(rule1, string, 256);
3648
3649    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3650    ucol_close(coll);
3651  }
3652
3653
3654}
3655
3656/*
3657[last variable] last variable value
3658[last primary ignorable] largest CE for primary ignorable
3659[last secondary ignorable] largest CE for secondary ignorable
3660[last tertiary ignorable] largest CE for tertiary ignorable
3661[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3662*/
3663
3664static void TestRuleOptions(void) {
3665  /* values here are hardcoded and are correct for the current UCA
3666   * when the UCA changes, one might be forced to change these
3667   * values.
3668   */
3669
3670  /*
3671   * These strings contain the last character before [variable top]
3672   * and the first and second characters (by primary weights) after it.
3673   * See FractionalUCA.txt. For example:
3674      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
3675      [variable top = 0C FE]
3676      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
3677     and
3678      00B4; [0D 0C, 05, 05]
3679   *
3680   * Note: Starting with UCA 6.0, the [variable top] collation element
3681   * is not the weight of any character or string,
3682   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
3683   */
3684#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
3685#define FIRST_REGULAR_CHAR_STRING "\\u0060"
3686#define SECOND_REGULAR_CHAR_STRING "\\u00B4"
3687
3688  /*
3689   * This string has to match the character that has the [last regular] weight
3690   * which changes with each UCA version.
3691   * See the bottom of FractionalUCA.txt which says something like
3692      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
3693   *
3694   * Note: Starting with UCA 6.0, the [last regular] collation element
3695   * is not the weight of any character or string,
3696   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
3697   */
3698#define LAST_REGULAR_CHAR_STRING "\\U0001342E"
3699
3700  static const struct {
3701    const char *rules;
3702    const char *data[10];
3703    const uint32_t len;
3704  } tests[] = {
3705    /* - all befores here amount to zero */
3706    { "&[before 3][first tertiary ignorable]<<<a",
3707        { "\\u0000", "a"}, 2
3708    }, /* you cannot go before first tertiary ignorable */
3709
3710    { "&[before 3][last tertiary ignorable]<<<a",
3711        { "\\u0000", "a"}, 2
3712    }, /* you cannot go before last tertiary ignorable */
3713
3714    { "&[before 3][first secondary ignorable]<<<a",
3715        { "\\u0000", "a"}, 2
3716    }, /* you cannot go before first secondary ignorable */
3717
3718    { "&[before 3][last secondary ignorable]<<<a",
3719        { "\\u0000", "a"}, 2
3720    }, /* you cannot go before first secondary ignorable */
3721
3722    /* 'normal' befores */
3723
3724    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
3725        {  "c", "b", "\\u0332", "a" }, 4
3726    },
3727
3728    /* we don't have a code point that corresponds to
3729     * the last primary ignorable
3730     */
3731    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
3732        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
3733    },
3734
3735    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
3736        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
3737    },
3738
3739    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
3740        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
3741    },
3742
3743    { "&[first regular]<a"
3744      "&[before 1][first regular]<b",
3745      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
3746    },
3747
3748    { "&[before 1][last regular]<b"
3749      "&[last regular]<a",
3750        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
3751    },
3752
3753    { "&[before 1][first implicit]<b"
3754      "&[first implicit]<a",
3755        { "b", "\\u4e00", "a", "\\u4e01"}, 4
3756    },
3757
3758    { "&[before 1][last implicit]<b"
3759      "&[last implicit]<a",
3760        { "b", "\\U0010FFFD", "a" }, 3
3761    },
3762
3763    { "&[last variable]<z"
3764      "&[last primary ignorable]<x"
3765      "&[last secondary ignorable]<<y"
3766      "&[last tertiary ignorable]<<<w"
3767      "&[top]<u",
3768      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
3769    }
3770
3771  };
3772  uint32_t i;
3773
3774  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3775    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3776  }
3777}
3778
3779
3780static void TestOptimize(void) {
3781  /* this is not really a test - just trying out
3782   * whether copying of UCA contents will fail
3783   * Cannot really test, since the functionality
3784   * remains the same.
3785   */
3786  static const struct {
3787    const char *rules;
3788    const char *data[10];
3789    const uint32_t len;
3790  } tests[] = {
3791    /* - all befores here amount to zero */
3792    { "[optimize [\\uAC00-\\uD7FF]]",
3793    { "a", "b"}, 2}
3794  };
3795  uint32_t i;
3796
3797  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3798    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3799  }
3800}
3801
3802/*
3803cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3804weiv    ucol_strcollIter?
3805cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3806weiv    these are the input strings?
3807cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3808weiv    will check - could be a problem with utf-8 iterator
3809cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3810weiv    hmmm
3811cycheng@ca.ibm.c... note that we have a standalone high surrogate
3812weiv    that doesn't sound right
3813cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3814weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3815cycheng@ca.ibm.c... yes
3816weiv    and then do the comparison
3817cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3818weiv    utf-16 strings look like a little endian ones in the example you sent me
3819weiv    It could be a bug - let me try to test it out
3820cycheng@ca.ibm.c... ok
3821cycheng@ca.ibm.c... we can wait till the conf. call
3822cycheng@ca.ibm.c... next weke
3823weiv    that would be great
3824weiv    hmmm
3825weiv    I might be wrong
3826weiv    let me play with it some more
3827cycheng@ca.ibm.c... ok
3828cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3829cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3830cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3831weiv    ok
3832cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3833weiv    thanks
3834cycheng@ca.ibm.c... the 4 strings we sent are just samples
3835*/
#if 0
/*
 * Disabled. Collates one pair of strings twice with normalization on: once
 * through UTF-16BE iterators and once through UTF-8 iterators over the same
 * text, and reports an error when the two results differ.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* the same two strings, as raw UTF-16BE bytes ... */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* ... and as raw UTF-8 bytes */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator be16First, be16Second;
  UCharIterator u8First, u8Second;
  UCollationResult be16Result, u8Result;

  uiter_setUTF16BE(&be16First, utf16be[0], 4);
  uiter_setUTF16BE(&be16Second, utf16be[1], 4);

  uiter_setUTF8(&u8First, utf8[0], 4);
  uiter_setUTF8(&u8Second, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  be16Result = ucol_strcollIter(coll, &be16First, &be16Second, &status);
  u8Result = ucol_strcollIter(coll, &u8First, &u8Second, &status);

  if(be16Result != u8Result) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3876
3877#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3878static void Alexis2(void) {
3879  UErrorCode status = U_ZERO_ERROR;
3880  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3881  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3882  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3883  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3884
3885  UConverter *conv = NULL;
3886
3887  UCharIterator U16BEItS, U16BEItT;
3888  UCharIterator U8ItS, U8ItT;
3889
3890  UCollationResult resU16, resU16BE, resU8;
3891
3892  static const char* const pairs[][2] = {
3893    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3894    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3895    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3896    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3897    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3898    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3899    { "\\u0020", "\\u0020\\u0000"}
3900/*
39015F20 (my result here)
39025F204E008E3F
39035F20 (your result here)
3904*/
3905  };
3906
3907  int32_t i = 0;
3908
3909  UCollator *coll = ucol_open("", &status);
3910  if(status == U_FILE_ACCESS_ERROR) {
3911    log_data_err("Is your data around?\n");
3912    return;
3913  } else if(U_FAILURE(status)) {
3914    log_err("Error opening collator\n");
3915    return;
3916  }
3917  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3918  conv = ucnv_open("UTF16BE", &status);
3919  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3920    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3921    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3922
3923    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3924
3925    log_verbose("Result of strcoll is %i\n", resU16);
3926
3927    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3928    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3929    (void)U16BELenS;    /* Suppress set but not used warnings. */
3930    (void)U16BELenT;
3931
3932    /* use the original sizes, as the result from converter is in bytes */
3933    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3934    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3935
3936    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3937
3938    log_verbose("Result of U16BE is %i\n", resU16BE);
3939
3940    if(resU16 != resU16BE) {
3941      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3942    }
3943
3944    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3945    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3946
3947    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3948    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3949
3950    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3951
3952    if(resU16 != resU8) {
3953      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3954    }
3955
3956  }
3957
3958  ucol_close(coll);
3959  ucnv_close(conv);
3960}
3961
3962static void TestHebrewUCA(void) {
3963  UErrorCode status = U_ZERO_ERROR;
3964  static const char *first[] = {
3965    "d790d6b8d79cd795d6bcd7a9",
3966    "d790d79cd79ed7a7d799d799d7a1",
3967    "d790d6b4d79ed795d6bcd7a9",
3968  };
3969
3970  char utf8String[3][256];
3971  UChar utf16String[3][256];
3972
3973  int32_t i = 0, j = 0;
3974  int32_t sizeUTF8[3];
3975  int32_t sizeUTF16[3];
3976
3977  UCollator *coll = ucol_open("", &status);
3978  if (U_FAILURE(status)) {
3979      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3980      return;
3981  }
3982  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3983
3984  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3985    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3986    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3987    log_verbose("%i: ");
3988    for(j = 0; j < sizeUTF16[i]; j++) {
3989      /*log_verbose("\\u%04X", utf16String[i][j]);*/
3990      log_verbose("%04X", utf16String[i][j]);
3991    }
3992    log_verbose("\n");
3993  }
3994  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3995    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3996      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3997    }
3998  }
3999
4000  ucol_close(coll);
4001
4002}
4003
4004static void TestPartialSortKeyTermination(void) {
4005  static const char* cases[] = {
4006    "\\u1234\\u1234\\udc00",
4007    "\\udc00\\ud800\\ud800"
4008  };
4009
4010  int32_t i = sizeof(UCollator);
4011
4012  UErrorCode status = U_ZERO_ERROR;
4013
4014  UCollator *coll = ucol_open("", &status);
4015
4016  UCharIterator iter;
4017
4018  UChar currCase[256];
4019  int32_t length = 0;
4020  int32_t pKeyLen = 0;
4021
4022  uint8_t key[256];
4023
4024  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
4025    uint32_t state[2] = {0, 0};
4026    length = u_unescape(cases[i], currCase, 256);
4027    uiter_setString(&iter, currCase, length);
4028    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
4029    (void)pKeyLen;   /* Suppress set but not used warning. */
4030
4031    log_verbose("Done\n");
4032
4033  }
4034  ucol_close(coll);
4035}
4036
4037static void TestSettings(void) {
4038  static const char* cases[] = {
4039    "apple",
4040      "Apple"
4041  };
4042
4043  static const char* locales[] = {
4044    "",
4045      "en"
4046  };
4047
4048  UErrorCode status = U_ZERO_ERROR;
4049
4050  int32_t i = 0, j = 0;
4051
4052  UChar source[256], target[256];
4053  int32_t sLen = 0, tLen = 0;
4054
4055  UCollator *collateObject = NULL;
4056  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4057    collateObject = ucol_open(locales[i], &status);
4058    ucol_setStrength(collateObject, UCOL_PRIMARY);
4059    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4060    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4061      sLen = u_unescape(cases[j-1], source, 256);
4062      source[sLen] = 0;
4063      tLen = u_unescape(cases[j], target, 256);
4064      source[tLen] = 0;
4065      doTest(collateObject, source, target, UCOL_EQUAL);
4066    }
4067    ucol_close(collateObject);
4068  }
4069}
4070
4071static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4072    UErrorCode status = U_ZERO_ERROR;
4073    int32_t errorNo = 0;
4074    const UChar *sourceRules = NULL;
4075    int32_t sourceRulesLen = 0;
4076    UParseError parseError;
4077    UColAttributeValue french = UCOL_OFF;
4078
4079    if(!ucol_equals(source, target)) {
4080        log_err("Same collators, different address not equal\n");
4081        errorNo++;
4082    }
4083    ucol_close(target);
4084    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4085        target = ucol_safeClone(source, NULL, NULL, &status);
4086        if(U_FAILURE(status)) {
4087            log_err("Error creating clone\n");
4088            errorNo++;
4089            return errorNo;
4090        }
4091        if(!ucol_equals(source, target)) {
4092            log_err("Collator different from it's clone\n");
4093            errorNo++;
4094        }
4095        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4096        if(french == UCOL_ON) {
4097            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4098        } else {
4099            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4100        }
4101        if(U_FAILURE(status)) {
4102            log_err("Error setting attributes\n");
4103            errorNo++;
4104            return errorNo;
4105        }
4106        if(ucol_equals(source, target)) {
4107            log_err("Collators same even when options changed\n");
4108            errorNo++;
4109        }
4110        ucol_close(target);
4111
4112        sourceRules = ucol_getRules(source, &sourceRulesLen);
4113        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4114        if(U_FAILURE(status)) {
4115            log_err("Error instantiating target from rules - %s\n", u_errorName(status));
4116            errorNo++;
4117            return errorNo;
4118        }
4119        if(!ucol_equals(source, target)) {
4120            log_err("Collator different from collator that was created from the same rules\n");
4121            errorNo++;
4122        }
4123        ucol_close(target);
4124    }
4125    return errorNo;
4126}
4127
4128
4129static void TestEquals(void) {
4130    /* ucol_equals is not currently a public API. There is a chance that it will become
4131    * something like this, but currently it is only used by RuleBasedCollator::operator==
4132    */
4133    /* test whether the two collators instantiated from the same locale are equal */
4134    UErrorCode status = U_ZERO_ERROR;
4135    UParseError parseError;
4136    int32_t noOfLoc = uloc_countAvailable();
4137    const char *locName = NULL;
4138    UCollator *source = NULL, *target = NULL;
4139    int32_t i = 0;
4140
4141    const char* rules[] = {
4142        "&l < lj <<< Lj <<< LJ",
4143        "&n < nj <<< Nj <<< NJ",
4144        "&ae <<< \\u00e4",
4145        "&AE <<< \\u00c4"
4146    };
4147    /*
4148    const char* badRules[] = {
4149    "&l <<< Lj",
4150    "&n < nj <<< nJ <<< NJ",
4151    "&a <<< \\u00e4",
4152    "&AE <<< \\u00c4 <<< x"
4153    };
4154    */
4155
4156    UChar sourceRules[1024], targetRules[1024];
4157    int32_t sourceRulesSize = 0, targetRulesSize = 0;
4158    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4159
4160    for(i = 0; i < rulesSize; i++) {
4161        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4162        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4163    }
4164
4165    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4166    if(status == U_FILE_ACCESS_ERROR) {
4167        log_data_err("Is your data around?\n");
4168        return;
4169    } else if(U_FAILURE(status)) {
4170        log_err("Error opening collator\n");
4171        return;
4172    }
4173    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4174    if(!ucol_equals(source, target)) {
4175        log_err("Equivalent collators not equal!\n");
4176    }
4177    ucol_close(source);
4178    ucol_close(target);
4179
4180    source = ucol_open("root", &status);
4181    target = ucol_open("root", &status);
4182    log_verbose("Testing root\n");
4183    if(!ucol_equals(source, source)) {
4184        log_err("Same collator not equal\n");
4185    }
4186    if(TestEqualsForCollator(locName, source, target)) {
4187        log_err("Errors for root\n", locName);
4188    }
4189    ucol_close(source);
4190
4191    for(i = 0; i<noOfLoc; i++) {
4192        status = U_ZERO_ERROR;
4193        locName = uloc_getAvailable(i);
4194        /*if(hasCollationElements(locName)) {*/
4195        log_verbose("Testing equality for locale %s\n", locName);
4196        source = ucol_open(locName, &status);
4197        target = ucol_open(locName, &status);
4198        if (U_FAILURE(status)) {
4199            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4200            continue;
4201        }
4202        if(TestEqualsForCollator(locName, source, target)) {
4203            log_err("Errors for locale %s\n", locName);
4204        }
4205        ucol_close(source);
4206        /*}*/
4207    }
4208}
4209
4210static void TestJ2726(void) {
4211    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4212    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4213    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4214    UErrorCode status = U_ZERO_ERROR;
4215    UCollator *coll = ucol_open("en", &status);
4216    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4217    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4218    doTest(coll, a, aSpace, UCOL_EQUAL);
4219    doTest(coll, aSpace, a, UCOL_EQUAL);
4220    doTest(coll, a, spaceA, UCOL_EQUAL);
4221    doTest(coll, spaceA, a, UCOL_EQUAL);
4222    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4223    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4224    ucol_close(coll);
4225}
4226
4227static void NullRule(void) {
4228    UChar r[3] = {0};
4229    UErrorCode status = U_ZERO_ERROR;
4230    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4231    if(U_SUCCESS(status)) {
4232        log_err("This should have been an error!\n");
4233        ucol_close(coll);
4234    } else {
4235        status = U_ZERO_ERROR;
4236    }
4237    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4238    if(U_FAILURE(status)) {
4239        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4240    } else {
4241        ucol_close(coll);
4242    }
4243}
4244
/**
 * Test for numeric collation: runs several lists of digit-bearing strings
 * through collators that have UCOL_NUMERIC_COLLATION set to UCOL_ON.
 */
4249static void TestNumericCollation(void)
4250{
4251    UErrorCode status = U_ZERO_ERROR;
4252
4253    const static char *basicTestStrings[]={
4254    "hello1",
4255    "hello2",
4256    "hello2002",
4257    "hello2003",
4258    "hello123456",
4259    "hello1234567",
4260    "hello10000000",
4261    "hello100000000",
4262    "hello1000000000",
4263    "hello10000000000",
4264    };
4265
4266    const static char *preZeroTestStrings[]={
4267    "avery10000",
4268    "avery010000",
4269    "avery0010000",
4270    "avery00010000",
4271    "avery000010000",
4272    "avery0000010000",
4273    "avery00000010000",
4274    "avery000000010000",
4275    };
4276
4277    const static char *thirtyTwoBitNumericStrings[]={
4278    "avery42949672960",
4279    "avery42949672961",
4280    "avery42949672962",
4281    "avery429496729610"
4282    };
4283
4284     const static char *longNumericStrings[]={
4285     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4286        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4287        are treated as multiple collation elements. */
4288    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4289    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4290    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4291    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4292    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4293    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4294    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4295    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4296    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4297    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4298    };
4299
4300    const static char *supplementaryDigits[] = {
4301      "\\uD835\\uDFCE", /* 0 */
4302      "\\uD835\\uDFCF", /* 1 */
4303      "\\uD835\\uDFD0", /* 2 */
4304      "\\uD835\\uDFD1", /* 3 */
4305      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4306      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4307      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4308      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4309      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4310      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4311    };
4312
4313    const static char *foreignDigits[] = {
4314      "\\u0661",
4315        "\\u0662",
4316        "\\u0663",
4317      "\\u0661\\u0660",
4318      "\\u0661\\u0662",
4319      "\\u0661\\u0663",
4320      "\\u0662\\u0660",
4321      "\\u0662\\u0662",
4322      "\\u0662\\u0663",
4323      "\\u0663\\u0660",
4324      "\\u0663\\u0662",
4325      "\\u0663\\u0663"
4326    };
4327
4328    const static char *evenZeroes[] = {
4329      "2000",
4330      "2001",
4331        "2002",
4332        "2003"
4333    };
4334
4335    UColAttribute att = UCOL_NUMERIC_COLLATION;
4336    UColAttributeValue val = UCOL_ON;
4337
4338    /* Open our collator. */
4339    UCollator* coll = ucol_open("root", &status);
4340    if (U_FAILURE(status)){
4341        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4342              myErrorName(status));
4343        return;
4344    }
4345    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4346    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4347    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4348    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4349    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4350    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4351
4352    /* Setting up our collator to do digits. */
4353    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4354    if (U_FAILURE(status)){
4355        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4356              myErrorName(status));
4357        return;
4358    }
4359
4360    /*
4361       Testing that prepended zeroes still yield the correct collation behavior.
4362       We expect that every element in our strings array will be equal.
4363    */
4364    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4365
4366    ucol_close(coll);
4367}
4368
4369static void TestTibetanConformance(void)
4370{
4371    const char* test[] = {
4372        "\\u0FB2\\u0591\\u0F71\\u0061",
4373        "\\u0FB2\\u0F71\\u0061"
4374    };
4375
4376    UErrorCode status = U_ZERO_ERROR;
4377    UCollator *coll = ucol_open("", &status);
4378    UChar source[100];
4379    UChar target[100];
4380    int result;
4381    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4382    if (U_SUCCESS(status)) {
4383        u_unescape(test[0], source, 100);
4384        u_unescape(test[1], target, 100);
4385        doTest(coll, source, target, UCOL_EQUAL);
4386        result = ucol_strcoll(coll, source, -1,   target, -1);
4387        log_verbose("result %d\n", result);
4388        if (UCOL_EQUAL != result) {
4389            log_err("Tibetan comparison error\n");
4390        }
4391    }
4392    ucol_close(coll);
4393
4394    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4395}
4396
/** Runs the generic ordering check on two Han strings with the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *hanStrings[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", hanStrings, LEN(hanStrings));
}
4401
/* Upper bound (inclusive) of the raw values walked by TestImplicitGeneration. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to implicit weights in TestImplicitGeneration.
 * Deliberately defined without a trailing semicolon so that they behave like
 * ordinary constants and can appear anywhere inside an expression.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4406
4407
4408static void showImplicit(UChar32 i) {
4409    if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4410        log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4411    }
4412}
4413
4414static void TestImplicitGeneration(void) {
4415    UErrorCode status = U_ZERO_ERROR;
4416    UChar32 last = 0;
4417    UChar32 current;
4418    UChar32 i = 0, j = 0;
4419    UChar32 roundtrip = 0;
4420    UChar32 lastBottom = 0;
4421    UChar32 currentBottom = 0;
4422    UChar32 lastTop = 0;
4423    UChar32 currentTop = 0;
4424
4425    UCollator *coll = ucol_open("root", &status);
4426    if(U_FAILURE(status)) {
4427        log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4428        return;
4429    }
4430
4431    uprv_uca_getRawFromImplicit(0xE20303E7);
4432
4433    for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4434        current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4435
4436        /* check that it round-trips AND that all intervening ones are illegal*/
4437        roundtrip = uprv_uca_getRawFromImplicit(current);
4438        if (roundtrip != i) {
4439            log_err("No roundtrip %08X\n", i);
4440        }
4441        if (last != 0) {
4442            for (j = last + 1; j < current; ++j) {
4443                roundtrip = uprv_uca_getRawFromImplicit(j);
4444                /* raise an error if it *doesn't* find an error*/
4445                if (roundtrip != -1) {
4446                    log_err("Fails to recognize illegal %08X\n", j);
4447                }
4448            }
4449        }
4450        /* now do other consistency checks*/
4451        lastBottom = last & bottomByte;
4452        currentBottom = current & bottomByte;
4453        lastTop = last & topByte;
4454        currentTop = current & topByte;
4455        (void)lastBottom;     /* Suppress set but not used warnings. */
4456        (void)currentBottom;
4457
4458        /* print out some values for spot-checking*/
4459        if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4460            showImplicit(i-3);
4461            showImplicit(i-2);
4462            showImplicit(i-1);
4463            showImplicit(i);
4464            showImplicit(i+1);
4465            showImplicit(i+2);
4466        }
4467        last = current;
4468
4469        if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4470            log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4471        }
4472    }
4473    showImplicit(TST_UCOL_MAX_INPUT-2);
4474    showImplicit(TST_UCOL_MAX_INPUT-1);
4475    showImplicit(TST_UCOL_MAX_INPUT);
4476    ucol_close(coll);
4477}
4478
4479/**
4480 * Iterate through the given iterator, checking to see that all the strings
4481 * in the expected array are present.
4482 * @param expected array of strings we expect to see, or NULL
4483 * @param expectedCount number of elements of expected, or 0
4484 */
4485static int32_t checkUEnumeration(const char* msg,
4486                                 UEnumeration* iter,
4487                                 const char** expected,
4488                                 int32_t expectedCount) {
4489    UErrorCode ec = U_ZERO_ERROR;
4490    int32_t i = 0, n, j, bit;
4491    int32_t seenMask = 0;
4492
4493    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4494    n = uenum_count(iter, &ec);
4495    if (!assertSuccess("count", &ec)) return -1;
4496    log_verbose("%s = [", msg);
4497    for (;; ++i) {
4498        const char* s = uenum_next(iter, NULL, &ec);
4499        if (!assertSuccess("snext", &ec) || s == NULL) break;
4500        if (i != 0) log_verbose(",");
4501        log_verbose("%s", s);
4502        /* check expected list */
4503        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4504            if ((seenMask&bit) == 0 &&
4505                uprv_strcmp(s, expected[j]) == 0) {
4506                seenMask |= bit;
4507                break;
4508            }
4509        }
4510    }
4511    log_verbose("] (%d)\n", i);
4512    assertTrue("count verified", i==n);
4513    /* did we see all expected strings? */
4514    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4515        if ((seenMask&bit)!=0) {
4516            log_verbose("Ok: \"%s\" seen\n", expected[j]);
4517        } else {
4518            log_err("FAIL: \"%s\" not seen\n", expected[j]);
4519        }
4520    }
4521    return n;
4522}
4523
4524/**
4525 * Test new API added for separate collation tree.
4526 */
4527static void TestSeparateTrees(void) {
4528    UErrorCode ec = U_ZERO_ERROR;
4529    UEnumeration *e = NULL;
4530    int32_t n = -1;
4531    UBool isAvailable;
4532    char loc[256];
4533
4534    static const char* AVAIL[] = { "en", "de" };
4535
4536    static const char* KW[] = { "collation" };
4537
4538    static const char* KWVAL[] = { "phonebook", "stroke" };
4539
4540#if !UCONFIG_NO_SERVICE
4541    e = ucol_openAvailableLocales(&ec);
4542    if (e != NULL) {
4543        assertSuccess("ucol_openAvailableLocales", &ec);
4544        assertTrue("ucol_openAvailableLocales!=0", e!=0);
4545        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4546        (void)n;    /* Suppress set but not used warnings. */
4547        /* Don't need to check n because we check list */
4548        uenum_close(e);
4549    } else {
4550        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4551    }
4552#endif
4553
4554    e = ucol_getKeywords(&ec);
4555    if (e != NULL) {
4556        assertSuccess("ucol_getKeywords", &ec);
4557        assertTrue("ucol_getKeywords!=0", e!=0);
4558        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4559        /* Don't need to check n because we check list */
4560        uenum_close(e);
4561    } else {
4562        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4563    }
4564
4565    e = ucol_getKeywordValues(KW[0], &ec);
4566    if (e != NULL) {
4567        assertSuccess("ucol_getKeywordValues", &ec);
4568        assertTrue("ucol_getKeywordValues!=0", e!=0);
4569        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4570        /* Don't need to check n because we check list */
4571        uenum_close(e);
4572    } else {
4573        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4574    }
4575
4576    /* Try setting a warning before calling ucol_getKeywordValues */
4577    ec = U_USING_FALLBACK_WARNING;
4578    e = ucol_getKeywordValues(KW[0], &ec);
4579    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4580        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4581        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4582        /* Don't need to check n because we check list */
4583        uenum_close(e);
4584    }
4585
4586    /*
4587U_DRAFT int32_t U_EXPORT2
4588ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4589                             const char* locale, UBool* isAvailable,
4590                             UErrorCode* status);
4591}
4592*/
4593    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4594                                     &isAvailable, &ec);
4595    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4596        assertEquals("getFunctionalEquivalent(de)", "root", loc);
4597        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4598                   isAvailable == TRUE);
4599    }
4600
4601    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4602                                     &isAvailable, &ec);
4603    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4604        assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);
4605        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4606                   isAvailable == TRUE);
4607    }
4608}
4609
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through the generic ordering check, once with
 * a hand-written "[before 2]" tailoring for accented vowels and once with
 * the "zh" locale.
 */
static void TestBeforePinyin(void) {
    static const char tailoring[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    /* Syllables starting with 'l', with and without a macron on the vowel. */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* Case and accent variations of 'a' after (and optionally before) an 'x'. */
    static const char *variants[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(tailoring, syllables, LEN(syllables));
    genericLocaleStarter("zh", syllables, LEN(syllables));
    genericRulesStarter(tailoring, variants, LEN(variants));
    genericLocaleStarter("zh", variants, LEN(variants));
}
4670
4671static void TestBeforeTightening(void) {
4672    static const struct {
4673        const char *rules;
4674        UErrorCode expectedStatus;
4675    } tests[] = {
4676        { "&[before 1]a<x", U_ZERO_ERROR },
4677        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4678        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4679        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4680        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4681        { "&[before 2]a<<x",U_ZERO_ERROR },
4682        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4683        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4684        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4685        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4686        { "&[before 3]a<<<x",U_ZERO_ERROR },
4687        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4688        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4689    };
4690
4691    int32_t i = 0;
4692
4693    UErrorCode status = U_ZERO_ERROR;
4694    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4695    uint32_t rlen = 0;
4696
4697    UCollator *coll = NULL;
4698
4699
4700    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4701        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4702        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4703        if(status != tests[i].expectedStatus) {
4704            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4705                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4706        }
4707        ucol_close(coll);
4708        status = U_ZERO_ERROR;
4709    }
4710
4711}
4712
4713/*
4714&m < a
4715&[before 1] a < x <<< X << q <<< Q < z
4716assert: m <<< M < x <<< X << q <<< Q < z < a < n
4717
4718&m < a
4719&[before 2] a << x <<< X << q <<< Q < z
4720assert: m <<< M < x <<< X << q <<< Q << a < z < n
4721
4722&m < a
4723&[before 3] a <<< x <<< X << q <<< Q < z
4724assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4725
4726
4727&m << a
4728&[before 1] a < x <<< X << q <<< Q < z
4729assert: x <<< X << q <<< Q < z < m <<< M << a < n
4730
4731&m << a
4732&[before 2] a << x <<< X << q <<< Q < z
4733assert: m <<< M << x <<< X << q <<< Q << a < z < n
4734
4735&m << a
4736&[before 3] a <<< x <<< X << q <<< Q < z
4737assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4738
4739
4740&m <<< a
4741&[before 1] a < x <<< X << q <<< Q < z
4742assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4743
4744&m <<< a
4745&[before 2] a << x <<< X << q <<< Q < z
4746assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4747
4748&m <<< a
4749&[before 3] a <<< x <<< X << q <<< Q < z
4750assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4751
4752
4753&[before 1] s < x <<< X << q <<< Q < z
4754assert: r <<< R < x <<< X << q <<< Q < z < s < n
4755
4756&[before 2] s << x <<< X << q <<< Q < z
4757assert: r <<< R < x <<< X << q <<< Q << s < z < n
4758
4759&[before 3] s <<< x <<< X << q <<< Q < z
4760assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4761
4762
4763&[before 1] \u24DC < x <<< X << q <<< Q < z
4764assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4765
4766&[before 2] \u24DC << x <<< X << q <<< Q < z
4767assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4768
4769&[before 3] \u24DC <<< x <<< X << q <<< Q < z
4770assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4771*/
4772
4773
#if 0
/* requires features not yet supported */
/*
 * Disabled test (compiled out by the surrounding #if 0).
 * Each table entry holds a tailoring rule string, a list of strings and the
 * number of strings in that list; every entry is handed to
 * genericRulesStarter().
 * NOTE(review): the string lists appear to mirror the "assert:" orderings in
 * the comment preceding this block -- confirm before re-enabling.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* Run every rule/list pair through the generic rules-based check. */
    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
#endif
4821
4822static void TestTailorNULL( void ) {
4823    const static char* rule = "&a <<< '\\u0000'";
4824    UErrorCode status = U_ZERO_ERROR;
4825    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4826    uint32_t rlen = 0;
4827    UChar a = 1, null = 0;
4828    UCollationResult res = UCOL_EQUAL;
4829
4830    UCollator *coll = NULL;
4831
4832
4833    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4834    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4835
4836    if(U_FAILURE(status)) {
4837        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4838    } else {
4839        res = ucol_strcoll(coll, &a, 1, &null, 1);
4840
4841        if(res != UCOL_LESS) {
4842            log_err("NULL was not tailored properly!\n");
4843        }
4844    }
4845
4846    ucol_close(coll);
4847}
4848
4849static void
4850TestUpperFirstQuaternary(void)
4851{
4852  const char* tests[] = { "B", "b", "Bb", "bB" };
4853  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4854  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4855  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4856}
4857
4858static void
4859TestJ4960(void)
4860{
4861  const char* tests[] = { "\\u00e2T", "aT" };
4862  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4863  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4864  const char* tests2[] = { "a", "A" };
4865  const char* rule = "&[first tertiary ignorable]=A=a";
4866  UColAttribute att2[] = { UCOL_CASE_LEVEL };
4867  UColAttributeValue attVals2[] = { UCOL_ON };
4868  /* Test whether we correctly ignore primary ignorables on case level when */
4869  /* we have only primary & case level */
4870  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4871  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4872  /* and case level */
4873  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4874  /* Test whether completely ignorable letters have case level info (they shouldn't) */
4875  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4876}
4877
4878static void
4879TestJ5223(void)
4880{
4881  static const char *test = "this is a test string";
4882  UChar ustr[256];
4883  int32_t ustr_length = u_unescape(test, ustr, 256);
4884  unsigned char sortkey[256];
4885  int32_t sortkey_length;
4886  UErrorCode status = U_ZERO_ERROR;
4887  static UCollator *coll = NULL;
4888  coll = ucol_open("root", &status);
4889  if(U_FAILURE(status)) {
4890    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4891    return;
4892  }
4893  ucol_setStrength(coll, UCOL_PRIMARY);
4894  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4895  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4896  if (U_FAILURE(status)) {
4897    log_err("Failed setting atributes\n");
4898    return;
4899  }
4900  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4901  if (sortkey_length > 256) return;
4902
4903  /* we mark the position where the null byte should be written in advance */
4904  sortkey[sortkey_length-1] = 0xAA;
4905
4906  /* we set the buffer size one byte higher than needed */
4907  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4908    sortkey_length+1);
4909
4910  /* no error occurs (for me) */
4911  if (sortkey[sortkey_length-1] == 0xAA) {
4912    log_err("Hit bug at first try\n");
4913  }
4914
4915  /* we mark the position where the null byte should be written again */
4916  sortkey[sortkey_length-1] = 0xAA;
4917
4918  /* this time we set the buffer size to the exact amount needed */
4919  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4920    sortkey_length);
4921
4922  /* now the trailing null byte is not written */
4923  if (sortkey[sortkey_length-1] == 0xAA) {
4924    log_err("Hit bug at second try\n");
4925  }
4926
4927  ucol_close(coll);
4928}
4929
/*
 * Regression test for the Thai partial sort key problem (J5232):
 * two Thai strings are run through the generic ordering check with the
 * "th" locale.
 */
static void
TestJ5232(void)
{
    static const char *thaiStrings[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiStrings, LEN(thaiStrings));
}
4941
/**
 * Runs the generic rules-based ordering check on "a", "y" with a tailoring
 * that resets at "Ny" and places 'a' after the first secondary ignorable.
 */
static void
TestJ5367(void)
{
    static const char *ordered[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, ordered, LEN(ordered));
}
4949
4950static void
4951TestVI5913(void)
4952{
4953    UErrorCode status = U_ZERO_ERROR;
4954    int32_t i, j;
4955    UCollator *coll =NULL;
4956    uint8_t  resColl[100], expColl[100];
4957    int32_t  rLen, tLen, ruleLen, sLen, kLen;
4958    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4959    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4960    UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4961    static const UChar tData[][20]={
4962        {0x1EAC, 0},
4963        {0x0041, 0x0323, 0x0302, 0},
4964        {0x1EA0, 0x0302, 0},
4965        {0x00C2, 0x0323, 0},
4966        {0x1ED8, 0},  /* O with dot and circumflex */
4967        {0x1ECC, 0x0302, 0},
4968        {0x1EB7, 0},
4969        {0x1EA1, 0x0306, 0},
4970    };
4971    static const UChar tailorData[][20]={
4972        {0x1FA2, 0},  /* Omega with 3 combining marks */
4973        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4974        {0x1FF3, 0x0313, 0x0300, 0},
4975        {0x1F60, 0x0300, 0x0345, 0},
4976        {0x1F62, 0x0345, 0},
4977        {0x1FA0, 0x0300, 0},
4978    };
4979    static const UChar tailorData2[][20]={
4980        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4981        {0x0073, 0x0323, 0x030C, 0},
4982        {0x0073, 0x030C, 0x0323, 0},
4983    };
4984    static const UChar tailorData3[][20]={
4985        {0x007a, 0},  /*  z */
4986        {0x0061, 0x0065, 0},  /*  a + e */
4987        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4988        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4989        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4990        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4991        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4992        {0x00EA, 0},  /* e with circumflex  */
4993    };
4994
4995    /* Test Vietnamese sort. */
4996    coll = ucol_open("vi", &status);
4997    if(U_FAILURE(status)) {
4998        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4999        return;
5000    }
5001    log_verbose("\n\nVI collation:");
5002    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
5003        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5004    }
5005    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
5006        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5007    }
5008    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
5009        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
5010    }
5011    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
5012        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5013    }
5014
5015    for (j=0; j<8; j++) {
5016        tLen = u_strlen(tData[j]);
5017        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
5018        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5019        for(i = 0; i<rLen; i++) {
5020            log_verbose(" %02X", resColl[i]);
5021        }
5022    }
5023
5024    ucol_close(coll);
5025
5026    /* Test Romanian sort. */
5027    coll = ucol_open("ro", &status);
5028    log_verbose("\n\nRO collation:");
5029    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
5030        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
5031    }
5032    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
5033        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
5034    }
5035    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
5036        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
5037    }
5038
5039    for (j=4; j<8; j++) {
5040        tLen = u_strlen(tData[j]);
5041        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
5042        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
5043        for(i = 0; i<rLen; i++) {
5044            log_verbose(" %02X", resColl[i]);
5045        }
5046    }
5047    ucol_close(coll);
5048
5049    /* Test the precomposed Greek character with 3 combining marks. */
5050    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5051    ruleLen = u_strlen(rule);
5052    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5053    if (U_FAILURE(status)) {
5054        log_err("ucol_openRules failed with %s\n", u_errorName(status));
5055        return;
5056    }
5057    sLen = u_strlen(tailorData[0]);
5058    for (j=1; j<6; j++) {
5059        tLen = u_strlen(tailorData[j]);
5060        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5061            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5062        }
5063    }
5064    /* Test getSortKey. */
5065    tLen = u_strlen(tailorData[0]);
5066    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5067    for (j=0; j<6; j++) {
5068        tLen = u_strlen(tailorData[j]);
5069        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5070        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5071            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5072            for(i = 0; i<rLen; i++) {
5073                log_err(" %02X", resColl[i]);
5074            }
5075        }
5076    }
5077    ucol_close(coll);
5078
5079    log_verbose("\n\nTailoring test for s with caron:");
5080    ruleLen = u_strlen(rule2);
5081    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5082    tLen = u_strlen(tailorData2[0]);
5083    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5084    for (j=1; j<3; j++) {
5085        tLen = u_strlen(tailorData2[j]);
5086        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5087        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5088            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5089            for(i = 0; i<rLen; i++) {
5090                log_err(" %02X", resColl[i]);
5091            }
5092        }
5093    }
5094    ucol_close(coll);
5095
5096    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5097    ruleLen = u_strlen(rule3);
5098    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5099    tLen = u_strlen(tailorData3[3]);
5100    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5101    for (j=4; j<6; j++) {
5102        tLen = u_strlen(tailorData3[j]);
5103        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5104
5105        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5106            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5107            for(i = 0; i<rLen; i++) {
5108                log_err(" %02X", resColl[i]);
5109            }
5110        }
5111
5112        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5113         for(i = 0; i<rLen; i++) {
5114             log_verbose(" %02X", resColl[i]);
5115         }
5116    }
5117    ucol_close(coll);
5118}
5119
5120static void
5121TestTailor6179(void)
5122{
5123    UErrorCode status = U_ZERO_ERROR;
5124    int32_t i;
5125    UCollator *coll =NULL;
5126    uint8_t  resColl[100];
5127    int32_t  rLen, tLen, ruleLen;
5128    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5129    static const UChar rule1[]={
5130            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5131            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5132            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5133            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5134    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5135    static const UChar rule2[]={
5136            0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5137            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5138            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5139            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5140            0x3C,0x3C,0x20,0x62,0};
5141
5142    static const UChar tData1[][4]={
5143        {0x61, 0},
5144        {0x62, 0},
5145        { 0xFDD0,0x009E, 0}
5146    };
5147    static const UChar tData2[][4]={
5148        {0x61, 0},
5149        {0x62, 0},
5150        { 0xFDD0,0x009E, 0}
5151     };
5152
5153    /*
5154     * These values from FractionalUCA.txt will change,
5155     * and need to be updated here.
5156     */
5157    static const uint8_t firstPrimaryIgnCE[]={1, 0x88, 1, 5, 0};
5158    static const uint8_t lastPrimaryIgnCE[]={1, 0xE3, 1, 5, 0};
5159    static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5160    static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xbf, 0x04, 0};
5161
5162    /* Test [Last Primary ignorable] */
5163
5164    log_verbose("Tailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b\n");
5165    ruleLen = u_strlen(rule1);
5166    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5167    if (U_FAILURE(status)) {
5168        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5169        return;
5170    }
5171    tLen = u_strlen(tData1[0]);
5172    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5173    if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {
5174        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5175        for(i = 0; i<rLen; i++) {
5176            log_err(" %02X", resColl[i]);
5177        }
5178        log_err("\n");
5179    }
5180    tLen = u_strlen(tData1[1]);
5181    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5182    if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE, rLen) != 0) {
5183        log_err("Bad result for &[lpi]<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5184        for(i = 0; i<rLen; i++) {
5185            log_err(" %02X", resColl[i]);
5186        }
5187        log_err("\n");
5188    }
5189    ucol_close(coll);
5190
5191
5192    /* Test [Last Secondary ignorable] */
5193    log_verbose("Tailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b\n");
5194    ruleLen = u_strlen(rule1);
5195    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5196    if (U_FAILURE(status)) {
5197        log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u_errorName(status));
5198        return;
5199    }
5200    tLen = u_strlen(tData2[0]);
5201    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5202    if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgnCE, rLen) != 0) {
5203        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5204        for(i = 0; i<rLen; i++) {
5205            log_err(" %02X", resColl[i]);
5206        }
5207        log_err("\n");
5208    }
5209    if(!log_knownIssue("8982", "debug and fix")) { /* TODO: debug & fix, see ticket #8982 */
5210      tLen = u_strlen(tData2[1]);
5211      rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5212      if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryIgnCE, rLen) != 0) {
5213        log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5214        for(i = 0; i<rLen; i++) {
5215          log_err(" %02X", resColl[i]);
5216        }
5217        log_err("\n");
5218      }
5219    }
5220    ucol_close(coll);
5221}
5222
5223static void
5224TestUCAPrecontext(void)
5225{
5226    UErrorCode status = U_ZERO_ERROR;
5227    int32_t i, j;
5228    UCollator *coll =NULL;
5229    uint8_t  resColl[100], prevColl[100];
5230    int32_t  rLen, tLen, ruleLen;
5231    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5232    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5233    /* & l middle-dot << a  a is an expansion. */
5234
5235    UChar tData1[][20]={
5236            { 0xb7, 0},  /* standalone middle dot(0xb7) */
5237            { 0x387, 0}, /* standalone middle dot(0x387) */
5238            { 0x61, 0},  /* a */
5239            { 0x6C, 0},  /* l */
5240            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5241            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5242            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5243            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5244            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5245            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5246            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5247     };
5248
5249    log_verbose("\n\nEN collation:");
5250    coll = ucol_open("en", &status);
5251    if (U_FAILURE(status)) {
5252        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5253        return;
5254    }
5255    for (j=0; j<11; j++) {
5256        tLen = u_strlen(tData1[j]);
5257        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5258        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5259            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5260                    j, tData1[j]);
5261        }
5262        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5263        for(i = 0; i<rLen; i++) {
5264            log_verbose(" %02X", resColl[i]);
5265        }
5266        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5267     }
5268     ucol_close(coll);
5269
5270
5271     log_verbose("\n\nJA collation:");
5272     coll = ucol_open("ja", &status);
5273     if (U_FAILURE(status)) {
5274         log_err("Tailoring test: &z <<a|- failed!");
5275         return;
5276     }
5277     for (j=0; j<11; j++) {
5278         tLen = u_strlen(tData1[j]);
5279         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5280         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5281             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5282                     j, tData1[j]);
5283         }
5284         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5285         for(i = 0; i<rLen; i++) {
5286             log_verbose(" %02X", resColl[i]);
5287         }
5288         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5289      }
5290      ucol_close(coll);
5291
5292
5293      log_verbose("\n\nTailoring test: & middle dot < a ");
5294      ruleLen = u_strlen(rule1);
5295      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5296      if (U_FAILURE(status)) {
5297          log_err("Tailoring test: & middle dot < a failed!");
5298          return;
5299      }
5300      for (j=0; j<11; j++) {
5301          tLen = u_strlen(tData1[j]);
5302          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5303          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5304              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5305                      j, tData1[j]);
5306          }
5307          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5308          for(i = 0; i<rLen; i++) {
5309              log_verbose(" %02X", resColl[i]);
5310          }
5311          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5312       }
5313       ucol_close(coll);
5314
5315
5316       log_verbose("\n\nTailoring test: & l middle-dot << a ");
5317       ruleLen = u_strlen(rule2);
5318       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5319       if (U_FAILURE(status)) {
5320           log_err("Tailoring test: & l middle-dot << a failed!");
5321           return;
5322       }
5323       for (j=0; j<11; j++) {
5324           tLen = u_strlen(tData1[j]);
5325           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5326           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5327               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5328                       j, tData1[j]);
5329           }
5330           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5331               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5332                       j, tData1[j]);
5333           }
5334           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5335           for(i = 0; i<rLen; i++) {
5336               log_verbose(" %02X", resColl[i]);
5337           }
5338           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5339        }
5340        ucol_close(coll);
5341}
5342
5343static void
5344TestOutOfBuffer5468(void)
5345{
5346    static const char *test = "\\u4e00";
5347    UChar ustr[256];
5348    int32_t ustr_length = u_unescape(test, ustr, 256);
5349    unsigned char shortKeyBuf[1];
5350    int32_t sortkey_length;
5351    UErrorCode status = U_ZERO_ERROR;
5352    static UCollator *coll = NULL;
5353
5354    coll = ucol_open("root", &status);
5355    if(U_FAILURE(status)) {
5356      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5357      return;
5358    }
5359    ucol_setStrength(coll, UCOL_PRIMARY);
5360    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5361    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5362    if (U_FAILURE(status)) {
5363      log_err("Failed setting atributes\n");
5364      return;
5365    }
5366
5367    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5368    if (sortkey_length != 4) {
5369        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5370    }
5371    log_verbose("length of sortKey is %d", sortkey_length);
5372    ucol_close(coll);
5373}
5374
5375#define TSKC_DATA_SIZE 5
5376#define TSKC_BUF_SIZE  50
5377static void
5378TestSortKeyConsistency(void)
5379{
5380    UErrorCode icuRC = U_ZERO_ERROR;
5381    UCollator* ucol;
5382    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5383
5384    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5385    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5386    int32_t i, j, i2;
5387
5388    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5389    if (U_FAILURE(icuRC))
5390    {
5391        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5392        return;
5393    }
5394
5395    for (i = 0; i < TSKC_DATA_SIZE; i++)
5396    {
5397        UCharIterator uiter;
5398        uint32_t state[2] = { 0, 0 };
5399        int32_t dataLen = i+1;
5400        for (j=0; j<TSKC_BUF_SIZE; j++)
5401            bufFull[i][j] = bufPart[i][j] = 0;
5402
5403        /* Full sort key */
5404        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5405
5406        /* Partial sort key */
5407        uiter_setString(&uiter, data, dataLen);
5408        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5409        if (U_FAILURE(icuRC))
5410        {
5411            log_err("ucol_nextSortKeyPart failed\n");
5412            ucol_close(ucol);
5413            return;
5414        }
5415
5416        for (i2=0; i2<i; i2++)
5417        {
5418            UBool fullMatch = TRUE;
5419            UBool partMatch = TRUE;
5420            for (j=0; j<TSKC_BUF_SIZE; j++)
5421            {
5422                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5423                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5424            }
5425            if (fullMatch != partMatch) {
5426                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5427                                  : "partial key was consistent, but full key changed\n");
5428                ucol_close(ucol);
5429                return;
5430            }
5431        }
5432    }
5433
5434    /*=============================================*/
5435   ucol_close(ucol);
5436}
5437
5438/* ticket: 6101 */
5439static void TestCroatianSortKey(void) {
5440    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5441    UErrorCode status = U_ZERO_ERROR;
5442    UCollator *ucol;
5443    UCharIterator iter;
5444
5445    static const UChar text[] = { 0x0044, 0xD81A };
5446
5447    size_t length = sizeof(text)/sizeof(*text);
5448
5449    uint8_t textSortKey[32];
5450    size_t lenSortKey = 32;
5451    size_t actualSortKeyLen;
5452    uint32_t uStateInfo[2] = { 0, 0 };
5453
5454    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5455    if (U_FAILURE(status)) {
5456        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5457        return;
5458    }
5459
5460    uiter_setString(&iter, text, length);
5461
5462    actualSortKeyLen = ucol_nextSortKeyPart(
5463        ucol, &iter, (uint32_t*)uStateInfo,
5464        textSortKey, lenSortKey, &status
5465        );
5466
5467    if (actualSortKeyLen == lenSortKey) {
5468        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5469    }
5470
5471    ucol_close(ucol);
5472}
5473
5474/* ticket: 6140 */
5475/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5476 * they are both Hiragana and Katakana
5477 */
5478#define SORTKEYLEN 50
5479static void TestHiragana(void) {
5480    UErrorCode status = U_ZERO_ERROR;
5481    UCollator* ucol;
5482    UCollationResult strcollresult;
5483    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5484    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5485    int32_t data1Len = sizeof(data1)/sizeof(*data1);
5486    int32_t data2Len = sizeof(data2)/sizeof(*data2);
5487    int32_t i, j;
5488    uint8_t sortKey1[SORTKEYLEN];
5489    uint8_t sortKey2[SORTKEYLEN];
5490
5491    UCharIterator uiter1;
5492    UCharIterator uiter2;
5493    uint32_t state1[2] = { 0, 0 };
5494    uint32_t state2[2] = { 0, 0 };
5495    int32_t keySize1;
5496    int32_t keySize2;
5497
5498    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5499            &status);
5500    if (U_FAILURE(status)) {
5501        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5502        return;
5503    }
5504
5505    /* Start of full sort keys */
5506    /* Full sort key1 */
5507    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5508    /* Full sort key2 */
5509    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5510    if (keySize1 == keySize2) {
5511        for (i = 0; i < keySize1; i++) {
5512            if (sortKey1[i] != sortKey2[i]) {
5513                log_err("Full sort keys are different. Should be equal.");
5514            }
5515        }
5516    } else {
5517        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5518    }
5519    /* End of full sort keys */
5520
5521    /* Start of partial sort keys */
5522    /* Partial sort key1 */
5523    uiter_setString(&uiter1, data1, data1Len);
5524    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5525    /* Partial sort key2 */
5526    uiter_setString(&uiter2, data2, data2Len);
5527    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5528    if (U_SUCCESS(status) && keySize1 == keySize2) {
5529        for (j = 0; j < keySize1; j++) {
5530            if (sortKey1[j] != sortKey2[j]) {
5531                log_err("Partial sort keys are different. Should be equal");
5532            }
5533        }
5534    } else {
5535        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5536    }
5537    /* End of partial sort keys */
5538
5539    /* Start of strcoll */
5540    /* Use ucol_strcoll() to determine ordering */
5541    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5542    if (strcollresult != UCOL_EQUAL) {
5543        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5544    }
5545
5546    ucol_close(ucol);
5547}
5548
5549/* Convenient struct for running collation tests */
5550typedef struct {
5551  const UChar source[MAX_TOKEN_LEN];  /* String on left */
5552  const UChar target[MAX_TOKEN_LEN];  /* String on right */
5553  UCollationResult result;            /* -1, 0 or +1, depending on collation */
5554} OneTestCase;
5555
5556/*
5557 * Utility function to test one collation test case.
5558 * @param testcases Array of test cases.
5559 * @param n_testcases Size of the array testcases.
5560 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5561 * @param n_rules Size of the array str_rules.
5562 */
5563static void doTestOneTestCase(const OneTestCase testcases[],
5564                              int n_testcases,
5565                              const char* str_rules[],
5566                              int n_rules)
5567{
5568  int rule_no, testcase_no;
5569  UChar rule[500];
5570  int32_t length = 0;
5571  UErrorCode status = U_ZERO_ERROR;
5572  UParseError parse_error;
5573  UCollator  *myCollation;
5574
5575  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5576
5577    length = u_unescape(str_rules[rule_no], rule, 500);
5578    if (length == 0) {
5579        log_err("ERROR: The rule cannot be unescaped: %s\n");
5580        return;
5581    }
5582    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5583    if(U_FAILURE(status)){
5584        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5585        return;
5586    }
5587    log_verbose("Testing the <<* syntax\n");
5588    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5589    ucol_setStrength(myCollation, UCOL_TERTIARY);
5590    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5591      doTest(myCollation,
5592             testcases[testcase_no].source,
5593             testcases[testcase_no].target,
5594             testcases[testcase_no].result
5595             );
5596    }
5597    ucol_close(myCollation);
5598  }
5599}
5600
5601const static OneTestCase rangeTestcases[] = {
5602  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
5603  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
5604  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
5605
5606  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
5607  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
5608  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
5609  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
5610  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
5611
5612  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
5613  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
5614  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
5615  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
5616
5617  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
5618  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
5619  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
5620  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
5621  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
5622  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
5623  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
5624  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
5625};
5626
5627static int nRangeTestcases = LEN(rangeTestcases);
5628
5629const static OneTestCase rangeTestcasesSupplemental[] = {
5630  { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
5631  { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
5632  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
5633  { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5634  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5635  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5636  { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5637};
5638
5639static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5640
5641const static OneTestCase rangeTestcasesQwerty[] = {
5642  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
5643  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
5644
5645  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
5646  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
5647
5648  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
5649  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
5650
5651  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
5652  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
5653
5654  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5655    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
5656  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5657    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
5658};
5659
5660static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5661
5662static void TestSameStrengthList(void)
5663{
5664  const char* strRules[] = {
5665    /* Normal */
5666    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5667
5668    /* Lists */
5669    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5670  };
5671  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5672}
5673
5674static void TestSameStrengthListQuoted(void)
5675{
5676  const char* strRules[] = {
5677    /* Lists with quoted characters */
5678    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5679    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5680
5681    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5682    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5683
5684    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5685    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5686  };
5687  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5688}
5689
5690static void TestSameStrengthListSupplemental(void)
5691{
5692  const char* strRules[] = {
5693    "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5694    "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5695    "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5696    "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5697  };
5698  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5699}
5700
5701static void TestSameStrengthListQwerty(void)
5702{
5703  const char* strRules[] = {
5704    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5705    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5706    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5707    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5708    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5709
5710    /* Quoted characters also will work if two quoted characters are not consecutive.  */
5711    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5712
5713    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5714    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5715
5716 };
5717  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5718}
5719
5720static void TestSameStrengthListQuotedQwerty(void)
5721{
5722  const char* strRules[] = {
5723    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5724    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5725    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5726
5727    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5728    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5729   };
5730  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5731}
5732
5733static void TestSameStrengthListRanges(void)
5734{
5735  const char* strRules[] = {
5736    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5737  };
5738  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5739}
5740
5741static void TestSameStrengthListSupplementalRanges(void)
5742{
5743  const char* strRules[] = {
5744    "&\\ufffe<*\\uffff-\\U00010002",
5745  };
5746  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5747}
5748
5749static void TestSpecialCharacters(void)
5750{
5751  const char* strRules[] = {
5752    /* Normal */
5753    "&';'<'+'<','<'-'<'&'<'*'",
5754
5755    /* List */
5756    "&';'<*'+,-&*'",
5757
5758    /* Range */
5759    "&';'<*'+'-'-&*'",
5760  };
5761
5762  const static OneTestCase specialCharacterStrings[] = {
5763    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5764    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5765    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5766    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5767  };
5768  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5769}
5770
5771static void TestPrivateUseCharacters(void)
5772{
5773  const char* strRules[] = {
5774    /* Normal */
5775    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5776    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5777  };
5778
5779  const static OneTestCase privateUseCharacterStrings[] = {
5780    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5781    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5782    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5783    { {0xe2da}, {0xe2db}, UCOL_LESS },
5784    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5785    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5786  };
5787  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5788}
5789
5790static void TestPrivateUseCharactersInList(void)
5791{
5792  const char* strRules[] = {
5793    /* List */
5794    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5795    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5796    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5797  };
5798
5799  const static OneTestCase privateUseCharacterStrings[] = {
5800    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5801    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5802    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5803    { {0xe2da}, {0xe2db}, UCOL_LESS },
5804    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5805    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5806  };
5807  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5808}
5809
5810static void TestPrivateUseCharactersInRange(void)
5811{
5812  const char* strRules[] = {
5813    /* Range */
5814    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5815    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5816    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5817  };
5818
5819  const static OneTestCase privateUseCharacterStrings[] = {
5820    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5821    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5822    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5823    { {0xe2da}, {0xe2db}, UCOL_LESS },
5824    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5825    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5826  };
5827  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5828}
5829
5830static void TestInvalidListsAndRanges(void)
5831{
5832  const char* invalidRules[] = {
5833    /* Range not in starred expression */
5834    "&\\ufffe<\\uffff-\\U00010002",
5835
5836    /* Range without start */
5837    "&a<*-c",
5838
5839    /* Range without end */
5840    "&a<*b-",
5841
5842    /* More than one hyphen */
5843    "&a<*b-g-l",
5844
5845    /* Range in the wrong order */
5846    "&a<*k-b",
5847
5848  };
5849
5850  UChar rule[500];
5851  UErrorCode status = U_ZERO_ERROR;
5852  UParseError parse_error;
5853  int n_rules = LEN(invalidRules);
5854  int rule_no;
5855  int length;
5856  UCollator  *myCollation;
5857
5858  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5859
5860    length = u_unescape(invalidRules[rule_no], rule, 500);
5861    if (length == 0) {
5862        log_err("ERROR: The rule cannot be unescaped: %s\n");
5863        return;
5864    }
5865    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5866    (void)myCollation;      /* Suppress set but not used warning. */
5867    if(!U_FAILURE(status)){
5868      log_err("ERROR: Could not cause a failure as expected: \n");
5869    }
5870    status = U_ZERO_ERROR;
5871  }
5872}
5873
5874/*
5875 * This test ensures that characters placed before a character in a different script have the same lead byte
5876 * in their collation key before and after script reordering.
5877 */
5878static void TestBeforeRuleWithScriptReordering(void)
5879{
5880    UParseError error;
5881    UErrorCode status = U_ZERO_ERROR;
5882    UCollator  *myCollation;
5883    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5884    UChar rules[500];
5885    uint32_t rulesLength = 0;
5886    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5887    UCollationResult collResult;
5888
5889    uint8_t baseKey[256];
5890    uint32_t baseKeyLength;
5891    uint8_t beforeKey[256];
5892    uint32_t beforeKeyLength;
5893