1
2/********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 2001-2011, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************/
7/*******************************************************************************
8*
9* File cmsccoll.C
10*
11*******************************************************************************/
12/**
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where
14 * to fit.
15 */
16
17#include <stdio.h>
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_COLLATION
22
23#include "unicode/ucol.h"
24#include "unicode/ucoleitr.h"
25#include "unicode/uloc.h"
26#include "cintltst.h"
27#include "ccolltst.h"
28#include "callcoll.h"
29#include "unicode/ustring.h"
30#include "string.h"
31#include "ucol_imp.h"
32#include "ucol_tok.h"
33#include "cmemory.h"
34#include "cstring.h"
35#include "uassert.h"
36#include "unicode/parseerr.h"
37#include "unicode/ucnv.h"
38#include "unicode/ures.h"
39#include "unicode/uscript.h"
40#include "uparse.h"
41#include "putilimp.h"
42
43
/* Number of elements in a true array (not a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression expands
 * with the intended precedence. */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/* Row width, in UChars, of the fixed-size UChar test-case tables below. */
#define MAX_TOKEN_LEN 16
47
48typedef UCollationResult tst_strcoll(void *collator, const int object,
49                        const UChar *source, const int sLen,
50                        const UChar *target, const int tLen);
51
52
53
/*
 * Strings listed in the order IncompleteCntTest expects under the rule
 * " & Z < ABC < Q < B": every entry must compare UCOL_LESS against every
 * later entry.
 */
static const char cnt1[][10] = {

  "AA",
  "AC",
  "AZ",
  "AQ",
  "AB",
  "ABZ",
  "ABQ",
  "Z",
  "ABC",
  "Q",
  "B"
};

/*
 * Same idea for the rule " & Z < DAVIS < MARK <DAV": entries are listed in
 * the order IncompleteCntTest expects them to sort.
 */
static const char cnt2[][10] = {
  "DA",
  "DAD",
  "DAZ",
  "MAR",
  "Z",
  "DAVIS",
  "MARK",
  "DAV",
  "DAVI"
};
80
81static void IncompleteCntTest(void)
82{
83  UErrorCode status = U_ZERO_ERROR;
84  UChar temp[90];
85  UChar t1[90];
86  UChar t2[90];
87
88  UCollator *coll =  NULL;
89  uint32_t i = 0, j = 0;
90  uint32_t size = 0;
91
92  u_uastrcpy(temp, " & Z < ABC < Q < B");
93
94  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
95
96  if(U_SUCCESS(status)) {
97    size = sizeof(cnt1)/sizeof(cnt1[0]);
98    for(i = 0; i < size-1; i++) {
99      for(j = i+1; j < size; j++) {
100        UCollationElements *iter;
101        u_uastrcpy(t1, cnt1[i]);
102        u_uastrcpy(t2, cnt1[j]);
103        doTest(coll, t1, t2, UCOL_LESS);
104        /* synwee : added collation element iterator test */
105        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
106        if (U_FAILURE(status)) {
107          log_err("Creation of iterator failed\n");
108          break;
109        }
110        backAndForth(iter);
111        ucol_closeElements(iter);
112      }
113    }
114  }
115
116  ucol_close(coll);
117
118
119  u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");
120  coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
121
122  if(U_SUCCESS(status)) {
123    size = sizeof(cnt2)/sizeof(cnt2[0]);
124    for(i = 0; i < size-1; i++) {
125      for(j = i+1; j < size; j++) {
126        UCollationElements *iter;
127        u_uastrcpy(t1, cnt2[i]);
128        u_uastrcpy(t2, cnt2[j]);
129        doTest(coll, t1, t2, UCOL_LESS);
130
131        /* synwee : added collation element iterator test */
132        iter = ucol_openElements(coll, t2, u_strlen(t2), &status);
133        if (U_FAILURE(status)) {
134          log_err("Creation of iterator failed\n");
135          break;
136        }
137        backAndForth(iter);
138        ucol_closeElements(iter);
139      }
140    }
141  }
142
143  ucol_close(coll);
144
145
146}
147
/*
 * Strings in the order BlackBirdTest expects with UCOL_SHIFTED alternate
 * handling at quaternary strength (each entry UCOL_LESS than every later one).
 */
static const char shifted[][20] = {
  "black bird",
  "black-bird",
  "blackbird",
  "black Bird",
  "black-Bird",
  "blackBird",
  "black birds",
  "black-birds",
  "blackbirds"
};
159
160const static UCollationResult shiftedTert[] = {
161  UCOL_EQUAL,
162  UCOL_EQUAL,
163  UCOL_EQUAL,
164  UCOL_LESS,
165  UCOL_EQUAL,
166  UCOL_EQUAL,
167  UCOL_LESS,
168  UCOL_EQUAL,
169  UCOL_EQUAL
170};
171
/*
 * Strings in the order BlackBirdTest expects with UCOL_NON_IGNORABLE
 * alternate handling (each entry UCOL_LESS than every later one).
 */
static const char nonignorable[][20] = {
  "black bird",
  "black Bird",
  "black birds",
  "black-bird",
  "black-Bird",
  "black-birds",
  "blackbird",
  "blackBird",
  "blackbirds"
};
183
184static void BlackBirdTest(void) {
185  UErrorCode status = U_ZERO_ERROR;
186  UChar t1[90];
187  UChar t2[90];
188
189  uint32_t i = 0, j = 0;
190  uint32_t size = 0;
191  UCollator *coll = ucol_open("en_US", &status);
192
193  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
194  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
195
196  if(U_SUCCESS(status)) {
197    size = sizeof(nonignorable)/sizeof(nonignorable[0]);
198    for(i = 0; i < size-1; i++) {
199      for(j = i+1; j < size; j++) {
200        u_uastrcpy(t1, nonignorable[i]);
201        u_uastrcpy(t2, nonignorable[j]);
202        doTest(coll, t1, t2, UCOL_LESS);
203      }
204    }
205  }
206
207  ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
208  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
209
210  if(U_SUCCESS(status)) {
211    size = sizeof(shifted)/sizeof(shifted[0]);
212    for(i = 0; i < size-1; i++) {
213      for(j = i+1; j < size; j++) {
214        u_uastrcpy(t1, shifted[i]);
215        u_uastrcpy(t2, shifted[j]);
216        doTest(coll, t1, t2, UCOL_LESS);
217      }
218    }
219  }
220
221  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);
222  if(U_SUCCESS(status)) {
223    size = sizeof(shifted)/sizeof(shifted[0]);
224    for(i = 1; i < size; i++) {
225      u_uastrcpy(t1, shifted[i-1]);
226      u_uastrcpy(t2, shifted[i]);
227      doTest(coll, t1, t2, shiftedTert[i]);
228    }
229  }
230
231  ucol_close(coll);
232}
233
234const static UChar testSourceCases[][MAX_TOKEN_LEN] = {
235    {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000},
236    {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000},
237    {0x0041/*'A'*/, 0x0300, 0x0000},
238    {0x00C0, 0x0301, 0x0000},
239    /* this would work with forced normalization */
240    {0x00C0, 0x0316, 0x0000}
241};
242
243const static UChar testTargetCases[][MAX_TOKEN_LEN] = {
244    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
245    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000},
246    {0x00C0, 0},
247    {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000},
248    /* this would work with forced normalization */
249    {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}
250};
251
252const static UCollationResult results[] = {
253    UCOL_GREATER,
254    UCOL_EQUAL,
255    UCOL_EQUAL,
256    UCOL_GREATER,
257    UCOL_EQUAL
258};
259
260static void FunkyATest(void)
261{
262
263    int32_t i;
264    UErrorCode status = U_ZERO_ERROR;
265    UCollator  *myCollation;
266    myCollation = ucol_open("en_US", &status);
267    if(U_FAILURE(status)){
268        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
269        return;
270    }
271    log_verbose("Testing some A letters, for some reason\n");
272    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
273    ucol_setStrength(myCollation, UCOL_TERTIARY);
274    for (i = 0; i < 4 ; i++)
275    {
276        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
277    }
278    ucol_close(myCollation);
279}
280
281UColAttributeValue caseFirst[] = {
282    UCOL_OFF,
283    UCOL_LOWER_FIRST,
284    UCOL_UPPER_FIRST
285};
286
287
288UColAttributeValue alternateHandling[] = {
289    UCOL_NON_IGNORABLE,
290    UCOL_SHIFTED
291};
292
293UColAttributeValue caseLevel[] = {
294    UCOL_OFF,
295    UCOL_ON
296};
297
298UColAttributeValue strengths[] = {
299    UCOL_PRIMARY,
300    UCOL_SECONDARY,
301    UCOL_TERTIARY,
302    UCOL_QUATERNARY,
303    UCOL_IDENTICAL
304};
305
#if 0
/* Printable names, parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, parallel to caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};


/* Printable names, parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, parallel to caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * not used currently - does not test only prints
 *
 * Prints to stderr the sort key of "M\u00e4rk Davis" for every combination of
 * caseFirst x alternateHandling x caseLevel x strengths on the en_US collator.
 */
static void PrintMarkDavis(void)
{
  UErrorCode status = U_ZERO_ERROR;
  UChar m[256];
  uint8_t sortkey[256];
  UCollator *coll = ucol_open("en_US", &status);
  uint32_t h,i,j,k;
  uint32_t sizem = 0;
  char buffer[512];
  uint32_t len = 512;

  log_verbose("PrintMarkDavis");

  if(U_FAILURE(status)) {
    ucol_close(coll);
    return;
  }

  u_uastrcpy(m, "Mark Davis");
  sizem = u_strlen(m);


  m[1] = 0xe4;

  for(i = 0; i<sizem; i++) {
    fprintf(stderr, "\\u%04X ", m[i]);
  }
  fprintf(stderr, "\n");

  for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
    /* index with h: i still holds a value left over from an earlier loop here */
    ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
    fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

    for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
      fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

      for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
        ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
        fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

        for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
          ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
          ucol_getSortKey(coll, m, sizem, sortkey, 256);
          fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
          fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
        }

      }

    }

  }

  ucol_close(coll);
}
#endif
383
384static void BillFairmanTest(void) {
385/*
386** check for actual locale via ICU resource bundles
387**
388** lp points to the original locale ("fr_FR_....")
389*/
390
391    UResourceBundle *lr,*cr;
392    UErrorCode              lec = U_ZERO_ERROR;
393    const char *lp = "fr_FR_you_ll_never_find_this_locale";
394
395    log_verbose("BillFairmanTest\n");
396
397    lr = ures_open(NULL,lp,&lec);
398    if (lr) {
399        cr = ures_getByKey(lr,"collations",0,&lec);
400        if (cr) {
401            lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);
402            if (lp) {
403                if (U_SUCCESS(lec)) {
404                    if(strcmp(lp, "fr") != 0) {
405                        log_err("Wrong locale for French Collation Data, expected \"fr\" got %s", lp);
406                    }
407                }
408            }
409            ures_close(cr);
410        }
411        ures_close(lr);
412    }
413}
414
415static void testPrimary(UCollator* col, const UChar* p,const UChar* q){
416    UChar source[256] = { '\0'};
417    UChar target[256] = { '\0'};
418    UChar preP = 0x31a3;
419    UChar preQ = 0x310d;
420/*
421    UChar preP = (*p>0x0400 && *p<0x0500)?0x00e1:0x491;
422    UChar preQ = (*p>0x0400 && *p<0x0500)?0x0041:0x413;
423*/
424    /*log_verbose("Testing primary\n");*/
425
426    doTest(col, p, q, UCOL_LESS);
427/*
428    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));
429
430    if(result!=UCOL_LESS){
431       aescstrdup(p,utfSource,256);
432       aescstrdup(q,utfTarget,256);
433       fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
434    }
435*/
436    source[0] = preP;
437    u_strcpy(source+1,p);
438    target[0] = preQ;
439    u_strcpy(target+1,q);
440    doTest(col, source, target, UCOL_LESS);
441/*
442    fprintf(file,"Primary swamps 2nd failed  source: %s target: %s \n", utfSource,utfTarget);
443*/
444}
445
446static void testSecondary(UCollator* col, const UChar* p,const UChar* q){
447    UChar source[256] = { '\0'};
448    UChar target[256] = { '\0'};
449
450    /*log_verbose("Testing secondary\n");*/
451
452    doTest(col, p, q, UCOL_LESS);
453/*
454    fprintf(file,"secondary failed  source: %s target: %s \n", utfSource,utfTarget);
455*/
456    source[0] = 0x0053;
457    u_strcpy(source+1,p);
458    target[0]= 0x0073;
459    u_strcpy(target+1,q);
460
461    doTest(col, source, target, UCOL_LESS);
462/*
463    fprintf(file,"secondary swamps 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
464*/
465
466
467    u_strcpy(source,p);
468    source[u_strlen(p)] = 0x62;
469    source[u_strlen(p)+1] = 0;
470
471
472    u_strcpy(target,q);
473    target[u_strlen(q)] = 0x61;
474    target[u_strlen(q)+1] = 0;
475
476    doTest(col, source, target, UCOL_GREATER);
477
478/*
479    fprintf(file,"secondary is swamped by 1  failed  source: %s target: %s \n",utfSource,utfTarget);
480*/
481}
482
483static void testTertiary(UCollator* col, const UChar* p,const UChar* q){
484    UChar source[256] = { '\0'};
485    UChar target[256] = { '\0'};
486
487    /*log_verbose("Testing tertiary\n");*/
488
489    doTest(col, p, q, UCOL_LESS);
490/*
491    fprintf(file,"Tertiary failed  source: %s target: %s \n",utfSource,utfTarget);
492*/
493    source[0] = 0x0020;
494    u_strcpy(source+1,p);
495    target[0]= 0x002D;
496    u_strcpy(target+1,q);
497
498    doTest(col, source, target, UCOL_LESS);
499/*
500    fprintf(file,"Tertiary swamps 4th failed  source: %s target: %s \n", utfSource,utfTarget);
501*/
502
503    u_strcpy(source,p);
504    source[u_strlen(p)] = 0xE0;
505    source[u_strlen(p)+1] = 0;
506
507    u_strcpy(target,q);
508    target[u_strlen(q)] = 0x61;
509    target[u_strlen(q)+1] = 0;
510
511    doTest(col, source, target, UCOL_GREATER);
512
513/*
514    fprintf(file,"Tertiary is swamped by 3rd failed  source: %s target: %s \n",utfSource,utfTarget);
515*/
516}
517
518static void testEquality(UCollator* col, const UChar* p,const UChar* q){
519/*
520    UChar source[256] = { '\0'};
521    UChar target[256] = { '\0'};
522*/
523
524    doTest(col, p, q, UCOL_EQUAL);
525/*
526    fprintf(file,"Primary failed  source: %s target: %s \n", utfSource,utfTarget);
527*/
528}
529
530static void testCollator(UCollator *coll, UErrorCode *status) {
531  const UChar *rules = NULL, *current = NULL;
532  int32_t ruleLen = 0;
533  uint32_t strength = 0;
534  uint32_t chOffset = 0; uint32_t chLen = 0;
535  uint32_t exOffset = 0; uint32_t exLen = 0;
536  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
537  uint32_t firstEx = 0;
538/*  uint32_t rExpsLen = 0; */
539  uint32_t firstLen = 0;
540  UBool varT = FALSE; UBool top_ = TRUE;
541  uint16_t specs = 0;
542  UBool startOfRules = TRUE;
543  UBool lastReset = FALSE;
544  UBool before = FALSE;
545  uint32_t beforeStrength = 0;
546  UColTokenParser src;
547  UColOptionSet opts;
548
549  UChar first[256];
550  UChar second[256];
551  UChar tempB[256];
552  uint32_t tempLen;
553  UChar *rulesCopy = NULL;
554  UParseError parseError;
555
556  uprv_memset(&src, 0, sizeof(UColTokenParser));
557
558  src.opts = &opts;
559
560  rules = ucol_getRules(coll, &ruleLen);
561  if(U_SUCCESS(*status) && ruleLen > 0) {
562    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
563    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
564    src.current = src.source = rulesCopy;
565    src.end = rulesCopy+ruleLen;
566    src.extraCurrent = src.end;
567    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
568    *first = *second = 0;
569
570	/* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
571	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
572    while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, status)) != NULL) {
573      strength = src.parsedToken.strength;
574      chOffset = src.parsedToken.charsOffset;
575      chLen = src.parsedToken.charsLen;
576      exOffset = src.parsedToken.extensionOffset;
577      exLen = src.parsedToken.extensionLen;
578      prefixOffset = src.parsedToken.prefixOffset;
579      prefixLen = src.parsedToken.prefixLen;
580      specs = src.parsedToken.flags;
581
582      startOfRules = FALSE;
583      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
584      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
585      if(top_) { /* if reset is on top, the sequence is broken. We should have an empty string */
586        second[0] = 0;
587      } else {
588        u_strncpy(second,src.source+chOffset, chLen);
589        second[chLen] = 0;
590
591        if(exLen > 0 && firstEx == 0) {
592          u_strncat(first, src.source+exOffset, exLen);
593          first[firstLen+exLen] = 0;
594        }
595
596        if(lastReset == TRUE && prefixLen != 0) {
597          u_strncpy(first+prefixLen, first, firstLen);
598          u_strncpy(first, src.source+prefixOffset, prefixLen);
599          first[firstLen+prefixLen] = 0;
600          firstLen = firstLen+prefixLen;
601        }
602
603        if(before == TRUE) { /* swap first and second */
604          u_strcpy(tempB, first);
605          u_strcpy(first, second);
606          u_strcpy(second, tempB);
607
608          tempLen = firstLen;
609          firstLen = chLen;
610          chLen = tempLen;
611
612          tempLen = firstEx;
613          firstEx = exLen;
614          exLen = tempLen;
615          if(beforeStrength < strength) {
616            strength = beforeStrength;
617          }
618        }
619      }
620      lastReset = FALSE;
621
622      switch(strength){
623      case UCOL_IDENTICAL:
624          testEquality(coll,first,second);
625          break;
626      case UCOL_PRIMARY:
627          testPrimary(coll,first,second);
628          break;
629      case UCOL_SECONDARY:
630          testSecondary(coll,first,second);
631          break;
632      case UCOL_TERTIARY:
633          testTertiary(coll,first,second);
634          break;
635      case UCOL_TOK_RESET:
636        lastReset = TRUE;
637        before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
638        if(before) {
639          beforeStrength = (specs & UCOL_TOK_BEFORE)-1;
640        }
641        break;
642      default:
643          break;
644      }
645
646      if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second were swapped */
647        before = FALSE;
648      } else {
649        firstLen = chLen;
650        firstEx = exLen;
651        u_strcpy(first, second);
652      }
653    }
654    uprv_free(src.source);
655  }
656}
657
658static UCollationResult ucaTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
659  UCollator *UCA = (UCollator *)collator;
660  return ucol_strcoll(UCA, source, sLen, target, tLen);
661}
662
663/*
664static UCollationResult winTest(void *collator, const int object, const UChar *source, const int sLen, const UChar *target, const int tLen) {
665#ifdef U_WINDOWS
666  LCID lcid = (LCID)collator;
667  return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);
668#else
669  return 0;
670#endif
671}
672*/
673
674static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts,
675                                     UChar s1, UChar s2,
676                                     const UChar *s, const uint32_t sLen,
677                                     const UChar *t, const uint32_t tLen) {
678  UChar source[256] = {0};
679  UChar target[256] = {0};
680
681  source[0] = s1;
682  u_strcpy(source+1, s);
683  target[0] = s2;
684  u_strcpy(target+1, t);
685
686  return func(collator, opts, source, sLen+1, target, tLen+1);
687}
688
689static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,
690                                   UChar s1, UChar s2,
691                                   const UChar *s, const uint32_t sLen,
692                                   const UChar *t, const uint32_t tLen) {
693  UChar source[256] = {0};
694  UChar target[256] = {0};
695
696  u_strcpy(source, s);
697  source[sLen] = s1;
698  u_strcpy(target, t);
699  target[tLen] = s2;
700
701  return func(collator, opts, source, sLen+1, target, tLen+1);
702}
703
704static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,
705                              const UChar *s, const uint32_t sLen,
706                              const UChar *t, const uint32_t tLen,
707                              UCollationResult result) {
708  /*UChar fPrimary = 0x6d;*/
709  /*UChar sPrimary = 0x6e;*/
710  UChar fSecondary = 0x310d;
711  UChar sSecondary = 0x31a3;
712  UChar fTertiary = 0x310f;
713  UChar sTertiary = 0x31b7;
714
715  UCollationResult oposite;
716  if(result == UCOL_EQUAL) {
717    return UCOL_IDENTICAL;
718  } else if(result == UCOL_GREATER) {
719    oposite = UCOL_LESS;
720  } else {
721    oposite = UCOL_GREATER;
722  }
723
724  if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen) == result) {
725    return UCOL_PRIMARY;
726  } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == result) &&
727    (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {
728    return UCOL_SECONDARY;
729  } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&
730    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {
731    return UCOL_TERTIARY;
732  } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLen) == oposite) &&
733    (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {
734    return UCOL_QUATERNARY;
735  } else {
736    return UCOL_IDENTICAL;
737  }
738}
739
740static char *getRelationSymbol(UCollationResult res, uint32_t strength, char *buffer) {
741  uint32_t i = 0;
742
743  if(res == UCOL_EQUAL || strength == 0xdeadbeef) {
744    buffer[0] = '=';
745    buffer[1] = '=';
746    buffer[2] = '\0';
747  } else if(res == UCOL_GREATER) {
748    for(i = 0; i<strength+1; i++) {
749      buffer[i] = '>';
750    }
751    buffer[strength+1] = '\0';
752  } else {
753    for(i = 0; i<strength+1; i++) {
754      buffer[i] = '<';
755    }
756    buffer[strength+1] = '\0';
757  }
758
759  return buffer;
760}
761
762
763
764static void logFailure (const char *platform, const char *test,
765                        const UChar *source, const uint32_t sLen,
766                        const UChar *target, const uint32_t tLen,
767                        UCollationResult realRes, uint32_t realStrength,
768                        UCollationResult expRes, uint32_t expStrength, UBool error) {
769
770  uint32_t i = 0;
771
772  char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];
773  static int32_t maxOutputLength = 0;
774  int32_t outputLength;
775
776  *sEsc = *tEsc = *s = *t = 0;
777  if(error == TRUE) {
778    log_err("Difference between expected and generated order. Run test with -v for more info\n");
779  } else if(getTestOption(VERBOSITY_OPTION) == 0) {
780    return;
781  }
782  for(i = 0; i<sLen; i++) {
783    sprintf(b, "%04X", source[i]);
784    strcat(sEsc, "\\u");
785    strcat(sEsc, b);
786    strcat(s, b);
787    strcat(s, " ");
788    if(source[i] < 0x80) {
789      sprintf(b, "(%c)", source[i]);
790      strcat(sEsc, b);
791    }
792  }
793  for(i = 0; i<tLen; i++) {
794    sprintf(b, "%04X", target[i]);
795    strcat(tEsc, "\\u");
796    strcat(tEsc, b);
797    strcat(t, b);
798    strcat(t, " ");
799    if(target[i] < 0x80) {
800      sprintf(b, "(%c)", target[i]);
801      strcat(tEsc, b);
802    }
803  }
804/*
805  strcpy(output, "[[ ");
806  strcat(output, sEsc);
807  strcat(output, getRelationSymbol(expRes, expStrength, relation));
808  strcat(output, tEsc);
809
810  strcat(output, " : ");
811
812  strcat(output, sEsc);
813  strcat(output, getRelationSymbol(realRes, realStrength, relation));
814  strcat(output, tEsc);
815  strcat(output, " ]] ");
816
817  log_verbose("%s", output);
818*/
819
820
821  strcpy(output, "DIFF: ");
822
823  strcat(output, s);
824  strcat(output, " : ");
825  strcat(output, t);
826
827  strcat(output, test);
828  strcat(output, ": ");
829
830  strcat(output, sEsc);
831  strcat(output, getRelationSymbol(expRes, expStrength, relation));
832  strcat(output, tEsc);
833
834  strcat(output, " ");
835
836  strcat(output, platform);
837  strcat(output, ": ");
838
839  strcat(output, sEsc);
840  strcat(output, getRelationSymbol(realRes, realStrength, relation));
841  strcat(output, tEsc);
842
843  outputLength = (int32_t)strlen(output);
844  if(outputLength > maxOutputLength) {
845    maxOutputLength = outputLength;
846    U_ASSERT(outputLength < sizeof(output));
847  }
848
849  log_verbose("%s\n", output);
850
851}
852
853/*
854static void printOutRules(const UChar *rules) {
855  uint32_t len = u_strlen(rules);
856  uint32_t i = 0;
857  char toPrint;
858  uint32_t line = 0;
859
860  fprintf(stdout, "Rules:");
861
862  for(i = 0; i<len; i++) {
863    if(rules[i]<0x7f && rules[i]>=0x20) {
864      toPrint = (char)rules[i];
865      if(toPrint == '&') {
866        line = 1;
867        fprintf(stdout, "\n&");
868      } else if(toPrint == ';') {
869        fprintf(stdout, "<<");
870        line+=2;
871      } else if(toPrint == ',') {
872        fprintf(stdout, "<<<");
873        line+=3;
874      } else {
875        fprintf(stdout, "%c", toPrint);
876        line++;
877      }
878    } else if(rules[i]<0x3400 || rules[i]>=0xa000) {
879      fprintf(stdout, "\\u%04X", rules[i]);
880      line+=6;
881    }
882    if(line>72) {
883      fprintf(stdout, "\n");
884      line = 0;
885    }
886  }
887
888  log_verbose("\n");
889
890}
891*/
892
893static uint32_t testSwitch(tst_strcoll* func, void *collator, int opts, uint32_t strength, const UChar *first, const UChar *second, const char* msg, UBool error) {
894  uint32_t diffs = 0;
895  UCollationResult realResult;
896  uint32_t realStrength;
897
898  uint32_t sLen = u_strlen(first);
899  uint32_t tLen = u_strlen(second);
900
901  realResult = func(collator, opts, first, sLen, second, tLen);
902  realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);
903
904  if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {
905    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_EQUAL, strength, error);
906    diffs++;
907  } else if(realResult != UCOL_LESS || realStrength != strength) {
908    logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStrength, UCOL_LESS, strength, error);
909    diffs++;
910  }
911  return diffs;
912}
913
914
915static void testAgainstUCA(UCollator *coll, UCollator *UCA, const char *refName, UBool error, UErrorCode *status) {
916  const UChar *rules = NULL, *current = NULL;
917  int32_t ruleLen = 0;
918  uint32_t strength = 0;
919  uint32_t chOffset = 0; uint32_t chLen = 0;
920  uint32_t exOffset = 0; uint32_t exLen = 0;
921  uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
922/*  uint32_t rExpsLen = 0; */
923  uint32_t firstLen = 0, secondLen = 0;
924  UBool varT = FALSE; UBool top_ = TRUE;
925  uint16_t specs = 0;
926  UBool startOfRules = TRUE;
927  UColTokenParser src;
928  UColOptionSet opts;
929
930  UChar first[256];
931  UChar second[256];
932  UChar *rulesCopy = NULL;
933
934  uint32_t UCAdiff = 0;
935  uint32_t Windiff = 1;
936  UParseError parseError;
937
938  uprv_memset(&src, 0, sizeof(UColTokenParser));
939  src.opts = &opts;
940
941  rules = ucol_getRules(coll, &ruleLen);
942
943  /*printOutRules(rules);*/
944
945  if(U_SUCCESS(*status) && ruleLen > 0) {
946    rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
947    uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
948    src.current = src.source = rulesCopy;
949    src.end = rulesCopy+ruleLen;
950    src.extraCurrent = src.end;
951    src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
952    *first = *second = 0;
953
954    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
955       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
956    while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
957      strength = src.parsedToken.strength;
958      chOffset = src.parsedToken.charsOffset;
959      chLen = src.parsedToken.charsLen;
960      exOffset = src.parsedToken.extensionOffset;
961      exLen = src.parsedToken.extensionLen;
962      prefixOffset = src.parsedToken.prefixOffset;
963      prefixLen = src.parsedToken.prefixLen;
964      specs = src.parsedToken.flags;
965
966      startOfRules = FALSE;
967      varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
968      top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
969
970      u_strncpy(second,src.source+chOffset, chLen);
971      second[chLen] = 0;
972      secondLen = chLen;
973
974      if(exLen > 0) {
975        u_strncat(first, src.source+exOffset, exLen);
976        first[firstLen+exLen] = 0;
977        firstLen += exLen;
978      }
979
980      if(strength != UCOL_TOK_RESET) {
981        if((*first<0x3400 || *first>=0xa000) && (*second<0x3400 || *second>=0xa000)) {
982          UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, second, refName, error);
983          /*Windiff += testSwitch(&winTest, (void *)lcid, 0, strength, first, second, "Win32");*/
984        }
985      }
986
987
988      firstLen = chLen;
989      u_strcpy(first, second);
990
991    }
992    if(UCAdiff != 0 && Windiff != 0) {
993      log_verbose("\n");
994    }
995    if(UCAdiff == 0) {
996      log_verbose("No immediate difference with %s!\n", refName);
997    }
998    if(Windiff == 0) {
999      log_verbose("No immediate difference with Win32!\n");
1000    }
1001    uprv_free(src.source);
1002  }
1003}
1004
1005/*
1006 * Takes two CEs (lead and continuation) and
1007 * compares them as CEs should be compared:
1008 * primary vs. primary, secondary vs. secondary
1009 * tertiary vs. tertiary
1010 */
/*
 * Orders two collation elements, each given as a lead CE plus a continuation
 * CE: (s1, s2) against (t1, t2).
 *
 * Returns 0 when both pairs are bit-for-bit equal.  Otherwise the pairs are
 * compared level by level, the lead CE's bits ranking above the
 * continuation's at every level:
 *   1. bits 31..16 of each CE,
 *   2. bits 15..8  of each CE,
 *   3. bits  7..0  of each CE,
 * and -1 or 1 is returned at the first level that differs.
 */
static int32_t compareCEs(uint32_t s1, uint32_t s2,
                   uint32_t t1, uint32_t t2) {
  uint32_t srcKey;
  uint32_t tgtKey;

  if(s1 == t1 && s2 == t2) {
    return 0;
  }

  /* level 1: upper halves, lead CE in the high 16 bits of the key */
  srcKey = (s1 & 0xFFFF0000) | (s2 >> 16);
  tgtKey = (t1 & 0xFFFF0000) | (t2 >> 16);
  if(srcKey != tgtKey) {
    return (srcKey < tgtKey) ? -1 : 1;
  }

  /* level 2: byte 1 of each CE */
  srcKey = (s1 & 0x0000FF00) | ((s2 >> 8) & 0x000000FF);
  tgtKey = (t1 & 0x0000FF00) | ((t2 >> 8) & 0x000000FF);
  if(srcKey != tgtKey) {
    return (srcKey < tgtKey) ? -1 : 1;
  }

  /* level 3: byte 0 of each CE; the pairs are known to differ somewhere */
  srcKey = ((s1 & 0x000000FF) << 8) | (s2 & 0x000000FF);
  tgtKey = ((t1 & 0x000000FF) << 8) | (t2 & 0x000000FF);
  return (srcKey < tgtKey) ? -1 : 1;
}
1041
/*
 * CE bounds kept for one indirect-positioning entry: a start CE and a limit
 * CE, each stored together with its continuation CE.
 */
typedef struct {
  uint32_t startCE;      /* start of the range */
  uint32_t startContCE;  /* continuation CE belonging to startCE */
  uint32_t limitCE;      /* limit of the range; stored as 0 when no limit is given */
  uint32_t limitContCE;  /* continuation CE belonging to limitCE */
} indirectBoundaries;
1048
/* These values are used for finding CE values for indirect positioning. */
/* Indirect positioning is a mechanism for allowing resets on symbolic   */
/* values. It only works for resets, and you cannot tailor indirect      */
/* names. An indirect name can define either an anchor point or a range. */
/* An anchor point behaves in exactly the same way as a code point in a  */
/* reset would, except that it cannot be tailored. A range (we currently */
/* only know of the [top] range) will explicitly set the upper bound for */
/* generated CEs, thus allowing for better control over how many CEs can */
/* be squeezed into the range without a performance penalty.             */
/* In that respect, we use [top] for tailoring of locales that use CJK   */
/* characters. Other indirect values are currently a pure convenience:   */
/* they can be used to ensure that the CEs will always be positioned in  */
/* the same place relative to a point with known properties (e.g. first  */
/* primary ignorable). */
1063static indirectBoundaries ucolIndirectBoundaries[15];
1064static UBool indirectBoundariesSet = FALSE;
1065static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
1066    /* Set values for the top - TODO: once we have values for all the indirects, we are going */
1067    /* to initalize here. */
1068    ucolIndirectBoundaries[indexR].startCE = start[0];
1069    ucolIndirectBoundaries[indexR].startContCE = start[1];
1070    if(end) {
1071        ucolIndirectBoundaries[indexR].limitCE = end[0];
1072        ucolIndirectBoundaries[indexR].limitContCE = end[1];
1073    } else {
1074        ucolIndirectBoundaries[indexR].limitCE = 0;
1075        ucolIndirectBoundaries[indexR].limitContCE = 0;
1076    }
1077}
1078
1079static void testCEs(UCollator *coll, UErrorCode *status) {
1080    const UChar *rules = NULL, *current = NULL;
1081    int32_t ruleLen = 0;
1082
1083    uint32_t strength = 0;
1084    uint32_t maxStrength = UCOL_IDENTICAL;
1085    uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
1086    uint32_t lastCE;
1087    uint32_t lastContCE;
1088
1089    int32_t result = 0;
1090    uint32_t chOffset = 0; uint32_t chLen = 0;
1091    uint32_t exOffset = 0; uint32_t exLen = 0;
1092    uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
1093    uint32_t oldOffset = 0;
1094
1095    /* uint32_t rExpsLen = 0; */
1096    /* uint32_t firstLen = 0; */
1097    uint16_t specs = 0;
1098    UBool varT = FALSE; UBool top_ = TRUE;
1099    UBool startOfRules = TRUE;
1100    UBool before = FALSE;
1101    UColTokenParser src;
1102    UColOptionSet opts;
1103    UParseError parseError;
1104    UChar *rulesCopy = NULL;
1105    collIterate *c = uprv_new_collIterate(status);
1106    UCAConstants *consts = NULL;
1107    uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
1108        UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
1109    const char *colLoc;
1110    UCollator *UCA = ucol_open("root", status);
1111
1112    if (U_FAILURE(*status)) {
1113        log_err("Could not open root collator %s\n", u_errorName(*status));
1114        uprv_delete_collIterate(c);
1115        return;
1116    }
1117
1118    colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);
1119    if (U_FAILURE(*status)) {
1120        log_err("Could not get collator name: %s\n", u_errorName(*status));
1121        ucol_close(UCA);
1122        uprv_delete_collIterate(c);
1123        return;
1124    }
1125
1126    uprv_memset(&src, 0, sizeof(UColTokenParser));
1127
1128    consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
1129    UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
1130    /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
1131    UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
1132    UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
1133
1134    baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
1135
1136    src.opts = &opts;
1137
1138    rules = ucol_getRules(coll, &ruleLen);
1139
1140    src.invUCA = ucol_initInverseUCA(status);
1141
1142    if(indirectBoundariesSet == FALSE) {
1143        /* UCOL_RESET_TOP_VALUE */
1144        setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1145        /* UCOL_FIRST_PRIMARY_IGNORABLE */
1146        setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
1147        /* UCOL_LAST_PRIMARY_IGNORABLE */
1148        setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
1149        /* UCOL_FIRST_SECONDARY_IGNORABLE */
1150        setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
1151        /* UCOL_LAST_SECONDARY_IGNORABLE */
1152        setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
1153        /* UCOL_FIRST_TERTIARY_IGNORABLE */
1154        setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
1155        /* UCOL_LAST_TERTIARY_IGNORABLE */
1156        setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
1157        /* UCOL_FIRST_VARIABLE */
1158        setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
1159        /* UCOL_LAST_VARIABLE */
1160        setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
1161        /* UCOL_FIRST_NON_VARIABLE */
1162        setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
1163        /* UCOL_LAST_NON_VARIABLE */
1164        setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
1165        /* UCOL_FIRST_IMPLICIT */
1166        setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
1167        /* UCOL_LAST_IMPLICIT */
1168        setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
1169        /* UCOL_FIRST_TRAILING */
1170        setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
1171        /* UCOL_LAST_TRAILING */
1172        setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
1173        ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
1174        indirectBoundariesSet = TRUE;
1175    }
1176
1177
1178    if(U_SUCCESS(*status) && ruleLen > 0) {
1179        rulesCopy = (UChar *)uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
1180        uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
1181        src.current = src.source = rulesCopy;
1182        src.end = rulesCopy+ruleLen;
1183        src.extraCurrent = src.end;
1184        src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1185
1186	    /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1187	       the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1188        while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
1189            strength = src.parsedToken.strength;
1190            chOffset = src.parsedToken.charsOffset;
1191            chLen = src.parsedToken.charsLen;
1192            exOffset = src.parsedToken.extensionOffset;
1193            exLen = src.parsedToken.extensionLen;
1194            prefixOffset = src.parsedToken.prefixOffset;
1195            prefixLen = src.parsedToken.prefixLen;
1196            specs = src.parsedToken.flags;
1197
1198            startOfRules = FALSE;
1199            varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
1200            top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
1201
1202            uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);
1203
1204            currCE = ucol_getNextCE(coll, c, status);
1205            if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {
1206                log_verbose("Thai prevowel detected. Will pick next CE\n");
1207                currCE = ucol_getNextCE(coll, c, status);
1208            }
1209
1210            currContCE = ucol_getNextCE(coll, c, status);
1211            if(!isContinuation(currContCE)) {
1212                currContCE = 0;
1213            }
1214
1215            /* we need to repack CEs here */
1216
1217            if(strength == UCOL_TOK_RESET) {
1218                before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
1219                if(top_ == TRUE) {
1220                    int32_t tokenIndex = src.parsedToken.indirectIndex;
1221
1222                    nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex].startCE;
1223                    nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[tokenIndex].startContCE;
1224                } else {
1225                    nextCE = baseCE = currCE;
1226                    nextContCE = baseContCE = currContCE;
1227                }
1228                maxStrength = UCOL_IDENTICAL;
1229            } else {
1230                if(strength < maxStrength) {
1231                    maxStrength = strength;
1232                    if(baseCE == UCOL_RESET_TOP_VALUE) {
1233                        log_verbose("Resetting to [top]\n");
1234                        nextCE = UCOL_NEXT_TOP_VALUE;
1235                        nextContCE = UCOL_NEXT_TOP_CONT;
1236                    } else {
1237                        result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
1238                    }
1239                    if(result < 0) {
1240                        if(ucol_isTailored(coll, *(src.source+oldOffset), status)) {
1241                            log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));
1242                            return;
1243                        } else {
1244                            log_err("%s: couldn't find the CE\n", colLoc);
1245                            return;
1246                        }
1247                    }
1248                }
1249
1250                currCE &= 0xFFFFFF3F;
1251                currContCE &= 0xFFFFFFBF;
1252
1253                if(maxStrength == UCOL_IDENTICAL) {
1254                    if(baseCE != currCE || baseContCE != currContCE) {
1255                        log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1256                    }
1257                } else {
1258                    if(strength == UCOL_IDENTICAL) {
1259                        if(lastCE != currCE || lastContCE != currContCE) {
1260                            log_err("%s: current CE  (initial strength UCOL_EQUAL)\n", colLoc);
1261                        }
1262                    } else {
1263                        if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
1264                            /*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
1265                            log_err("%s: current CE is not less than base CE\n", colLoc);
1266                        }
1267                        if(!before) {
1268                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
1269                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1270                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1271                            }
1272                        } else {
1273                            before = FALSE;
1274                            if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
1275                                /*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
1276                                log_err("%s: sequence of generated CEs is broken\n", colLoc);
1277                            }
1278                        }
1279                    }
1280                }
1281
1282            }
1283
1284            oldOffset = chOffset;
1285            lastCE = currCE & 0xFFFFFF3F;
1286            lastContCE = currContCE & 0xFFFFFFBF;
1287        }
1288        uprv_free(src.source);
1289    }
1290    ucol_close(UCA);
1291    uprv_delete_collIterate(c);
1292}
1293
#if 0
/* Disabled: this hard-coded list is no longer compiled in.             */
/* These locales are now picked from index RB.                          */
static const char* localesToTest[] = {
"ar", "bg", "ca", "cs", "da",
"el", "en_BE", "en_US_POSIX",
"es", "et", "fi", "fr", "hi",
"hr", "hu", "is", "iw", "ja",
"ko", "lt", "lv", "mk", "mt",
"nb", "nn", "nn_NO", "pl", "ro",
"ru", "sh", "sk", "sl", "sq",
"sr", "sv", "th", "tr", "uk",
"vi", "zh", "zh_TW"
};
#endif
1308
/*
 * Hand-written tailorings that RamsRulesTest() runs in addition to the
 * locale collators. Every entry is passed through u_unescape() before it
 * is compiled, so \\uXXXX sequences stand for the corresponding code unit.
 * The trailing comments hold alternative spellings of the same rule that
 * are not compiled in.
 */
static const char* rulesToTest[] = {
  /* Funky fa rule */
  "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
  /*"& Z < p, P",*/
    /* Cui Mins rules */
    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U",  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
};
1322
1323
1324static void TestCollations(void) {
1325    int32_t noOfLoc = uloc_countAvailable();
1326    int32_t i = 0, j = 0;
1327
1328    UErrorCode status = U_ZERO_ERROR;
1329    char cName[256];
1330    UChar name[256];
1331    int32_t nameSize;
1332
1333
1334    const char *locName = NULL;
1335    UCollator *coll = NULL;
1336    UCollator *UCA = ucol_open("", &status);
1337    UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &status);
1338    if (U_FAILURE(status)) {
1339        log_err_status(status, "Could not open UCA collator %s\n", u_errorName(status));
1340        return;
1341    }
1342    ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
1343
1344    for(i = 0; i<noOfLoc; i++) {
1345        status = U_ZERO_ERROR;
1346        locName = uloc_getAvailable(i);
1347        if(uprv_strcmp("ja", locName) == 0) {
1348            log_verbose("Don't know how to test prefixes\n");
1349            continue;
1350        }
1351        if(hasCollationElements(locName)) {
1352            nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);
1353            for(j = 0; j<nameSize; j++) {
1354                cName[j] = (char)name[j];
1355            }
1356            cName[nameSize] = 0;
1357            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1358            coll = ucol_open(locName, &status);
1359            if(U_SUCCESS(status)) {
1360                testAgainstUCA(coll, UCA, "UCA", FALSE, &status);
1361                ucol_close(coll);
1362            } else {
1363                log_err("Couldn't instantiate collator for locale %s, error: %s\n", locName, u_errorName(status));
1364                status = U_ZERO_ERROR;
1365            }
1366        }
1367    }
1368    ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);
1369    ucol_close(UCA);
1370}
1371
1372static void RamsRulesTest(void) {
1373    UErrorCode status = U_ZERO_ERROR;
1374    int32_t i = 0;
1375    UCollator *coll = NULL;
1376    UChar rule[2048];
1377    uint32_t ruleLen;
1378    int32_t noOfLoc = uloc_countAvailable();
1379    const char *locName = NULL;
1380
1381    log_verbose("RamsRulesTest\n");
1382
1383    if (uprv_strcmp("km", uloc_getDefault())==0 || uprv_strcmp("km_KH", uloc_getDefault())==0) {
1384        /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */
1385        return;
1386    }
1387
1388    for(i = 0; i<noOfLoc; i++) {
1389        locName = uloc_getAvailable(i);
1390        if(hasCollationElements(locName)) {
1391            if (uprv_strcmp("ja", locName)==0) {
1392                log_verbose("Don't know how to test Japanese because of prefixes\n");
1393                continue;
1394            }
1395            if (uprv_strcmp("de__PHONEBOOK", locName)==0) {
1396                log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");
1397                continue;
1398            }
1399            if (uprv_strcmp("bn", locName)==0 ||
1400                uprv_strcmp("en_US_POSIX", locName)==0 ||
1401                uprv_strcmp("km", locName)==0 ||
1402                uprv_strcmp("km_KH", locName)==0 ||
1403                uprv_strcmp("my", locName)==0 ||
1404                uprv_strcmp("si", locName)==0 ||
1405                uprv_strcmp("si_LK", locName)==0 ||
1406                uprv_strcmp("zh", locName)==0 ||
1407                uprv_strcmp("zh_Hant", locName)==0
1408            ) {
1409                log_verbose("Don't know how to test %s. "
1410                            "TODO: Fix ticket #6040 and reenable RamsRulesTest for this locale.\n", locName);
1411                continue;
1412            }
1413            log_verbose("Testing locale %s\n", locName);
1414            status = U_ZERO_ERROR;
1415            coll = ucol_open(locName, &status);
1416            if(U_SUCCESS(status)) {
1417              if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLBACK_WARNING)) {
1418                if(coll->image->jamoSpecial == TRUE) {
1419                  log_err("%s has special JAMOs\n", locName);
1420                }
1421                ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);
1422                testCollator(coll, &status);
1423                testCEs(coll, &status);
1424              } else {
1425                log_verbose("Skipping %s: %s\n", locName, u_errorName(status));
1426              }
1427              ucol_close(coll);
1428            } else {
1429              log_err("Could not open %s: %s\n", locName, u_errorName(status));
1430            }
1431        }
1432    }
1433
1434    for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {
1435        log_verbose("Testing rule: %s\n", rulesToTest[i]);
1436        ruleLen = u_unescape(rulesToTest[i], rule, 2048);
1437        status = U_ZERO_ERROR;
1438        coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1439        if(U_SUCCESS(status)) {
1440            testCollator(coll, &status);
1441            testCEs(coll, &status);
1442            ucol_close(coll);
1443        } else {
1444          log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName(status), rulesToTest[i]);
1445        }
1446    }
1447
1448}
1449
1450static void IsTailoredTest(void) {
1451    UErrorCode status = U_ZERO_ERROR;
1452    uint32_t i = 0;
1453    UCollator *coll = NULL;
1454    UChar rule[2048];
1455    UChar tailored[2048];
1456    UChar notTailored[2048];
1457    uint32_t ruleLen, tailoredLen, notTailoredLen;
1458
1459    log_verbose("IsTailoredTest\n");
1460
1461    u_uastrcpy(rule, "&Z < A, B, C;c < d");
1462    ruleLen = u_strlen(rule);
1463
1464    u_uastrcpy(tailored, "ABCcd");
1465    tailoredLen = u_strlen(tailored);
1466
1467    u_uastrcpy(notTailored, "ZabD");
1468    notTailoredLen = u_strlen(notTailored);
1469
1470    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1471    if(U_SUCCESS(status)) {
1472        for(i = 0; i<tailoredLen; i++) {
1473            if(!ucol_isTailored(coll, tailored[i], &status)) {
1474                log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);
1475            }
1476        }
1477        for(i = 0; i<notTailoredLen; i++) {
1478            if(ucol_isTailored(coll, notTailored[i], &status)) {
1479                log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);
1480            }
1481        }
1482        ucol_close(coll);
1483    }
1484    else {
1485        log_err_status(status, "Can't tailor rules\n");
1486    }
1487    /* Code coverage */
1488    status = U_ZERO_ERROR;
1489    coll = ucol_open("ja", &status);
1490    if(!ucol_isTailored(coll, 0x4E9C, &status)) {
1491        log_err_status(status, "0x4E9C should be tailored - it is reported as not\n");
1492    }
1493    ucol_close(coll);
1494}
1495
1496
/*
 * Strings used by TestChMove(), which expects every entry to compare as
 * less than every later entry under the "cs" collator. \\uXXXX sequences
 * are expanded with u_unescape() at run time.
 */
static const char chTest[][20] = {
  /* c and its caron forms */
  "c", "C",
  "ca", "cb", "cx", "cy", "CZ",
  "c\\u030C", "C\\u030C",
  /* h */
  "h", "H",
  "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
  /* ch */
  "ch", "cH", "Ch", "CH",
  "cha", "charly", "che", "chh", "chch", "chr",
  /* i */
  "i", "I", "iarly",
  /* r, s, z and their caron forms */
  "r", "R",
  "r\\u030C", "R\\u030C",
  "s", "S",
  "s\\u030C", "S\\u030C",
  "z", "Z",
  "z\\u030C", "Z\\u030C"
};
1516
1517static void TestChMove(void) {
1518    UChar t1[256] = {0};
1519    UChar t2[256] = {0};
1520
1521    uint32_t i = 0, j = 0;
1522    uint32_t size = 0;
1523    UErrorCode status = U_ZERO_ERROR;
1524
1525    UCollator *coll = ucol_open("cs", &status);
1526
1527    if(U_SUCCESS(status)) {
1528        size = sizeof(chTest)/sizeof(chTest[0]);
1529        for(i = 0; i < size-1; i++) {
1530            for(j = i+1; j < size; j++) {
1531                u_unescape(chTest[i], t1, 256);
1532                u_unescape(chTest[j], t2, 256);
1533                doTest(coll, t1, t2, UCOL_LESS);
1534            }
1535        }
1536    }
1537    else {
1538        log_data_err("Can't open collator");
1539    }
1540    ucol_close(coll);
1541}
1542
1543
1544
1545
/*
 * Strings for the tailoring "&\u4e00 < a <<< A < b <<< B". Within this part
 * of the file they are only referenced from a commented-out block in
 * TestImplicitTailoring().
 */
static const char impTest[][20] = {
  "\\u4e00",
  "a", "A",
  "b", "B",
  "\\u4e01"
};
1554
1555
/*
 * Feeds a handful of tailorings that reset on, or tailor, CJK ideographs
 * (\u4e00 .. \u4e03) to genericRulesStarter().
 */
static void TestImplicitTailoring(void) {
  /* one entry per case: the rules, the strings handed to genericRulesStarter()
     (presumably in expected sort order) and the number of strings */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
      { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
      { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
      { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
      { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
  };
  const int32_t caseCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
  int32_t n = 0;

  while(n < caseCount) {
      genericRulesStarter(tests[n].rules, tests[n].data, tests[n].len);
      n++;
  }

  /* An earlier, hand-rolled variant of this test opened a collator for
     "&\u4e00 < a <<< A < b <<< B" itself and ran doTest() over every pair of
     impTest entries; the second table entry above uses the same rule. */
}
1605
1606static void TestFCDProblem(void) {
1607  UChar t1[256] = {0};
1608  UChar t2[256] = {0};
1609
1610  const char *s1 = "\\u0430\\u0306\\u0325";
1611  const char *s2 = "\\u04D1\\u0325";
1612
1613  UErrorCode status = U_ZERO_ERROR;
1614  UCollator *coll = ucol_open("", &status);
1615  u_unescape(s1, t1, 256);
1616  u_unescape(s2, t2, 256);
1617
1618  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1619  doTest(coll, t1, t2, UCOL_EQUAL);
1620
1621  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1622  doTest(coll, t1, t2, UCOL_EQUAL);
1623
1624  ucol_close(coll);
1625}
1626
1627/*
1628The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC
1629We're only using NFC/NFD in this test.
1630*/
/* Capacity, in UChars, of the NFC and NFD buffers of a tester entry. */
#define NORM_BUFFER_TEST_LEN 18
/*
 * One test case collected by TestComposeDecompose(): a code point together
 * with two spellings of it that the collators are expected to treat as equal.
 */
typedef struct {
  UChar32 u;                        /* code point the case was built from */
  UChar NFC[NORM_BUFFER_TEST_LEN];  /* NFC form; replaced by the raw character when that differs from the NFD form */
  UChar NFD[NORM_BUFFER_TEST_LEN];  /* NFD form */
} tester;
1637
1638static void TestComposeDecompose(void) {
1639    /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */
1640    static const UChar UNICODESET_STR[] = {
1641        0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x61,
1642        0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x72,
1643        0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0
1644    };
1645    int32_t noOfLoc;
1646    int32_t i = 0, j = 0;
1647
1648    UErrorCode status = U_ZERO_ERROR;
1649    const char *locName = NULL;
1650    uint32_t nfcSize;
1651    uint32_t nfdSize;
1652    tester **t;
1653    uint32_t noCases = 0;
1654    UCollator *coll = NULL;
1655    UChar32 u = 0;
1656    UChar comp[NORM_BUFFER_TEST_LEN];
1657    uint32_t len = 0;
1658    UCollationElements *iter;
1659    USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);
1660    int32_t charsToTestSize;
1661
1662    noOfLoc = uloc_countAvailable();
1663
1664    coll = ucol_open("", &status);
1665    if (U_FAILURE(status)) {
1666        log_data_err("Error opening collator -> %s (Are you missing data?)\n", u_errorName(status));
1667        return;
1668    }
1669    charsToTestSize = uset_size(charsToTest);
1670    if (charsToTestSize <= 0) {
1671        log_err("Set was zero. Missing data?\n");
1672        return;
1673    }
1674    t = (tester **)malloc(charsToTestSize * sizeof(tester *));
1675    t[0] = (tester *)malloc(sizeof(tester));
1676    log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
1677
1678    for(u = 0; u < charsToTestSize; u++) {
1679        UChar32 ch = uset_charAt(charsToTest, u);
1680        len = 0;
1681        UTF_APPEND_CHAR_UNSAFE(comp, len, ch);
1682        nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1683        nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1684
1685        if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)
1686          || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0))) {
1687            t[noCases]->u = ch;
1688            if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * sizeof(UChar)) != 0)) {
1689                u_strncpy(t[noCases]->NFC, comp, len);
1690                t[noCases]->NFC[len] = 0;
1691            }
1692            noCases++;
1693            t[noCases] = (tester *)malloc(sizeof(tester));
1694            uprv_memset(t[noCases], 0, sizeof(tester));
1695        }
1696    }
1697    log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSize);
1698    uset_close(charsToTest);
1699    charsToTest = NULL;
1700
1701    for(u=0; u<(UChar32)noCases; u++) {
1702        if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1703            log_err("Failure: codePoint %05X fails TestComposeDecompose in the UCA\n", t[u]->u);
1704            doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1705        }
1706    }
1707    /*
1708    for(u = 0; u < charsToTestSize; u++) {
1709      if(!(u&0xFFFF)) {
1710        log_verbose("%08X ", u);
1711      }
1712      uprv_memset(t[noCases], 0, sizeof(tester));
1713      t[noCases]->u = u;
1714      len = 0;
1715      UTF_APPEND_CHAR_UNSAFE(comp, len, u);
1716      comp[len] = 0;
1717      nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_BUFFER_TEST_LEN, &status);
1718      nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_BUFFER_TEST_LEN, &status);
1719      doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);
1720      doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);
1721    }
1722    */
1723
1724    ucol_close(coll);
1725
1726    log_verbose("Testing locales, number of cases = %i\n", noCases);
1727    for(i = 0; i<noOfLoc; i++) {
1728        status = U_ZERO_ERROR;
1729        locName = uloc_getAvailable(i);
1730        if(hasCollationElements(locName)) {
1731            char cName[256];
1732            UChar name[256];
1733            int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(cName), &status);
1734
1735            for(j = 0; j<nameSize; j++) {
1736                cName[j] = (char)name[j];
1737            }
1738            cName[nameSize] = 0;
1739            log_verbose("\nTesting locale %s (%s)\n", locName, cName);
1740
1741            coll = ucol_open(locName, &status);
1742            ucol_setStrength(coll, UCOL_IDENTICAL);
1743            iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1744
1745            for(u=0; u<(UChar32)noCases; u++) {
1746                if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {
1747                    log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);
1748                    doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);
1749                    log_verbose("Testing NFC\n");
1750                    ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);
1751                    backAndForth(iter);
1752                    log_verbose("Testing NFD\n");
1753                    ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);
1754                    backAndForth(iter);
1755                }
1756            }
1757            ucol_closeElements(iter);
1758            ucol_close(coll);
1759        }
1760    }
1761    for(u = 0; u <= (UChar32)noCases; u++) {
1762        free(t[u]);
1763    }
1764    free(t);
1765}
1766
1767static void TestEmptyRule(void) {
1768  UErrorCode status = U_ZERO_ERROR;
1769  UChar rulez[] = { 0 };
1770  UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &status);
1771
1772  ucol_close(coll);
1773}
1774
1775static void TestUCARules(void) {
1776  UErrorCode status = U_ZERO_ERROR;
1777  UChar b[256];
1778  UChar *rules = b;
1779  uint32_t ruleLen = 0;
1780  UCollator *UCAfromRules = NULL;
1781  UCollator *coll = ucol_open("", &status);
1782  if(status == U_FILE_ACCESS_ERROR) {
1783    log_data_err("Is your data around?\n");
1784    return;
1785  } else if(U_FAILURE(status)) {
1786    log_err("Error opening collator\n");
1787    return;
1788  }
1789  ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);
1790
1791  log_verbose("TestUCARules\n");
1792  if(ruleLen > 256) {
1793    rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar));
1794    ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);
1795  }
1796  log_verbose("Rules length is %d\n", ruleLen);
1797  UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
1798  if(U_SUCCESS(status)) {
1799    ucol_close(UCAfromRules);
1800  } else {
1801    log_verbose("Unable to create a collator from UCARules!\n");
1802  }
1803/*
1804  u_unescape(blah, b, 256);
1805  ucol_getSortKey(coll, b, 1, res, 256);
1806*/
1807  ucol_close(coll);
1808  if(rules != b) {
1809    free(rules);
1810  }
1811}
1812
1813
1814/* Pinyin tonal order */
1815/*
1816    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
1817          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
1818    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
1819    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
1820    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
1821    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
1822      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
1823.. (\u00fc)
1824
1825However, in testing we got the following order:
1826    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
1827          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
1828    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
1829.. (\u0113)
1830    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
1831    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
1832    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
1833.. (\u01d8)
1834      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
1835*/
1836
/*
 * Hands the Pinyin tonal-order tailoring (a series of [before 1] resets on
 * the vowels a, e, i, o, u plus one plain reset on u) to
 * genericRulesStarter() together with the strings listed in data.
 */
static void TestBefore(void) {
  static const char *data[] = {
      /* four toned forms followed by the plain vowel, one vowel per row */
      "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
      "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
      "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
      "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
      "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
      "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
  };
  const char *rules =
    "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
    "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
    "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
    "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
    "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
    "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

  genericRulesStarter(rules, data, sizeof(data)/sizeof(data[0]));
}
1855
#if 0
/* superseded by TestBeforePinyin */
/*
 * Disabled: runs the "zh" locale against the Pinyin vowel table in which each
 * base vowel is listed ahead of its tone-marked forms.
 */
static void TestJ784(void) {
  static const char *pinyinVowels[] = {
      "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
      "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
      "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
      "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
      "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
      "\\u00fc",
           "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
  };

  genericLocaleStarter("zh", pinyinVowels, LEN(pinyinVowels));
}
#endif
1871
#if 0
/* superseded by the changes to the lv locale */
/* Disabled: runs the "lv" locale against the I/i/Y/y table below. */
static void TestJ831(void) {
  static const char *latvianLetters[] = { "I", "i", "Y", "y" };

  genericLocaleStarter("lv", latvianLetters, LEN(latvianLetters));
}
#endif
1884
/*
 * Runs one table of a/ae/ligature strings twice: first through the "fr"
 * locale, then through a custom tailoring that enables backwards secondary
 * ordering and defines the ae ligatures as expansions after 'A'.
 */
static void TestJ815(void) {
  static const char ligatureRules[] = "[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E";

  /* Lower/upper case pairs; the ligatures sit between "ae"/"Ae" and "af"/"Af". */
  static const char *words[] = {
    "aa",     "Aa",
    "ab",     "Ab",
    "ad",     "Ad",
    "ae",     "Ae",
    "\\u00e6", "\\u00c6",
    "af",     "Af",
    "b",      "B"
  };

  genericLocaleStarter("fr", words, LEN(words));
  genericRulesStarter(ligatureRules, words, LEN(words));
}
1905
1906
1907/*
1908"& a < b < c < d& r < c",                                   "& a < b < d& r < c",
1909"& a < b < c < d& c < m",                                   "& a < b < c < m < d",
1910"& a < b < c < d& a < m",                                   "& a < m < b < c < d",
1911"& a <<< b << c < d& a < m",                                "& a <<< b << c < m < d",
1912"& a < b < c < d& [before 1] c < m",                        "& a < b < m < c < d",
1913"& a < b <<< c << d <<< e& [before 3] e <<< x",            "& a < b <<< c << d <<< x <<< e",
1914"& a < b <<< c << d <<< e& [before 2] e <<< x",            "& a < b <<< c <<< x << d <<< e",
1915"& a < b <<< c << d <<< e& [before 1] e <<< x",            "& a <<< x < b <<< c << d <<< e",
1916"& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",    "& a < b <<< c << d <<< e <<< f < x < g",
1917*/
1918static void TestRedundantRules(void) {
1919  int32_t i;
1920
1921  static const struct {
1922      const char *rules;
1923      const char *expectedRules;
1924      const char *testdata[8];
1925      uint32_t testdatalen;
1926  } tests[] = {
1927    /* this test conflicts with positioning of CODAN placeholder */
1928       /*{
1929        "& a <<< b <<< c << d <<< e& [before 1] e <<< x",
1930        "&\\u2089<<<x",
1931        {"\\u2089", "x"}, 2
1932       }, */
1933    /* this test conflicts with the [before x] syntax tightening */
1934      /*{
1935        "& b <<< c <<< d << e <<< f& [before 1] f <<< x",
1936        "&\\u0252<<<x",
1937        {"\\u0252", "x"}, 2
1938      }, */
1939    /* this test conflicts with the [before x] syntax tightening */
1940      /*{
1941         "& a < b <<< c << d <<< e& [before 1] e <<< x",
1942         "& a <<< x < b <<< c << d <<< e",
1943        {"a", "x", "b", "c", "d", "e"}, 6
1944      }, */
1945      {
1946        "& a < b < c < d& [before 1] c < m",
1947        "& a < b < m < c < d",
1948        {"a", "b", "m", "c", "d"}, 5
1949      },
1950      {
1951        "& a < b <<< c << d <<< e& [before 3] e <<< x",
1952        "& a < b <<< c << d <<< x <<< e",
1953        {"a", "b", "c", "d", "x", "e"}, 6
1954      },
1955    /* this test conflicts with the [before x] syntax tightening */
1956      /* {
1957        "& a < b <<< c << d <<< e& [before 2] e <<< x",
1958        "& a < b <<< c <<< x << d <<< e",
1959        {"a", "b", "c", "x", "d", "e"},, 6
1960      }, */
1961      {
1962        "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",
1963        "& a < b <<< c << d <<< e <<< f < x < g",
1964        {"a", "b", "c", "d", "e", "f", "x", "g"}, 8
1965      },
1966      {
1967        "& a <<< b << c < d& a < m",
1968        "& a <<< b << c < m < d",
1969        {"a", "b", "c", "m", "d"}, 5
1970      },
1971      {
1972        "&a<b<<b\\u0301 &z<b",
1973        "&a<b\\u0301 &z<b",
1974        {"a", "b\\u0301", "z", "b"}, 4
1975      },
1976      {
1977        "&z<m<<<q<<<m",
1978        "&z<q<<<m",
1979        {"z", "q", "m"},3
1980      },
1981      {
1982        "&z<<<m<q<<<m",
1983        "&z<q<<<m",
1984        {"z", "q", "m"}, 3
1985      },
1986      {
1987        "& a < b < c < d& r < c",
1988        "& a < b < d& r < c",
1989        {"a", "b", "d"}, 3
1990      },
1991      {
1992        "& a < b < c < d& r < c",
1993        "& a < b < d& r < c",
1994        {"r", "c"}, 2
1995      },
1996      {
1997        "& a < b < c < d& c < m",
1998        "& a < b < c < m < d",
1999        {"a", "b", "c", "m", "d"}, 5
2000      },
2001      {
2002        "& a < b < c < d& a < m",
2003        "& a < m < b < c < d",
2004        {"a", "m", "b", "c", "d"}, 5
2005      }
2006  };
2007
2008
2009  UCollator *credundant = NULL;
2010  UCollator *cresulting = NULL;
2011  UErrorCode status = U_ZERO_ERROR;
2012  UChar rlz[2048] = { 0 };
2013  uint32_t rlen = 0;
2014
2015  for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
2016    log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i].expectedRules);
2017    rlen = u_unescape(tests[i].rules, rlz, 2048);
2018
2019    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2020    if(status == U_FILE_ACCESS_ERROR) {
2021      log_data_err("Is your data around?\n");
2022      return;
2023    } else if(U_FAILURE(status)) {
2024      log_err("Error opening collator\n");
2025      return;
2026    }
2027
2028    rlen = u_unescape(tests[i].expectedRules, rlz, 2048);
2029    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2030
2031    testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);
2032
2033    ucol_close(credundant);
2034    ucol_close(cresulting);
2035
2036    log_verbose("testing using data\n");
2037
2038    genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen);
2039  }
2040
2041}
2042
2043static void TestExpansionSyntax(void) {
2044  int32_t i;
2045
2046  const static char *rules[] = {
2047    "&AE <<< a << b <<< c &d <<< f",
2048    "&AE <<< a <<< b << c << d < e < f <<< g",
2049    "&AE <<< B <<< C / D <<< F"
2050  };
2051
2052  const static char *expectedRules[] = {
2053    "&A <<< a / E << b / E <<< c /E  &d <<< f",
2054    "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",
2055    "&A <<< B / E <<< C / ED <<< F / E"
2056  };
2057
2058  const static char *testdata[][8] = {
2059    {"AE", "a", "b", "c"},
2060    {"AE", "a", "b", "c", "d", "e", "f", "g"},
2061    {"AE", "B", "C"} /* / ED <<< F / E"},*/
2062  };
2063
2064  const static uint32_t testdatalen[] = {
2065      4,
2066      8,
2067      3
2068  };
2069
2070
2071
2072  UCollator *credundant = NULL;
2073  UCollator *cresulting = NULL;
2074  UErrorCode status = U_ZERO_ERROR;
2075  UChar rlz[2048] = { 0 };
2076  uint32_t rlen = 0;
2077
2078  for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {
2079    log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[i]);
2080    rlen = u_unescape(rules[i], rlz, 2048);
2081
2082    credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
2083    if(status == U_FILE_ACCESS_ERROR) {
2084      log_data_err("Is your data around?\n");
2085      return;
2086    } else if(U_FAILURE(status)) {
2087      log_err("Error opening collator\n");
2088      return;
2089    }
2090    rlen = u_unescape(expectedRules[i], rlz, 2048);
2091    cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&status);
2092
2093    /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */
2094    /* as a hard error test, but only in information mode */
2095    testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);
2096
2097    ucol_close(credundant);
2098    ucol_close(cresulting);
2099
2100    log_verbose("testing using data\n");
2101
2102    genericRulesStarter(rules[i], testdata[i], testdatalen[i]);
2103  }
2104}
2105
2106static void TestCase(void)
2107{
2108    const static UChar gRules[MAX_TOKEN_LEN] =
2109    /*" & 0 < 1,\u2461<a,A"*/
2110    { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x0041, 0x0000 };
2111
2112    const static UChar testCase[][MAX_TOKEN_LEN] =
2113    {
2114        /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000},
2115        /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000},
2116        /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000},
2117        /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000}
2118    };
2119
2120    const static UCollationResult caseTestResults[][9] =
2121    {
2122        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2123        { UCOL_GREATER, UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },
2124        { UCOL_LESS,    UCOL_LESS, UCOL_LESS,    UCOL_EQUAL, UCOL_GREATER, UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },
2125        { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS,    UCOL_LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }
2126    };
2127
2128    const static UColAttributeValue caseTestAttributes[][2] =
2129    {
2130        { UCOL_LOWER_FIRST, UCOL_OFF},
2131        { UCOL_UPPER_FIRST, UCOL_OFF},
2132        { UCOL_LOWER_FIRST, UCOL_ON},
2133        { UCOL_UPPER_FIRST, UCOL_ON}
2134    };
2135    int32_t i,j,k;
2136    UErrorCode status = U_ZERO_ERROR;
2137    UCollationElements *iter;
2138    UCollator  *myCollation;
2139    myCollation = ucol_open("en_US", &status);
2140
2141    if(U_FAILURE(status)){
2142        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2143        return;
2144    }
2145    log_verbose("Testing different case settings\n");
2146    ucol_setStrength(myCollation, UCOL_TERTIARY);
2147
2148    for(k = 0; k<4; k++) {
2149      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2150      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2151      log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0], caseTestAttributes[k][1]);
2152      for (i = 0; i < 3 ; i++) {
2153        for(j = i+1; j<4; j++) {
2154          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2155        }
2156      }
2157    }
2158    ucol_close(myCollation);
2159
2160    myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIARY,NULL, &status);
2161    if(U_FAILURE(status)){
2162        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
2163        return;
2164    }
2165    log_verbose("Testing different case settings with custom rules\n");
2166    ucol_setStrength(myCollation, UCOL_TERTIARY);
2167
2168    for(k = 0; k<4; k++) {
2169      ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);
2170      ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);
2171      for (i = 0; i < 3 ; i++) {
2172        for(j = i+1; j<4; j++) {
2173          log_verbose("k:%d, i:%d, j:%d\n", k, i, j);
2174          doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j-1]);
2175          iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]), &status);
2176          backAndForth(iter);
2177          ucol_closeElements(iter);
2178          iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]), &status);
2179          backAndForth(iter);
2180          ucol_closeElements(iter);
2181        }
2182      }
2183    }
2184    ucol_close(myCollation);
2185    {
2186      const static char *lowerFirst[] = {
2187        "h",
2188        "H",
2189        "ch",
2190        "Ch",
2191        "CH",
2192        "cha",
2193        "chA",
2194        "Cha",
2195        "ChA",
2196        "CHa",
2197        "CHA",
2198        "i",
2199        "I"
2200      };
2201
2202      const static char *upperFirst[] = {
2203        "H",
2204        "h",
2205        "CH",
2206        "Ch",
2207        "ch",
2208        "CHA",
2209        "CHa",
2210        "ChA",
2211        "Cha",
2212        "chA",
2213        "cha",
2214        "I",
2215        "i"
2216      };
2217      log_verbose("mixed case test\n");
2218      log_verbose("lower first, case level off\n");
2219      genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2220      log_verbose("upper first, case level off\n");
2221      genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2222      log_verbose("lower first, case level on\n");
2223      genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowerFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));
2224      log_verbose("upper first, case level on\n");
2225      genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", upperFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));
2226    }
2227
2228}
2229
2230static void TestIncrementalNormalize(void) {
2231
2232    /*UChar baseA     =0x61;*/
2233    UChar baseA     =0x41;
2234/*    UChar baseB     = 0x42;*/
2235    static const UChar ccMix[]   = {0x316, 0x321, 0x300};
2236    /*UChar ccMix[]   = {0x61, 0x61, 0x61};*/
2237    /*
2238        0x316 is combining grave accent below, cc=220
2239        0x321 is combining palatalized hook below, cc=202
2240        0x300 is combining grave accent, cc=230
2241    */
2242
2243#define MAXSLEN 2000
2244    /*int          maxSLen   = 64000;*/
2245    int          sLen;
2246    int          i;
2247
2248    UCollator        *coll;
2249    UErrorCode       status = U_ZERO_ERROR;
2250    UCollationResult result;
2251
2252    int32_t myQ = getTestOption(QUICK_OPTION);
2253
2254    if(getTestOption(QUICK_OPTION) < 0) {
2255        setTestOption(QUICK_OPTION, 1);
2256    }
2257
2258    {
2259        /* Test 1.  Run very long unnormalized strings, to force overflow of*/
2260        /*          most buffers along the way.*/
2261        UChar            strA[MAXSLEN+1];
2262        UChar            strB[MAXSLEN+1];
2263
2264        coll = ucol_open("en_US", &status);
2265        if(status == U_FILE_ACCESS_ERROR) {
2266          log_data_err("Is your data around?\n");
2267          return;
2268        } else if(U_FAILURE(status)) {
2269          log_err("Error opening collator\n");
2270          return;
2271        }
2272        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2273
2274        /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/
2275        /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/
2276        /*for (sLen = 1000; sLen<1001; sLen++) {*/
2277        for (sLen = 500; sLen<501; sLen++) {
2278        /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/
2279            strA[0] = baseA;
2280            strB[0] = baseA;
2281            for (i=1; i<=sLen-1; i++) {
2282                strA[i] = ccMix[i % 3];
2283                strB[sLen-i] = ccMix[i % 3];
2284            }
2285            strA[sLen]   = 0;
2286            strB[sLen]   = 0;
2287
2288            ucol_setStrength(coll, UCOL_TERTIARY);   /* Do test with default strength, which runs*/
2289            doTest(coll, strA, strB, UCOL_EQUAL);    /*   optimized functions in the impl*/
2290            ucol_setStrength(coll, UCOL_IDENTICAL);   /* Do again with the slow, general impl.*/
2291            doTest(coll, strA, strB, UCOL_EQUAL);
2292        }
2293    }
2294
2295    setTestOption(QUICK_OPTION, myQ);
2296
2297
2298    /*  Test 2:  Non-normal sequence in a string that extends to the last character*/
2299    /*         of the string.  Checks a couple of edge cases.*/
2300
2301    {
2302        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};
2303        static const UChar strB[] = {0x41, 0xc0, 0x316, 0};
2304        ucol_setStrength(coll, UCOL_TERTIARY);
2305        doTest(coll, strA, strB, UCOL_EQUAL);
2306    }
2307
2308    /*  Test 3:  Non-normal sequence is terminated by a surrogate pair.*/
2309
2310    {
2311      /* New UCA  3.1.1.
2312       * test below used a code point from Desseret, which sorts differently
2313       * than d800 dc00
2314       */
2315        /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
2316        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
2317        static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
2318        ucol_setStrength(coll, UCOL_TERTIARY);
2319        doTest(coll, strA, strB, UCOL_GREATER);
2320    }
2321
2322    /*  Test 4:  Imbedded nulls do not terminate a string when length is specified.*/
2323
2324    {
2325        static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};
2326        static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};
2327        char  sortKeyA[50];
2328        char  sortKeyAz[50];
2329        char  sortKeyB[50];
2330        char  sortKeyBz[50];
2331        int   r;
2332
2333        /* there used to be -3 here. Hmmmm.... */
2334        /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/
2335        result = ucol_strcoll(coll, strA, 3, strB, 3);
2336        if (result != UCOL_GREATER) {
2337            log_err("ERROR 1 in test 4\n");
2338        }
2339        result = ucol_strcoll(coll, strA, -1, strB, -1);
2340        if (result != UCOL_EQUAL) {
2341            log_err("ERROR 2 in test 4\n");
2342        }
2343
2344        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2345        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2346        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2347        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2348
2349        r = strcmp(sortKeyA, sortKeyAz);
2350        if (r <= 0) {
2351            log_err("Error 3 in test 4\n");
2352        }
2353        r = strcmp(sortKeyA, sortKeyB);
2354        if (r <= 0) {
2355            log_err("Error 4 in test 4\n");
2356        }
2357        r = strcmp(sortKeyAz, sortKeyBz);
2358        if (r != 0) {
2359            log_err("Error 5 in test 4\n");
2360        }
2361
2362        ucol_setStrength(coll, UCOL_IDENTICAL);
2363        ucol_getSortKey(coll, strA,  3, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2364        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2365        ucol_getSortKey(coll, strB,  3, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2366        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2367
2368        r = strcmp(sortKeyA, sortKeyAz);
2369        if (r <= 0) {
2370            log_err("Error 6 in test 4\n");
2371        }
2372        r = strcmp(sortKeyA, sortKeyB);
2373        if (r <= 0) {
2374            log_err("Error 7 in test 4\n");
2375        }
2376        r = strcmp(sortKeyAz, sortKeyBz);
2377        if (r != 0) {
2378            log_err("Error 8 in test 4\n");
2379        }
2380        ucol_setStrength(coll, UCOL_TERTIARY);
2381    }
2382
2383
2384    /*  Test 5:  Null characters in non-normal source strings.*/
2385
2386    {
2387        static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00};
2388        static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00};
2389        char  sortKeyA[50];
2390        char  sortKeyAz[50];
2391        char  sortKeyB[50];
2392        char  sortKeyBz[50];
2393        int   r;
2394
2395        result = ucol_strcoll(coll, strA, 6, strB, 6);
2396        if (result != UCOL_GREATER) {
2397            log_err("ERROR 1 in test 5\n");
2398        }
2399        result = ucol_strcoll(coll, strA, -1, strB, -1);
2400        if (result != UCOL_EQUAL) {
2401            log_err("ERROR 2 in test 5\n");
2402        }
2403
2404        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2405        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2406        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2407        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2408
2409        r = strcmp(sortKeyA, sortKeyAz);
2410        if (r <= 0) {
2411            log_err("Error 3 in test 5\n");
2412        }
2413        r = strcmp(sortKeyA, sortKeyB);
2414        if (r <= 0) {
2415            log_err("Error 4 in test 5\n");
2416        }
2417        r = strcmp(sortKeyAz, sortKeyBz);
2418        if (r != 0) {
2419            log_err("Error 5 in test 5\n");
2420        }
2421
2422        ucol_setStrength(coll, UCOL_IDENTICAL);
2423        ucol_getSortKey(coll, strA,  6, (uint8_t *)sortKeyA, sizeof(sortKeyA));
2424        ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz));
2425        ucol_getSortKey(coll, strB,  6, (uint8_t *)sortKeyB, sizeof(sortKeyB));
2426        ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz));
2427
2428        r = strcmp(sortKeyA, sortKeyAz);
2429        if (r <= 0) {
2430            log_err("Error 6 in test 5\n");
2431        }
2432        r = strcmp(sortKeyA, sortKeyB);
2433        if (r <= 0) {
2434            log_err("Error 7 in test 5\n");
2435        }
2436        r = strcmp(sortKeyAz, sortKeyBz);
2437        if (r != 0) {
2438            log_err("Error 8 in test 5\n");
2439        }
2440        ucol_setStrength(coll, UCOL_TERTIARY);
2441    }
2442
2443
2444    /*  Test 6:  Null character as base of a non-normal combining sequence.*/
2445
2446    {
2447        static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00};
2448        static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00};
2449
2450        result = ucol_strcoll(coll, strA, 5, strB, 5);
2451        if (result != UCOL_LESS) {
2452            log_err("Error 1 in test 6\n");
2453        }
2454        result = ucol_strcoll(coll, strA, -1, strB, -1);
2455        if (result != UCOL_EQUAL) {
2456            log_err("Error 2 in test 6\n");
2457        }
2458    }
2459
2460    ucol_close(coll);
2461}
2462
2463
2464
#if 0
/*
 * Disabled: calls ucol_uprv_getCaseBits() on each sample string with a
 * collator opened for the "" locale and compares the returned case value
 * with the expected table.
 */
static void TestGetCaseBit(void) {
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
      "\\uFF9E", "\\u0009"
  };

  /* Expected case value for the entry of caseBitData at the same index. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
      UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  if(U_FAILURE(status)) {
    log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
    return;
  }

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }

  /* Release the collator opened above. */
  ucol_close(UCA);
}
#endif
2492
2493static void TestHangulTailoring(void) {
2494    static const char *koreanData[] = {
2495        "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53ef", "\\u5475",
2496            "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\\u67b7", "\\u67ef",
2497            "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\\u8857", "\\u8888",
2498            "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",
2499            "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\\u659D", "\\u698E",
2500            "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\\u8B0C"
2501    };
2502
2503    const char *rules =
2504        "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<< \\u53ef <<< \\u5475 "
2505        "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "
2506        "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "
2507        "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "
2508        "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "
2509        "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";
2510
2511
2512  UErrorCode status = U_ZERO_ERROR;
2513  UChar rlz[2048] = { 0 };
2514  uint32_t rlen = u_unescape(rules, rlz, 2048);
2515
2516  UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
2517  if(status == U_FILE_ACCESS_ERROR) {
2518    log_data_err("Is your data around?\n");
2519    return;
2520  } else if(U_FAILURE(status)) {
2521    log_err("Error opening collator\n");
2522    return;
2523  }
2524
2525  log_verbose("Using start of korean rules\n");
2526
2527  if(U_SUCCESS(status)) {
2528    genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2529  } else {
2530    log_err("Unable to open collator with rules %s\n", rules);
2531  }
2532
2533  log_verbose("Setting jamoSpecial to TRUE and testing once more\n");
2534  ((UCATableHeader *)coll->image)->jamoSpecial = TRUE; /* don't try this at home  */
2535  genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2536
2537  ucol_close(coll);
2538
2539  log_verbose("Using ko__LOTUS locale\n");
2540  genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(koreanData[0]));
2541}
2542
2543static void TestCompressOverlap(void) {
2544    UChar       secstr[150];
2545    UChar       tertstr[150];
2546    UErrorCode  status = U_ZERO_ERROR;
2547    UCollator  *coll;
2548    char        result[200];
2549    uint32_t    resultlen;
2550    int         count = 0;
2551    char       *tempptr;
2552
2553    coll = ucol_open("", &status);
2554
2555    if (U_FAILURE(status)) {
2556        log_err_status(status, "Collator can't be created -> %s\n", u_errorName(status));
2557        return;
2558    }
2559    while (count < 149) {
2560        secstr[count] = 0x0020; /* [06, 05, 05] */
2561        tertstr[count] = 0x0020;
2562        count ++;
2563    }
2564
2565    /* top down compression ----------------------------------- */
2566    secstr[count] = 0x0332; /* [, 87, 05] */
2567    tertstr[count] = 0x3000; /* [06, 05, 07] */
2568
2569    /* no compression secstr should have 150 secondary bytes, tertstr should
2570    have 150 tertiary bytes.
2571    with correct overlapping compression, secstr should have 4 secondary
2572    bytes, tertstr should have > 2 tertiary bytes */
2573    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2574    tempptr = uprv_strchr(result, 1) + 1;
2575    while (*(tempptr + 1) != 1) {
2576        /* the last secondary collation element is not checked since it is not
2577        part of the compression */
2578        if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {
2579            log_err("Secondary compression overlapped\n");
2580        }
2581        tempptr ++;
2582    }
2583
2584    /* tertiary top/bottom/common for en_US is similar to the secondary
2585    top/bottom/common */
2586    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2587    tempptr = uprv_strrchr(result, 1) + 1;
2588    while (*(tempptr + 1) != 0) {
2589        /* the last secondary collation element is not checked since it is not
2590        part of the compression */
2591        if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {
2592            log_err("Tertiary compression overlapped\n");
2593        }
2594        tempptr ++;
2595    }
2596
2597    /* bottom up compression ------------------------------------- */
2598    secstr[count] = 0;
2599    tertstr[count] = 0;
2600    resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);
2601    tempptr = uprv_strchr(result, 1) + 1;
2602    while (*(tempptr + 1) != 1) {
2603        /* the last secondary collation element is not checked since it is not
2604        part of the compression */
2605        if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {
2606            log_err("Secondary compression overlapped\n");
2607        }
2608        tempptr ++;
2609    }
2610
2611    /* tertiary top/bottom/common for en_US is similar to the secondary
2612    top/bottom/common */
2613    resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);
2614    tempptr = uprv_strrchr(result, 1) + 1;
2615    while (*(tempptr + 1) != 0) {
2616        /* the last secondary collation element is not checked since it is not
2617        part of the compression */
2618        if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {
2619            log_err("Tertiary compression overlapped\n");
2620        }
2621        tempptr ++;
2622    }
2623
2624    ucol_close(coll);
2625}
2626
2627static void TestCyrillicTailoring(void) {
2628  static const char *test[] = {
2629    "\\u0410b",
2630      "\\u0410\\u0306a",
2631      "\\u04d0A"
2632  };
2633
2634    /* Russian overrides contractions, so this test is not valid anymore */
2635    /*genericLocaleStarter("ru", test, 3);*/
2636
2637    genericLocaleStarter("root", test, 3);
2638    genericRulesStarter("&\\u0410 = \\u0410", test, 3);
2639    genericRulesStarter("&Z < \\u0410", test, 3);
2640    genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);
2641    genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);
2642    genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);
2643    genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);
2644}
2645
/*
 * Applies a [suppressContractions] rule covering U+0400..U+047F to two
 * sets of strings containing U+0410 followed by combining marks.
 */
static void TestSuppressContractions(void) {
  static const char suppressRule[] = "[suppressContractions [\\u0400-\\u047f]]";

  /* U+0410 trailing a Latin letter. */
  static const char *testNoCont[] = {
      "a\\u0410",
      "A\\u0410\\u0306",
      "\\uFF21\\u0410\\u0302"
  };
  /* U+0410 leading, followed by a Latin letter. */
  static const char *testNoCont2[] = {
      "\\u0410\\u0302a",
      "\\u0410\\u0306b",
      "\\u0410c"
  };

  genericRulesStarter(suppressRule, testNoCont, 3);
  genericRulesStarter(suppressRule, testNoCont2, 3);
}
2662
2663static void TestContraction(void) {
2664    const static char *testrules[] = {
2665        "&A = AB / B",
2666        "&A = A\\u0306/\\u0306",
2667        "&c = ch / h"
2668    };
2669    const static UChar testdata[][2] = {
2670        {0x0041 /* 'A' */, 0x0042 /* 'B' */},
2671        {0x0041 /* 'A' */, 0x0306 /* combining breve */},
2672        {0x0063 /* 'c' */, 0x0068 /* 'h' */}
2673    };
2674    const static UChar testdata2[][2] = {
2675        {0x0063 /* 'c' */, 0x0067 /* 'g' */},
2676        {0x0063 /* 'c' */, 0x0068 /* 'h' */},
2677        {0x0063 /* 'c' */, 0x006C /* 'l' */}
2678    };
2679    const static char *testrules3[] = {
2680        "&z < xyz &xyzw << B",
2681        "&z < xyz &xyz << B / w",
2682        "&z < ch &achm << B",
2683        "&z < ch &a << B / chm",
2684        "&\\ud800\\udc00w << B",
2685        "&\\ud800\\udc00 << B / w",
2686        "&a\\ud800\\udc00m << B",
2687        "&a << B / \\ud800\\udc00m",
2688    };
2689
2690    UErrorCode  status   = U_ZERO_ERROR;
2691    UCollator  *coll;
2692    UChar       rule[256] = {0};
2693    uint32_t    rlen     = 0;
2694    int         i;
2695
2696    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2697        UCollationElements *iter1;
2698        int j = 0;
2699        log_verbose("Rule %s for testing\n", testrules[i]);
2700        rlen = u_unescape(testrules[i], rule, 32);
2701        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2702        if (U_FAILURE(status)) {
2703            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2704            return;
2705        }
2706        iter1 = ucol_openElements(coll, testdata[i], 2, &status);
2707        if (U_FAILURE(status)) {
2708            log_err("Collation iterator creation failed\n");
2709            return;
2710        }
2711        while (j < 2) {
2712            UCollationElements *iter2 = ucol_openElements(coll,
2713                                                         &(testdata[i][j]),
2714                                                         1, &status);
2715            uint32_t ce;
2716            if (U_FAILURE(status)) {
2717                log_err("Collation iterator creation failed\n");
2718                return;
2719            }
2720            ce = ucol_next(iter2, &status);
2721            while (ce != UCOL_NULLORDER) {
2722                if ((uint32_t)ucol_next(iter1, &status) != ce) {
2723                    log_err("Collation elements in contraction split does not match\n");
2724                    return;
2725                }
2726                ce = ucol_next(iter2, &status);
2727            }
2728            j ++;
2729            ucol_closeElements(iter2);
2730        }
2731        if (ucol_next(iter1, &status) != UCOL_NULLORDER) {
2732            log_err("Collation elements not exhausted\n");
2733            return;
2734        }
2735        ucol_closeElements(iter1);
2736        ucol_close(coll);
2737    }
2738
2739    rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);
2740    coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2741    if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {
2742        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2743                testdata2[0][0], testdata2[0][1], testdata2[1][0],
2744                testdata2[1][1]);
2745        return;
2746    }
2747    if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {
2748        log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",
2749                testdata2[1][0], testdata2[1][1], testdata2[2][0],
2750                testdata2[2][1]);
2751        return;
2752    }
2753    ucol_close(coll);
2754
2755    for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {
2756        UCollator          *coll1,
2757                           *coll2;
2758        UCollationElements *iter1,
2759                           *iter2;
2760        UChar               ch = 0x0042 /* 'B' */;
2761        uint32_t            ce;
2762        rlen = u_unescape(testrules3[i], rule, 32);
2763        coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2764        rlen = u_unescape(testrules3[i + 1], rule, 32);
2765        coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2766        if (U_FAILURE(status)) {
2767            log_err("Collator creation failed %s\n", testrules[i]);
2768            return;
2769        }
2770        iter1 = ucol_openElements(coll1, &ch, 1, &status);
2771        iter2 = ucol_openElements(coll2, &ch, 1, &status);
2772        if (U_FAILURE(status)) {
2773            log_err("Collation iterator creation failed\n");
2774            return;
2775        }
2776        ce = ucol_next(iter1, &status);
2777        if (U_FAILURE(status)) {
2778            log_err("Retrieving ces failed\n");
2779            return;
2780        }
2781        while (ce != UCOL_NULLORDER) {
2782            if (ce != (uint32_t)ucol_next(iter2, &status)) {
2783                log_err("CEs does not match\n");
2784                return;
2785            }
2786            ce = ucol_next(iter1, &status);
2787            if (U_FAILURE(status)) {
2788                log_err("Retrieving ces failed\n");
2789                return;
2790            }
2791        }
2792        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
2793            log_err("CEs not exhausted\n");
2794            return;
2795        }
2796        ucol_closeElements(iter1);
2797        ucol_closeElements(iter2);
2798        ucol_close(coll1);
2799        ucol_close(coll2);
2800    }
2801}
2802
2803static void TestExpansion(void) {
2804    const static char *testrules[] = {
2805        "&J << K / B & K << M",
2806        "&J << K / B << M"
2807    };
2808    const static UChar testdata[][3] = {
2809        {0x004A /*'J'*/, 0x0041 /*'A'*/, 0},
2810        {0x004D /*'M'*/, 0x0041 /*'A'*/, 0},
2811        {0x004B /*'K'*/, 0x0041 /*'A'*/, 0},
2812        {0x004B /*'K'*/, 0x0043 /*'C'*/, 0},
2813        {0x004A /*'J'*/, 0x0043 /*'C'*/, 0},
2814        {0x004D /*'M'*/, 0x0043 /*'C'*/, 0}
2815    };
2816
2817    UErrorCode  status   = U_ZERO_ERROR;
2818    UCollator  *coll;
2819    UChar       rule[256] = {0};
2820    uint32_t    rlen     = 0;
2821    int         i;
2822
2823    for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {
2824        int j = 0;
2825        log_verbose("Rule %s for testing\n", testrules[i]);
2826        rlen = u_unescape(testrules[i], rule, 32);
2827        coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);
2828        if (U_FAILURE(status)) {
2829            log_err_status(status, "Collator creation failed %s -> %s\n", testrules[i], u_errorName(status));
2830            return;
2831        }
2832
2833        for (j = 0; j < 5; j ++) {
2834            doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);
2835        }
2836        ucol_close(coll);
2837    }
2838}
2839
2840#if 0
2841/* this test tests the current limitations of the engine */
2842/* it always fail, so it is disabled by default */
2843static void TestLimitations(void) {
2844  /* recursive expansions */
2845  {
2846    static const char *rule = "&a=b/c&d=c/e";
2847    static const char *tlimit01[] = {"add","b","adf"};
2848    static const char *tlimit02[] = {"aa","b","af"};
2849    log_verbose("recursive expansions\n");
2850    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2851    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2852  }
2853  /* contractions spanning expansions */
2854  {
2855    static const char *rule = "&a<<<c/e&g<<<eh";
2856    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
2857    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
2858    log_verbose("contractions spanning expansions\n");
2859    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
2860    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
2861  }
2862  /* normalization: nulls in contractions */
2863  {
2864    static const char *rule = "&a<<<\\u0000\\u0302";
2865    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2866    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2867    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2868    static const UColAttributeValue valOn[] = { UCOL_ON };
2869    static const UColAttributeValue valOff[] = { UCOL_OFF };
2870
2871    log_verbose("NULL in contractions\n");
2872    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2873    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2874    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2875    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2876
2877  }
2878  /* normalization: contractions spanning normalization */
2879  {
2880    static const char *rule = "&a<<<\\u0000\\u0302";
2881    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
2882    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
2883    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
2884    static const UColAttributeValue valOn[] = { UCOL_ON };
2885    static const UColAttributeValue valOff[] = { UCOL_OFF };
2886
2887    log_verbose("contractions spanning normalization\n");
2888    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
2889    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
2890    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
2891    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);
2892
2893  }
2894  /* variable top:  */
2895  {
2896    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
2897    static const char *rule = "&\\u2010<x<[variable top]=z";
2898    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
2899    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
2900    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
2901    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
2902    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
2903    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
2904    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };
2905
2906    log_verbose("variable top\n");
2907    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2908    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2909    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2910    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
2911    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));
2912
2913  }
2914  /* case level */
2915  {
2916    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
2917    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
2918    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
2919    static const UColAttribute att[] = { UCOL_CASE_FIRST};
2920    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
2921    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
2922    log_verbose("case level\n");
2923    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2924    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
2925    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2926    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
2927  }
2928
2929}
2930#endif
2931
2932static void TestBocsuCoverage(void) {
2933  UErrorCode status = U_ZERO_ERROR;
2934  const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u0041";
2935  UChar       test[256] = {0};
2936  uint32_t    tlen     = u_unescape(testString, test, 32);
2937  uint8_t key[256]     = {0};
2938  uint32_t klen         = 0;
2939
2940  UCollator *coll = ucol_open("", &status);
2941  if(U_SUCCESS(status)) {
2942  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
2943
2944  klen = ucol_getSortKey(coll, test, tlen, key, 256);
2945
2946  ucol_close(coll);
2947  } else {
2948    log_data_err("Couldn't open UCA\n");
2949  }
2950}
2951
2952static void TestVariableTopSetting(void) {
2953  UErrorCode status = U_ZERO_ERROR;
2954  const UChar *current = NULL;
2955  uint32_t varTopOriginal = 0, varTop1, varTop2;
2956  UCollator *coll = ucol_open("", &status);
2957  if(U_SUCCESS(status)) {
2958
2959  uint32_t strength = 0;
2960  uint16_t specs = 0;
2961  uint32_t chOffset = 0;
2962  uint32_t chLen = 0;
2963  uint32_t exOffset = 0;
2964  uint32_t exLen = 0;
2965  uint32_t oldChOffset = 0;
2966  uint32_t oldChLen = 0;
2967  uint32_t oldExOffset = 0;
2968  uint32_t oldExLen = 0;
2969  uint32_t prefixOffset = 0;
2970  uint32_t prefixLen = 0;
2971
2972  UBool startOfRules = TRUE;
2973  UColTokenParser src;
2974  UColOptionSet opts;
2975
2976  UChar *rulesCopy = NULL;
2977  uint32_t rulesLen;
2978
2979  UCollationResult result;
2980
2981  UChar first[256] = { 0 };
2982  UChar second[256] = { 0 };
2983  UParseError parseError;
2984  int32_t myQ = getTestOption(QUICK_OPTION);
2985
2986  uprv_memset(&src, 0, sizeof(UColTokenParser));
2987
2988  src.opts = &opts;
2989
2990  if(getTestOption(QUICK_OPTION) <= 0) {
2991    setTestOption(QUICK_OPTION, 1);
2992  }
2993
2994  /* this test will fail when normalization is turned on */
2995  /* therefore we always turn off exhaustive mode for it */
2996  { /* QUICK > 0*/
2997    log_verbose("Slide variable top over UCARules\n");
2998    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);
2999    rulesCopy = (UChar *)uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
3000    rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE);
3001
3002    if(U_SUCCESS(status) && rulesLen > 0) {
3003      ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
3004      src.current = src.source = rulesCopy;
3005      src.end = rulesCopy+rulesLen;
3006      src.extraCurrent = src.end;
3007      src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
3008
3009	  /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
3010	   the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
3011      while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
3012        strength = src.parsedToken.strength;
3013        chOffset = src.parsedToken.charsOffset;
3014        chLen = src.parsedToken.charsLen;
3015        exOffset = src.parsedToken.extensionOffset;
3016        exLen = src.parsedToken.extensionLen;
3017        prefixOffset = src.parsedToken.prefixOffset;
3018        prefixLen = src.parsedToken.prefixLen;
3019        specs = src.parsedToken.flags;
3020
3021        startOfRules = FALSE;
3022        {
3023          log_verbose("%04X %d ", *(src.source+chOffset), chLen);
3024        }
3025        if(strength == UCOL_PRIMARY) {
3026          status = U_ZERO_ERROR;
3027          varTopOriginal = ucol_getVariableTop(coll, &status);
3028          varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);
3029          if(U_FAILURE(status)) {
3030            char buffer[256];
3031            char *buf = buffer;
3032            uint32_t i = 0, j;
3033            uint32_t CE = UCOL_NO_MORE_CES;
3034
3035            /* before we start screaming, let's see if there is a problem with the rules */
3036            UErrorCode collIterateStatus = U_ZERO_ERROR;
3037            collIterate *s = uprv_new_collIterate(&collIterateStatus);
3038            uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &collIterateStatus);
3039
3040            CE = ucol_getNextCE(coll, s, &status);
3041
3042            for(i = 0; i < oldChLen; i++) {
3043              j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));
3044              buf += j;
3045            }
3046            if(status == U_PRIMARY_TOO_LONG_ERROR) {
3047              log_verbose("= Expected failure for %s =", buffer);
3048            } else {
3049              if(uprv_collIterateAtEnd(s)) {
3050                log_err("Unexpected failure setting variable top at offset %d. Error %s. Codepoints: %s\n",
3051                  oldChOffset, u_errorName(status), buffer);
3052              } else {
3053                log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",
3054                  buffer);
3055              }
3056            }
3057            uprv_delete_collIterate(s);
3058          }
3059          varTop2 = ucol_getVariableTop(coll, &status);
3060          if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {
3061            log_err("cannot retrieve set varTop value!\n");
3062            continue;
3063          }
3064
3065          if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {
3066
3067            u_strncpy(first, src.source+oldChOffset, oldChLen);
3068            u_strncpy(first+oldChLen, src.source+chOffset, chLen);
3069            u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);
3070            first[2*oldChLen+chLen] = 0;
3071
3072            if(oldExLen == 0) {
3073              u_strncpy(second, src.source+chOffset, chLen);
3074              second[chLen] = 0;
3075            } else { /* This is skipped momentarily, but should work once UCARules are fully UCA conformant */
3076              u_strncpy(second, src.source+oldExOffset, oldExLen);
3077              u_strncpy(second+oldChLen, src.source+chOffset, chLen);
3078              u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen);
3079              second[2*oldExLen+chLen] = 0;
3080            }
3081            result = ucol_strcoll(coll, first, -1, second, -1);
3082            if(result == UCOL_EQUAL) {
3083              doTest(coll, first, second, UCOL_EQUAL);
3084            } else {
3085              log_verbose("Suspicious strcoll result for %04X and %04X\n", *(src.source+oldChOffset), *(src.source+chOffset));
3086            }
3087          }
3088        }
3089        if(strength != UCOL_TOK_RESET) {
3090          oldChOffset = chOffset;
3091          oldChLen = chLen;
3092          oldExOffset = exOffset;
3093          oldExLen = exLen;
3094        }
3095      }
3096      status = U_ZERO_ERROR;
3097    }
3098    else {
3099      log_err("Unexpected failure getting rules %s\n", u_errorName(status));
3100      return;
3101    }
3102    if (U_FAILURE(status)) {
3103        log_err("Error parsing rules %s\n", u_errorName(status));
3104        return;
3105    }
3106    status = U_ZERO_ERROR;
3107  }
3108
3109  setTestOption(QUICK_OPTION, myQ);
3110
3111  log_verbose("Testing setting variable top to contractions\n");
3112  {
3113    /* uint32_t tailoredCE = UCOL_NOT_FOUND; */
3114    /*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));*/
3115    UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
3116    while(*conts != 0) {
3117      if((*(conts+2) == 0) || (*(conts+1)==0)) { /* contracts or pre-context contractions */
3118        varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
3119      } else {
3120        varTop1 = ucol_setVariableTop(coll, conts, 3, &status);
3121      }
3122      if(U_FAILURE(status)) {
3123        if(status == U_PRIMARY_TOO_LONG_ERROR) {
3124          /* ucol_setVariableTop() is documented to not accept 3-byte primaries,
3125           * therefore it is not an error when it complains about them. */
3126          log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",
3127                      *conts, *(conts+1), *(conts+2));
3128        } else {
3129          log_err("Couldn't set variable top to a contraction %04X %04X %04X - %s\n",
3130                  *conts, *(conts+1), *(conts+2), u_errorName(status));
3131        }
3132        status = U_ZERO_ERROR;
3133      }
3134      conts+=3;
3135    }
3136
3137    status = U_ZERO_ERROR;
3138
3139    first[0] = 0x0040;
3140    first[1] = 0x0050;
3141    first[2] = 0x0000;
3142
3143    ucol_setVariableTop(coll, first, -1, &status);
3144
3145    if(U_SUCCESS(status)) {
3146      log_err("Invalid contraction succeded in setting variable top!\n");
3147    }
3148
3149  }
3150
3151  log_verbose("Test restoring variable top\n");
3152
3153  status = U_ZERO_ERROR;
3154  ucol_restoreVariableTop(coll, varTopOriginal, &status);
3155  if(varTopOriginal != ucol_getVariableTop(coll, &status)) {
3156    log_err("Couldn't restore old variable top\n");
3157  }
3158
3159  log_verbose("Testing calling with error set\n");
3160
3161  status = U_INTERNAL_PROGRAM_ERROR;
3162  varTop1 = ucol_setVariableTop(coll, first, 1, &status);
3163  varTop2 = ucol_getVariableTop(coll, &status);
3164  ucol_restoreVariableTop(coll, varTop2, &status);
3165  varTop1 = ucol_setVariableTop(NULL, first, 1, &status);
3166  varTop2 = ucol_getVariableTop(NULL, &status);
3167  ucol_restoreVariableTop(NULL, varTop2, &status);
3168  if(status != U_INTERNAL_PROGRAM_ERROR) {
3169    log_err("Bad reaction to passed error!\n");
3170  }
3171  uprv_free(src.source);
3172  ucol_close(coll);
3173  } else {
3174    log_data_err("Couldn't open UCA collator\n");
3175  }
3176
3177}
3178
3179static void TestNonChars(void) {
3180  static const char *test[] = {
3181      "\\u0000",  /* ignorable */
3182      "\\uFFFE",  /* special merge-sort character with minimum non-ignorable weights */
3183      "\\uFDD0", "\\uFDEF",
3184      "\\U0001FFFE", "\\U0001FFFF",  /* UCA 6.0: noncharacters are treated like unassigned, */
3185      "\\U0002FFFE", "\\U0002FFFF",  /* not like ignorable. */
3186      "\\U0003FFFE", "\\U0003FFFF",
3187      "\\U0004FFFE", "\\U0004FFFF",
3188      "\\U0005FFFE", "\\U0005FFFF",
3189      "\\U0006FFFE", "\\U0006FFFF",
3190      "\\U0007FFFE", "\\U0007FFFF",
3191      "\\U0008FFFE", "\\U0008FFFF",
3192      "\\U0009FFFE", "\\U0009FFFF",
3193      "\\U000AFFFE", "\\U000AFFFF",
3194      "\\U000BFFFE", "\\U000BFFFF",
3195      "\\U000CFFFE", "\\U000CFFFF",
3196      "\\U000DFFFE", "\\U000DFFFF",
3197      "\\U000EFFFE", "\\U000EFFFF",
3198      "\\U000FFFFE", "\\U000FFFFF",
3199      "\\U0010FFFE", "\\U0010FFFF",
3200      "\\uFFFF"  /* special character with maximum primary weight */
3201  };
3202  UErrorCode status = U_ZERO_ERROR;
3203  UCollator *coll = ucol_open("en_US", &status);
3204
3205  log_verbose("Test non characters\n");
3206
3207  if(U_SUCCESS(status)) {
3208    genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);
3209  } else {
3210    log_err_status(status, "Unable to open collator\n");
3211  }
3212
3213  ucol_close(coll);
3214}
3215
3216static void TestExtremeCompression(void) {
3217  static char *test[4];
3218  int32_t j = 0, i = 0;
3219
3220  for(i = 0; i<4; i++) {
3221    test[i] = (char *)malloc(2048*sizeof(char));
3222  }
3223
3224  for(j = 20; j < 500; j++) {
3225    for(i = 0; i<4; i++) {
3226      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
3227      test[i][j-1] = (char)('a'+i);
3228      test[i][j] = 0;
3229    }
3230    genericLocaleStarter("en_US", (const char **)test, 4);
3231  }
3232
3233
3234  for(i = 0; i<4; i++) {
3235    free(test[i]);
3236  }
3237}
3238
3239#if 0
/*
 * Disabled variant of TestExtremeCompression; this code sits inside "#if 0"
 * and is never compiled.
 * NOTE(review): it would need work before it could be enabled:
 *  - it has the same name as the active TestExtremeCompression in this file;
 *  - ucol_open() is given `status`, whereas the other calls in this file
 *    pass `&status`;
 *  - `coll` is not used after being opened and is never closed;
 *  - the malloc() results are not checked.
 */
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  /* Rewrites all four strings for each length; only the final contents */
  /* (j == 2047) are passed to genericLocaleStarter() below. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  /* Only test[0] is rewritten here, and nothing reads it afterwards. */
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
3267#endif
3268
/**
 * Passes a tailoring that reorders strings containing supplementary
 * characters (written as surrogate pairs), together with a list of such
 * strings, to genericRulesStarter().
 */
static void TestSurrogates(void) {
  static const char *tailoring =
    "&z < \\ud900\\udc25   < \\ud805\\udc50"
       "< \\ud800\\udc00y  < \\ud800\\udc00r"
       "< \\ud800\\udc00f  << \\ud800\\udc00"
       "< \\ud800\\udc00fa << \\ud800\\udc00fb"
       "< \\ud800\\udc00a  < c < b" ;

  static const char *ordered[] = {
    "z","\\ud900\\udc25",  "\\ud805\\udc50",
       "\\ud800\\udc00y",  "\\ud800\\udc00r",
       "\\ud800\\udc00f",  "\\ud800\\udc00",
       "\\ud800\\udc00c", "\\ud800\\udc00b",
       "\\ud800\\udc00fa", "\\ud800\\udc00fb",
       "\\ud800\\udc00a",
       "c", "b"
  };

  /* All 14 strings take part. */
  genericRulesStarter(tailoring, ordered, LEN(ordered));
}
3289
3290/* This is a test for prefix implementation, used by JIS X 4061 collation rules */
3291static void TestPrefix(void) {
3292  uint32_t i;
3293
3294  static const struct {
3295    const char *rules;
3296    const char *data[50];
3297    const uint32_t len;
3298  } tests[] = {
3299    { "&z <<< z|a",
3300      {"zz", "za"}, 2 },
3301
3302    { "&z <<< z|   a",
3303      {"zz", "za"}, 2 },
3304    { "[strength I]"
3305      "&a=\\ud900\\udc25"
3306      "&z<<<\\ud900\\udc25|a",
3307      {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },
3308  };
3309
3310
3311  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3312    genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
3313  }
3314}
3315
3316/* This test uses data suplied by Masashiko Maedera to test the implementation */
3317/* JIS X 4061 collation order implementation                                   */
3318static void TestNewJapanese(void) {
3319
3320  static const char * const test1[] = {
3321      "\\u30b7\\u30e3\\u30fc\\u30ec",
3322      "\\u30b7\\u30e3\\u30a4",
3323      "\\u30b7\\u30e4\\u30a3",
3324      "\\u30b7\\u30e3\\u30ec",
3325      "\\u3061\\u3087\\u3053",
3326      "\\u3061\\u3088\\u3053",
3327      "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",
3328      "\\u3066\\u30fc\\u305f",
3329      "\\u30c6\\u30fc\\u30bf",
3330      "\\u30c6\\u30a7\\u30bf",
3331      "\\u3066\\u3048\\u305f",
3332      "\\u3067\\u30fc\\u305f",
3333      "\\u30c7\\u30fc\\u30bf",
3334      "\\u30c7\\u30a7\\u30bf",
3335      "\\u3067\\u3048\\u305f",
3336      "\\u3066\\u30fc\\u305f\\u30fc",
3337      "\\u30c6\\u30fc\\u30bf\\u30a1",
3338      "\\u30c6\\u30a7\\u30bf\\u30fc",
3339      "\\u3066\\u3047\\u305f\\u3041",
3340      "\\u3066\\u3048\\u305f\\u30fc",
3341      "\\u3067\\u30fc\\u305f\\u30fc",
3342      "\\u30c7\\u30fc\\u30bf\\u30a1",
3343      "\\u3067\\u30a7\\u305f\\u30a1",
3344      "\\u30c7\\u3047\\u30bf\\u3041",
3345      "\\u30c7\\u30a8\\u30bf\\u30a2",
3346      "\\u3072\\u3086",
3347      "\\u3073\\u3085\\u3042",
3348      "\\u3074\\u3085\\u3042",
3349      "\\u3073\\u3085\\u3042\\u30fc",
3350      "\\u30d3\\u30e5\\u30a2\\u30fc",
3351      "\\u3074\\u3085\\u3042\\u30fc",
3352      "\\u30d4\\u30e5\\u30a2\\u30fc",
3353      "\\u30d2\\u30e5\\u30a6",
3354      "\\u30d2\\u30e6\\u30a6",
3355      "\\u30d4\\u30e5\\u30a6\\u30a2",
3356      "\\u3073\\u3085\\u30fc\\u3042\\u30fc",
3357      "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",
3358      "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",
3359      "\\u3072\\u3085\\u3093",
3360      "\\u3074\\u3085\\u3093",
3361      "\\u3075\\u30fc\\u308a",
3362      "\\u30d5\\u30fc\\u30ea",
3363      "\\u3075\\u3045\\u308a",
3364      "\\u3075\\u30a5\\u308a",
3365      "\\u3075\\u30a5\\u30ea",
3366      "\\u30d5\\u30a6\\u30ea",
3367      "\\u3076\\u30fc\\u308a",
3368      "\\u30d6\\u30fc\\u30ea",
3369      "\\u3076\\u3045\\u308a",
3370      "\\u30d6\\u30a5\\u308a",
3371      "\\u3077\\u3046\\u308a",
3372      "\\u30d7\\u30a6\\u30ea",
3373      "\\u3075\\u30fc\\u308a\\u30fc",
3374      "\\u30d5\\u30a5\\u30ea\\u30fc",
3375      "\\u3075\\u30a5\\u308a\\u30a3",
3376      "\\u30d5\\u3045\\u308a\\u3043",
3377      "\\u30d5\\u30a6\\u30ea\\u30fc",
3378      "\\u3075\\u3046\\u308a\\u3043",
3379      "\\u30d6\\u30a6\\u30ea\\u30a4",
3380      "\\u3077\\u30fc\\u308a\\u30fc",
3381      "\\u3077\\u30a5\\u308a\\u30a4",
3382      "\\u3077\\u3046\\u308a\\u30fc",
3383      "\\u30d7\\u30a6\\u30ea\\u30a4",
3384      "\\u30d5\\u30fd",
3385      "\\u3075\\u309e",
3386      "\\u3076\\u309d",
3387      "\\u3076\\u3075",
3388      "\\u3076\\u30d5",
3389      "\\u30d6\\u3075",
3390      "\\u30d6\\u30d5",
3391      "\\u3076\\u309e",
3392      "\\u3076\\u3077",
3393      "\\u30d6\\u3077",
3394      "\\u3077\\u309d",
3395      "\\u30d7\\u30fd",
3396      "\\u3077\\u3075",
3397};
3398
3399  static const char *test2[] = {
3400    "\\u306f\\u309d", /* H\\u309d */
3401    "\\u30cf\\u30fd", /* K\\u30fd */
3402    "\\u306f\\u306f", /* HH */
3403    "\\u306f\\u30cf", /* HK */
3404    "\\u30cf\\u30cf", /* KK */
3405    "\\u306f\\u309e", /* H\\u309e */
3406    "\\u30cf\\u30fe", /* K\\u30fe */
3407    "\\u306f\\u3070", /* HH\\u309b */
3408    "\\u30cf\\u30d0", /* KK\\u309b */
3409    "\\u306f\\u3071", /* HH\\u309c */
3410    "\\u30cf\\u3071", /* KH\\u309c */
3411    "\\u30cf\\u30d1", /* KK\\u309c */
3412    "\\u3070\\u309d", /* H\\u309b\\u309d */
3413    "\\u30d0\\u30fd", /* K\\u309b\\u30fd */
3414    "\\u3070\\u306f", /* H\\u309bH */
3415    "\\u30d0\\u30cf", /* K\\u309bK */
3416    "\\u3070\\u309e", /* H\\u309b\\u309e */
3417    "\\u30d0\\u30fe", /* K\\u309b\\u30fe */
3418    "\\u3070\\u3070", /* H\\u309bH\\u309b */
3419    "\\u30d0\\u3070", /* K\\u309bH\\u309b */
3420    "\\u30d0\\u30d0", /* K\\u309bK\\u309b */
3421    "\\u3070\\u3071", /* H\\u309bH\\u309c */
3422    "\\u30d0\\u30d1", /* K\\u309bK\\u309c */
3423    "\\u3071\\u309d", /* H\\u309c\\u309d */
3424    "\\u30d1\\u30fd", /* K\\u309c\\u30fd */
3425    "\\u3071\\u306f", /* H\\u309cH */
3426    "\\u30d1\\u30cf", /* K\\u309cK */
3427    "\\u3071\\u3070", /* H\\u309cH\\u309b */
3428    "\\u3071\\u30d0", /* H\\u309cK\\u309b */
3429    "\\u30d1\\u30d0", /* K\\u309cK\\u309b */
3430    "\\u3071\\u3071", /* H\\u309cH\\u309c */
3431    "\\u30d1\\u30d1", /* K\\u309cK\\u309c */
3432  };
3433  /*
3434  static const char *test3[] = {
3435    "\\u221er\\u221e",
3436    "\\u221eR#",
3437    "\\u221et\\u221e",
3438    "#r\\u221e",
3439    "#R#",
3440    "#t%",
3441    "#T%",
3442    "8t\\u221e",
3443    "8T\\u221e",
3444    "8t#",
3445    "8T#",
3446    "8t%",
3447    "8T%",
3448    "8t8",
3449    "8T8",
3450    "\\u03c9r\\u221e",
3451    "\\u03a9R%",
3452    "rr\\u221e",
3453    "rR\\u221e",
3454    "Rr\\u221e",
3455    "RR\\u221e",
3456    "RT%",
3457    "rt8",
3458    "tr\\u221e",
3459    "tr8",
3460    "TR8",
3461    "tt8",
3462    "\\u30b7\\u30e3\\u30fc\\u30ec",
3463  };
3464  */
3465  static const UColAttribute att[] = { UCOL_STRENGTH };
3466  static const UColAttributeValue val[] = { UCOL_QUATERNARY };
3467
3468  static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HANDLING};
3469  static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };
3470
3471  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), att, val, 1);
3472  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), att, val, 1);
3473  /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/
3474  genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), attShifted, valShifted, 2);
3475  genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), attShifted, valShifted, 2);
3476}
3477
3478static void TestStrCollIdenticalPrefix(void) {
3479  const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";
3480  const char* test[] = {
3481    "ab\\ud9b0\\udc70",
3482    "ab\\ud9b0\\udc71"
3483  };
3484  genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_EQUAL);
3485}
3486/* Contractions should have all their canonically equivalent */
3487/* strings included */
3488static void TestContractionClosure(void) {
3489  static const struct {
3490    const char *rules;
3491    const char *data[10];
3492    const uint32_t len;
3493  } tests[] = {
3494    {   "&b=\\u00e4\\u00e4",
3495      { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\\u00e4" }, 5},
3496    {   "&b=\\u00C5",
3497      { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},
3498  };
3499  uint32_t i;
3500
3501
3502  for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {
3503    genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, UCOL_EQUAL);
3504  }
3505}
3506
/*
 * Feeds genericRulesStarter three rule sets that combine a "[before 3]"
 * reset with other tailorings (the last two differ only in rule order).
 * The note originally attached to this test says that it also fails.
 */
static void TestBeforePrefixFailure(void) {
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    { "&g <<< a"
      "&[before 3]\\uff41 <<< x",
      {"x", "\\uff41"}, 2 },
    {   "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30a9",
      {"\\u30a9", "\\u30a7"}, 2 },
    {   "&[before 3]\\u30a7<<<\\u30a9"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74",
      {"\\u30a9", "\\u30a7"}, 2 },
  };
  const uint32_t testCount = sizeof(tests)/sizeof(tests[0]);
  uint32_t t;

  for(t = 0; t < testCount; ++t) {
    genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
  }

#if 0
  /* Disabled: prefix ("|") variants of the rules above. */
  const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
  const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
  const char* test[] = {
      "\\u30c6\\u30fc\\u30bf",
      "\\u30c6\\u30a7\\u30bf",
  };
  genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
  genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
  /*
   * Diagnostic aid that belongs in some kind of verbose mode: it prints
   * the collation elements produced for each test string, which helps when
   * tracking down where a failure comes from.
   */
  {
    UErrorCode status = U_ZERO_ERROR;
    uint32_t i = 0;
    UCollationElements *it = NULL;
    uint32_t CE;
    UChar string[256];
    uint32_t uStringLen;
    UCollator *coll = NULL;

    uStringLen = u_unescape(rule1, string, 256);

    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

    /*coll = ucol_open("ja_JP_JIS", &status);*/
    it = ucol_openElements(coll, string, 0, &status);

    for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
      log_verbose("%s\n", test[i]);
      uStringLen = u_unescape(test[i], string, 256);
      ucol_setText(it, string, uStringLen, &status);

      while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
        log_verbose("%08X\n", CE);
      }
      log_verbose("\n");

    }

    ucol_closeElements(it);
    ucol_close(coll);
  }
#endif
}
3584
3585static void TestPrefixCompose(void) {
3586  const char* rule1 =
3587        "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc";
3588  /*
3589  const char* test[] = {
3590      "\\u30c6\\u30fc\\u30bf",
3591      "\\u30c6\\u30a7\\u30bf",
3592  };
3593  */
3594  {
3595    UErrorCode status = U_ZERO_ERROR;
3596    /*uint32_t i = 0;*/
3597    /*UCollationElements *it = NULL;*/
3598/*    uint32_t CE;*/
3599    UChar string[256];
3600    uint32_t uStringLen;
3601    UCollator *coll = NULL;
3602
3603    uStringLen = u_unescape(rule1, string, 256);
3604
3605    coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
3606    ucol_close(coll);
3607  }
3608
3609
3610}
3611
3612/*
3613[last variable] last variable value
3614[last primary ignorable] largest CE for primary ignorable
3615[last secondary ignorable] largest CE for secondary ignorable
3616[last tertiary ignorable] largest CE for tertiary ignorable
3617[top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8)
3618*/
3619
static void TestRuleOptions(void) {
  /*
   * The expected orderings below are hard-coded and match the current UCA;
   * a UCA update may force these values to change.
   */

  /*
   * These strings contain the last character before [variable top]
   * and the first and second characters (by primary weights) after it.
   * See FractionalUCA.txt. For example:
      [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
      [variable top = 0C FE]
      [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
     and
      00B4; [0D 0C, 05, 05]
   *
   * Note: Starting with UCA 6.0, the [variable top] collation element
   * is not the weight of any character or string,
   * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
   */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

  /*
   * This string has to match the character that has the [last regular] weight
   * which changes with each UCA version.
   * See the bottom of FractionalUCA.txt which says something like
      [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
   *
   * Note: Starting with UCA 6.0, the [last regular] collation element
   * is not the weight of any character or string,
   * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
   */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

  /* One entry per rule set: the rules, the expected string sequence, and
   * how many of those strings are passed on to genericRulesStarter. */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    /* - all befores here amount to zero */
    { "&[before 3][first tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first tertiary ignorable */

    { "&[before 3][last tertiary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last tertiary ignorable */

    { "&[before 3][first secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before first secondary ignorable */

    { "&[before 3][last secondary ignorable]<<<a",
        { "\\u0000", "a"}, 2
    }, /* you cannot go before last secondary ignorable */

    /* 'normal' befores */

    { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a",
        {  "c", "b", "\\u0332", "a" }, 4
    },

    /* we don't have a code point that corresponds to
     * the last primary ignorable
     */
    { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",
        {  "\\u0332", "\\u20e3", "c", "b", "a" }, 5
    },

    { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
        {  "c", "b", "\\u0009", "a", "\\u000a" }, 5
    },

    { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
        { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
    },

    { "&[first regular]<a"
      "&[before 1][first regular]<b",
      { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
    },

    { "&[before 1][last regular]<b"
      "&[last regular]<a",
        { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
    },

    { "&[before 1][first implicit]<b"
      "&[first implicit]<a",
        { "b", "\\u4e00", "a", "\\u4e01"}, 4
    },

    { "&[before 1][last implicit]<b"
      "&[last implicit]<a",
        { "b", "\\U0010FFFD", "a" }, 3
    },

    /* NOTE(review): eight strings are listed here but len is 7, so the
     * trailing "u" is never handed to the checker - confirm this is intended.
     */
    { "&[last variable]<z"
      "&[last primary ignorable]<x"
      "&[last secondary ignorable]<<y"
      "&[last tertiary ignorable]<<<w"
      "&[top]<u",
      {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
    }

  };
  const uint32_t testCount = sizeof(tests)/sizeof(tests[0]);
  uint32_t t = 0;

  while(t < testCount) {
    genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
    ++t;
  }
}
3734
3735
static void TestOptimize(void) {
  /*
   * Not a real test: it only tries out whether copying UCA contents for
   * an [optimize ...] rule fails. The functionality itself stays the same,
   * so there is nothing more specific to check.
   */
  static const struct {
    const char *rules;
    const char *data[10];
    const uint32_t len;
  } tests[] = {
    /* optimize a code point range, then compare two plain letters */
    { "[optimize [\\uAC00-\\uD7FF]]",
    { "a", "b"}, 2}
  };
  const uint32_t testCount = sizeof(tests)/sizeof(tests[0]);
  uint32_t t = 0;

  while(t < testCount) {
    genericRulesStarter(tests[t].rules, tests[t].data, tests[t].len);
    ++t;
  }
}
3757
3758/*
3759cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.
3760weiv    ucol_strcollIter?
3761cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021
3762weiv    these are the input strings?
3763cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on, we have s1 > s2
3764weiv    will check - could be a problem with utf-8 iterator
3765cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2
3766weiv    hmmm
3767cycheng@ca.ibm.c... note that we have a standalone high surrogate
3768weiv    that doesn't sound right
3769cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000
3770weiv    so you have two strings, you convert them to utf-8 and to utf-16BE
3771cycheng@ca.ibm.c... yes
3772weiv    and then do the comparison
3773cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be
3774weiv    utf-16 strings look like a little endian ones in the example you sent me
3775weiv    It could be a bug - let me try to test it out
3776cycheng@ca.ibm.c... ok
3777cycheng@ca.ibm.c... we can wait till the conf. call
3778cycheng@ca.ibm.c... next weke
3779weiv    that would be great
3780weiv    hmmm
3781weiv    I might be wrong
3782weiv    let me play with it some more
3783cycheng@ca.ibm.c... ok
3784cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062  and s4 = 0x0e400021. both are in utf-16be
3785cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2
3786cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be
3787weiv    ok
3788cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data
3789weiv    thanks
3790cycheng@ca.ibm.c... the 4 strings we sent are just samples
3791*/
#if 0
/*
 * Disabled reproduction of the report quoted above: the same two strings
 * are compared once through UTF-16BE iterators and once through UTF-8
 * iterators (normalization on), and the two results are expected to agree.
 */
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);

  /* lone high surrogate + '!'  versus  U+FFFC + 'b', as UTF-16BE bytes */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* the same two strings as UTF-8 bytes */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);

  if(resU8 != resU16) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
3832
3833#define CMSCOLL_ALEXIS2_BUFFER_SIZE 256
3834static void Alexis2(void) {
3835  UErrorCode status = U_ZERO_ERROR;
3836  UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3837  char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3838  char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SIZE];
3839  int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8LenT = 0;
3840
3841  UConverter *conv = NULL;
3842
3843  UCharIterator U16BEItS, U16BEItT;
3844  UCharIterator U8ItS, U8ItT;
3845
3846  UCollationResult resU16, resU16BE, resU8;
3847
3848  static const char* const pairs[][2] = {
3849    { "\\ud800\\u0021", "\\uFFFC\\u0062"},
3850    { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },
3851    { "\\u0E40\\u0021", "\\u00A1\\u0021"},
3852    { "\\u0E40\\u0021", "\\uFE57\\u0062"},
3853    { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},
3854    { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},
3855    { "\\u0020", "\\u0020\\u0000"}
3856/*
38575F20 (my result here)
38585F204E008E3F
38595F20 (your result here)
3860*/
3861  };
3862
3863  int32_t i = 0;
3864
3865  UCollator *coll = ucol_open("", &status);
3866  if(status == U_FILE_ACCESS_ERROR) {
3867    log_data_err("Is your data around?\n");
3868    return;
3869  } else if(U_FAILURE(status)) {
3870    log_err("Error opening collator\n");
3871    return;
3872  }
3873  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
3874  conv = ucnv_open("UTF16BE", &status);
3875  for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
3876    U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3877    U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);
3878
3879    resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);
3880
3881    log_verbose("Result of strcoll is %i\n", resU16);
3882
3883    U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);
3884    U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);
3885
3886    /* use the original sizes, as the result from converter is in bytes */
3887    uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);
3888    uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);
3889
3890    resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);
3891
3892    log_verbose("Result of U16BE is %i\n", resU16BE);
3893
3894    if(resU16 != resU16BE) {
3895      log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", pairs[i][0], pairs[i][1]);
3896    }
3897
3898    u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16LenS, &status);
3899    u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16LenT, &status);
3900
3901    uiter_setUTF8(&U8ItS, U8Source, U8LenS);
3902    uiter_setUTF8(&U8ItT, U8Target, U8LenT);
3903
3904    resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);
3905
3906    if(resU16 != resU8) {
3907      log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pairs[i][0], pairs[i][1]);
3908    }
3909
3910  }
3911
3912  ucol_close(coll);
3913  ucnv_close(conv);
3914}
3915
3916static void TestHebrewUCA(void) {
3917  UErrorCode status = U_ZERO_ERROR;
3918  static const char *first[] = {
3919    "d790d6b8d79cd795d6bcd7a9",
3920    "d790d79cd79ed7a7d799d799d7a1",
3921    "d790d6b4d79ed795d6bcd7a9",
3922  };
3923
3924  char utf8String[3][256];
3925  UChar utf16String[3][256];
3926
3927  int32_t i = 0, j = 0;
3928  int32_t sizeUTF8[3];
3929  int32_t sizeUTF16[3];
3930
3931  UCollator *coll = ucol_open("", &status);
3932  if (U_FAILURE(status)) {
3933      log_err_status(status, "Could not open UCA collation %s\n", u_errorName(status));
3934      return;
3935  }
3936  /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/
3937
3938  for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {
3939    sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);
3940    u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i], &status);
3941    log_verbose("%i: ");
3942    for(j = 0; j < sizeUTF16[i]; j++) {
3943      /*log_verbose("\\u%04X", utf16String[i][j]);*/
3944      log_verbose("%04X", utf16String[i][j]);
3945    }
3946    log_verbose("\n");
3947  }
3948  for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {
3949    for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {
3950      doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);
3951    }
3952  }
3953
3954  ucol_close(coll);
3955
3956}
3957
3958static void TestPartialSortKeyTermination(void) {
3959  static const char* cases[] = {
3960    "\\u1234\\u1234\\udc00",
3961    "\\udc00\\ud800\\ud800"
3962  };
3963
3964  int32_t i = sizeof(UCollator);
3965
3966  UErrorCode status = U_ZERO_ERROR;
3967
3968  UCollator *coll = ucol_open("", &status);
3969
3970  UCharIterator iter;
3971
3972  UChar currCase[256];
3973  int32_t length = 0;
3974  int32_t pKeyLen = 0;
3975
3976  uint8_t key[256];
3977
3978  for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
3979    uint32_t state[2] = {0, 0};
3980    length = u_unescape(cases[i], currCase, 256);
3981    uiter_setString(&iter, currCase, length);
3982    pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);
3983
3984    log_verbose("Done\n");
3985
3986  }
3987  ucol_close(coll);
3988}
3989
3990static void TestSettings(void) {
3991  static const char* cases[] = {
3992    "apple",
3993      "Apple"
3994  };
3995
3996  static const char* locales[] = {
3997    "",
3998      "en"
3999  };
4000
4001  UErrorCode status = U_ZERO_ERROR;
4002
4003  int32_t i = 0, j = 0;
4004
4005  UChar source[256], target[256];
4006  int32_t sLen = 0, tLen = 0;
4007
4008  UCollator *collateObject = NULL;
4009  for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {
4010    collateObject = ucol_open(locales[i], &status);
4011    ucol_setStrength(collateObject, UCOL_PRIMARY);
4012    ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);
4013    for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {
4014      sLen = u_unescape(cases[j-1], source, 256);
4015      source[sLen] = 0;
4016      tLen = u_unescape(cases[j], target, 256);
4017      source[tLen] = 0;
4018      doTest(collateObject, source, target, UCOL_EQUAL);
4019    }
4020    ucol_close(collateObject);
4021  }
4022}
4023
4024static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCollator *target) {
4025    UErrorCode status = U_ZERO_ERROR;
4026    int32_t errorNo = 0;
4027    /*const UChar *sourceRules = NULL;*/
4028    /*int32_t sourceRulesLen = 0;*/
4029    UColAttributeValue french = UCOL_OFF;
4030    int32_t cloneSize = 0;
4031
4032    if(!ucol_equals(source, target)) {
4033        log_err("Same collators, different address not equal\n");
4034        errorNo++;
4035    }
4036    ucol_close(target);
4037    if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {
4038        /* currently, safeClone is implemented through getRules/openRules
4039        * so it is the same as the test below - I will comment that test out.
4040        */
4041        /* real thing */
4042        target = ucol_safeClone(source, NULL, &cloneSize, &status);
4043        if(U_FAILURE(status)) {
4044            log_err("Error creating clone\n");
4045            errorNo++;
4046            return errorNo;
4047        }
4048        if(!ucol_equals(source, target)) {
4049            log_err("Collator different from it's clone\n");
4050            errorNo++;
4051        }
4052        french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);
4053        if(french == UCOL_ON) {
4054            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
4055        } else {
4056            ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
4057        }
4058        if(U_FAILURE(status)) {
4059            log_err("Error setting attributes\n");
4060            errorNo++;
4061            return errorNo;
4062        }
4063        if(ucol_equals(source, target)) {
4064            log_err("Collators same even when options changed\n");
4065            errorNo++;
4066        }
4067        ucol_close(target);
4068        /* commented out since safeClone uses exactly the same technique */
4069        /*
4070        sourceRules = ucol_getRules(source, &sourceRulesLen);
4071        target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4072        if(U_FAILURE(status)) {
4073        log_err("Error instantiating target from rules\n");
4074        errorNo++;
4075        return errorNo;
4076        }
4077        if(!ucol_equals(source, target)) {
4078        log_err("Collator different from collator that was created from the same rules\n");
4079        errorNo++;
4080        }
4081        ucol_close(target);
4082        */
4083    }
4084    return errorNo;
4085}
4086
4087
4088static void TestEquals(void) {
4089    /* ucol_equals is not currently a public API. There is a chance that it will become
4090    * something like this, but currently it is only used by RuleBasedCollator::operator==
4091    */
4092    /* test whether the two collators instantiated from the same locale are equal */
4093    UErrorCode status = U_ZERO_ERROR;
4094    UParseError parseError;
4095    int32_t noOfLoc = uloc_countAvailable();
4096    const char *locName = NULL;
4097    UCollator *source = NULL, *target = NULL;
4098    int32_t i = 0;
4099
4100    const char* rules[] = {
4101        "&l < lj <<< Lj <<< LJ",
4102        "&n < nj <<< Nj <<< NJ",
4103        "&ae <<< \\u00e4",
4104        "&AE <<< \\u00c4"
4105    };
4106    /*
4107    const char* badRules[] = {
4108    "&l <<< Lj",
4109    "&n < nj <<< nJ <<< NJ",
4110    "&a <<< \\u00e4",
4111    "&AE <<< \\u00c4 <<< x"
4112    };
4113    */
4114
4115    UChar sourceRules[1024], targetRules[1024];
4116    int32_t sourceRulesSize = 0, targetRulesSize = 0;
4117    int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);
4118
4119    for(i = 0; i < rulesSize; i++) {
4120        sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 1024 - sourceRulesSize);
4121        targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRulesSize, 1024 - targetRulesSize);
4122    }
4123
4124    source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4125    if(status == U_FILE_ACCESS_ERROR) {
4126        log_data_err("Is your data around?\n");
4127        return;
4128    } else if(U_FAILURE(status)) {
4129        log_err("Error opening collator\n");
4130        return;
4131    }
4132    target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
4133    if(!ucol_equals(source, target)) {
4134        log_err("Equivalent collators not equal!\n");
4135    }
4136    ucol_close(source);
4137    ucol_close(target);
4138
4139    source = ucol_open("root", &status);
4140    target = ucol_open("root", &status);
4141    log_verbose("Testing root\n");
4142    if(!ucol_equals(source, source)) {
4143        log_err("Same collator not equal\n");
4144    }
4145    if(TestEqualsForCollator(locName, source, target)) {
4146        log_err("Errors for root\n", locName);
4147    }
4148    ucol_close(source);
4149
4150    for(i = 0; i<noOfLoc; i++) {
4151        status = U_ZERO_ERROR;
4152        locName = uloc_getAvailable(i);
4153        /*if(hasCollationElements(locName)) {*/
4154        log_verbose("Testing equality for locale %s\n", locName);
4155        source = ucol_open(locName, &status);
4156        target = ucol_open(locName, &status);
4157        if (U_FAILURE(status)) {
4158            log_err("Error opening collator for locale %s  %s\n", locName, u_errorName(status));
4159            continue;
4160        }
4161        if(TestEqualsForCollator(locName, source, target)) {
4162            log_err("Errors for locale %s\n", locName);
4163        }
4164        ucol_close(source);
4165        /*}*/
4166    }
4167}
4168
4169static void TestJ2726(void) {
4170    UChar a[2] = { 0x61, 0x00 }; /*"a"*/
4171    UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/
4172    UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/
4173    UErrorCode status = U_ZERO_ERROR;
4174    UCollator *coll = ucol_open("en", &status);
4175    ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
4176    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4177    doTest(coll, a, aSpace, UCOL_EQUAL);
4178    doTest(coll, aSpace, a, UCOL_EQUAL);
4179    doTest(coll, a, spaceA, UCOL_EQUAL);
4180    doTest(coll, spaceA, a, UCOL_EQUAL);
4181    doTest(coll, spaceA, aSpace, UCOL_EQUAL);
4182    doTest(coll, aSpace, spaceA, UCOL_EQUAL);
4183    ucol_close(coll);
4184}
4185
4186static void NullRule(void) {
4187    UChar r[3] = {0};
4188    UErrorCode status = U_ZERO_ERROR;
4189    UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4190    if(U_SUCCESS(status)) {
4191        log_err("This should have been an error!\n");
4192        ucol_close(coll);
4193    } else {
4194        status = U_ZERO_ERROR;
4195    }
4196    coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);
4197    if(U_FAILURE(status)) {
4198        log_err_status(status, "Empty rules should have produced a valid collator -> %s\n", u_errorName(status));
4199    } else {
4200        ucol_close(coll);
4201    }
4202}
4203
4204/**
4205 * Test for CollationElementIterator previous and next for the whole set of
4206 * unicode characters with normalization on.
4207 */
4208static void TestNumericCollation(void)
4209{
4210    UErrorCode status = U_ZERO_ERROR;
4211
4212    const static char *basicTestStrings[]={
4213    "hello1",
4214    "hello2",
4215    "hello2002",
4216    "hello2003",
4217    "hello123456",
4218    "hello1234567",
4219    "hello10000000",
4220    "hello100000000",
4221    "hello1000000000",
4222    "hello10000000000",
4223    };
4224
4225    const static char *preZeroTestStrings[]={
4226    "avery10000",
4227    "avery010000",
4228    "avery0010000",
4229    "avery00010000",
4230    "avery000010000",
4231    "avery0000010000",
4232    "avery00000010000",
4233    "avery000000010000",
4234    };
4235
4236    const static char *thirtyTwoBitNumericStrings[]={
4237    "avery42949672960",
4238    "avery42949672961",
4239    "avery42949672962",
4240    "avery429496729610"
4241    };
4242
4243     const static char *longNumericStrings[]={
4244     /* Some of these sort out of the order that would expected if digits-as-numbers handled arbitrarily-long digit strings.
4245        In fact, a single collation element can represent a maximum of 254 digits as a number. Digit strings longer than that
4246        are treated as multiple collation elements. */
4247    "num9234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123z", /*253digits, num + 9.23E252 + z */
4248    "num10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*254digits, num + 1.00E253 */
4249    "num100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order but expected */
4250    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 1.23E253 */
4251    "num123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345", /*255digits, num + 1.23E253 + 5 */
4252    "num1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456", /*256digits, num + 1.23E253 + 56 */
4253    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567", /*257digits, num + 1.23E253 + 567 */
4254    "num12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order but expected */
4255    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but expected */
4256    "num92345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order but expected */
4257    };
4258
4259    const static char *supplementaryDigits[] = {
4260      "\\uD835\\uDFCE", /* 0 */
4261      "\\uD835\\uDFCF", /* 1 */
4262      "\\uD835\\uDFD0", /* 2 */
4263      "\\uD835\\uDFD1", /* 3 */
4264      "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */
4265      "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */
4266      "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */
4267      "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */
4268      "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */
4269      "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */
4270    };
4271
4272    const static char *foreignDigits[] = {
4273      "\\u0661",
4274        "\\u0662",
4275        "\\u0663",
4276      "\\u0661\\u0660",
4277      "\\u0661\\u0662",
4278      "\\u0661\\u0663",
4279      "\\u0662\\u0660",
4280      "\\u0662\\u0662",
4281      "\\u0662\\u0663",
4282      "\\u0663\\u0660",
4283      "\\u0663\\u0662",
4284      "\\u0663\\u0663"
4285    };
4286
4287    const static char *evenZeroes[] = {
4288      "2000",
4289      "2001",
4290        "2002",
4291        "2003"
4292    };
4293
4294    UColAttribute att = UCOL_NUMERIC_COLLATION;
4295    UColAttributeValue val = UCOL_ON;
4296
4297    /* Open our collator. */
4298    UCollator* coll = ucol_open("root", &status);
4299    if (U_FAILURE(status)){
4300        log_err_status(status, "ERROR: in using ucol_open() -> %s\n",
4301              myErrorName(status));
4302        return;
4303    }
4304    genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestStrings)/sizeof(basicTestStrings[0]), &att, &val, 1);
4305    genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(thirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1);
4306    genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumericStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);
4307    genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits)/sizeof(foreignDigits[0]), &att, &val, 1);
4308    genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(supplementaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);
4309    genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeof(evenZeroes[0]), &att, &val, 1);
4310
4311    /* Setting up our collator to do digits. */
4312    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
4313    if (U_FAILURE(status)){
4314        log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n",
4315              myErrorName(status));
4316        return;
4317    }
4318
4319    /*
4320       Testing that prepended zeroes still yield the correct collation behavior.
4321       We expect that every element in our strings array will be equal.
4322    */
4323    genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestStrings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);
4324
4325    ucol_close(coll);
4326}
4327
4328static void TestTibetanConformance(void)
4329{
4330    const char* test[] = {
4331        "\\u0FB2\\u0591\\u0F71\\u0061",
4332        "\\u0FB2\\u0F71\\u0061"
4333    };
4334
4335    UErrorCode status = U_ZERO_ERROR;
4336    UCollator *coll = ucol_open("", &status);
4337    UChar source[100];
4338    UChar target[100];
4339    int result;
4340    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4341    if (U_SUCCESS(status)) {
4342        u_unescape(test[0], source, 100);
4343        u_unescape(test[1], target, 100);
4344        doTest(coll, source, target, UCOL_EQUAL);
4345        result = ucol_strcoll(coll, source, -1,   target, -1);
4346        log_verbose("result %d\n", result);
4347        if (UCOL_EQUAL != result) {
4348            log_err("Tibetan comparison error\n");
4349        }
4350    }
4351    ucol_close(coll);
4352
4353    genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);
4354}
4355
/* Hands a two-entry Han string list to genericLocaleStarter() for the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *pinyinCases[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };

    genericLocaleStarter("zh__PINYIN", pinyinCases, LEN(pinyinCases));
}
4360
/* Upper bound (inclusive) of the raw values walked by the implicit-weight test. */
#define TST_UCOL_MAX_INPUT 0x220001
/*
 * Bit masks applied to 32-bit implicit weights.
 * They deliberately carry no trailing semicolon so that they can appear
 * anywhere inside an expression, not only at the very end of a statement.
 */
#define topByte 0xFF000000
#define bottomByte 0xFF
#define fourBytes 0xFFFFFFFF
4365
4366
4367static void showImplicit(UChar32 i) {
4368    if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {
4369        log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));
4370    }
4371}
4372
4373static void TestImplicitGeneration(void) {
4374    UErrorCode status = U_ZERO_ERROR;
4375    UChar32 last = 0;
4376    UChar32 current;
4377    UChar32 i = 0, j = 0;
4378    UChar32 roundtrip = 0;
4379    UChar32 lastBottom = 0;
4380    UChar32 currentBottom = 0;
4381    UChar32 lastTop = 0;
4382    UChar32 currentTop = 0;
4383
4384    UCollator *coll = ucol_open("root", &status);
4385    if(U_FAILURE(status)) {
4386        log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4387        return;
4388    }
4389
4390    uprv_uca_getRawFromImplicit(0xE20303E7);
4391
4392    for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {
4393        current = uprv_uca_getImplicitFromRaw(i) & fourBytes;
4394
4395        /* check that it round-trips AND that all intervening ones are illegal*/
4396        roundtrip = uprv_uca_getRawFromImplicit(current);
4397        if (roundtrip != i) {
4398            log_err("No roundtrip %08X\n", i);
4399        }
4400        if (last != 0) {
4401            for (j = last + 1; j < current; ++j) {
4402                roundtrip = uprv_uca_getRawFromImplicit(j);
4403                /* raise an error if it *doesn't* find an error*/
4404                if (roundtrip != -1) {
4405                    log_err("Fails to recognize illegal %08X\n", j);
4406                }
4407            }
4408        }
4409        /* now do other consistency checks*/
4410        lastBottom = last & bottomByte;
4411        currentBottom = current & bottomByte;
4412        lastTop = last & topByte;
4413        currentTop = current & topByte;
4414
4415        /* print out some values for spot-checking*/
4416        if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
4417            showImplicit(i-3);
4418            showImplicit(i-2);
4419            showImplicit(i-1);
4420            showImplicit(i);
4421            showImplicit(i+1);
4422            showImplicit(i+2);
4423        }
4424        last = current;
4425
4426        if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {
4427            log_err("No raw <-> code point roundtrip for 0x%08X\n", i);
4428        }
4429    }
4430    showImplicit(TST_UCOL_MAX_INPUT-2);
4431    showImplicit(TST_UCOL_MAX_INPUT-1);
4432    showImplicit(TST_UCOL_MAX_INPUT);
4433    ucol_close(coll);
4434}
4435
4436/**
4437 * Iterate through the given iterator, checking to see that all the strings
4438 * in the expected array are present.
4439 * @param expected array of strings we expect to see, or NULL
4440 * @param expectedCount number of elements of expected, or 0
4441 */
4442static int32_t checkUEnumeration(const char* msg,
4443                                 UEnumeration* iter,
4444                                 const char** expected,
4445                                 int32_t expectedCount) {
4446    UErrorCode ec = U_ZERO_ERROR;
4447    int32_t i = 0, n, j, bit;
4448    int32_t seenMask = 0;
4449
4450    U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */
4451    n = uenum_count(iter, &ec);
4452    if (!assertSuccess("count", &ec)) return -1;
4453    log_verbose("%s = [", msg);
4454    for (;; ++i) {
4455        const char* s = uenum_next(iter, NULL, &ec);
4456        if (!assertSuccess("snext", &ec) || s == NULL) break;
4457        if (i != 0) log_verbose(",");
4458        log_verbose("%s", s);
4459        /* check expected list */
4460        for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4461            if ((seenMask&bit) == 0 &&
4462                uprv_strcmp(s, expected[j]) == 0) {
4463                seenMask |= bit;
4464                break;
4465            }
4466        }
4467    }
4468    log_verbose("] (%d)\n", i);
4469    assertTrue("count verified", i==n);
4470    /* did we see all expected strings? */
4471    for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {
4472        if ((seenMask&bit)!=0) {
4473            log_verbose("Ok: \"%s\" seen\n", expected[j]);
4474        } else {
4475            log_err("FAIL: \"%s\" not seen\n", expected[j]);
4476        }
4477    }
4478    return n;
4479}
4480
4481/**
4482 * Test new API added for separate collation tree.
4483 */
4484static void TestSeparateTrees(void) {
4485    UErrorCode ec = U_ZERO_ERROR;
4486    UEnumeration *e = NULL;
4487    int32_t n = -1;
4488    UBool isAvailable;
4489    char loc[256];
4490
4491    static const char* AVAIL[] = { "en", "de" };
4492
4493    static const char* KW[] = { "collation" };
4494
4495    static const char* KWVAL[] = { "phonebook", "stroke" };
4496
4497#if !UCONFIG_NO_SERVICE
4498    e = ucol_openAvailableLocales(&ec);
4499    if (e != NULL) {
4500        assertSuccess("ucol_openAvailableLocales", &ec);
4501        assertTrue("ucol_openAvailableLocales!=0", e!=0);
4502        n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL));
4503        /* Don't need to check n because we check list */
4504        uenum_close(e);
4505    } else {
4506        log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you missing data?)\n", u_errorName(ec));
4507    }
4508#endif
4509
4510    e = ucol_getKeywords(&ec);
4511    if (e != NULL) {
4512        assertSuccess("ucol_getKeywords", &ec);
4513        assertTrue("ucol_getKeywords!=0", e!=0);
4514        n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));
4515        /* Don't need to check n because we check list */
4516        uenum_close(e);
4517    } else {
4518        log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing data?)\n", u_errorName(ec));
4519    }
4520
4521    e = ucol_getKeywordValues(KW[0], &ec);
4522    if (e != NULL) {
4523        assertSuccess("ucol_getKeywordValues", &ec);
4524        assertTrue("ucol_getKeywordValues!=0", e!=0);
4525        n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));
4526        /* Don't need to check n because we check list */
4527        uenum_close(e);
4528    } else {
4529        log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missing data?)\n", u_errorName(ec));
4530    }
4531
4532    /* Try setting a warning before calling ucol_getKeywordValues */
4533    ec = U_USING_FALLBACK_WARNING;
4534    e = ucol_getKeywordValues(KW[0], &ec);
4535    if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {
4536        assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);
4537        n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e, KWVAL, LEN(KWVAL));
4538        /* Don't need to check n because we check list */
4539        uenum_close(e);
4540    }
4541
4542    /*
4543U_DRAFT int32_t U_EXPORT2
4544ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
4545                             const char* locale, UBool* isAvailable,
4546                             UErrorCode* status);
4547}
4548*/
4549    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",
4550                                     &isAvailable, &ec);
4551    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4552        assertEquals("getFunctionalEquivalent(de)", "de", loc);
4553        assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
4554                   isAvailable == TRUE);
4555    }
4556
4557    n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",
4558                                     &isAvailable, &ec);
4559    if (assertSuccess("getFunctionalEquivalent", &ec)) {
4560        assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);
4561        assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",
4562                   isAvailable == TRUE);
4563    }
4564}
4565
/*
 * Supersedes TestJ784.
 * Feeds two string lists both to a collator built from explicit
 * "[before 2]" tone-mark rules and to the "zh" locale collator.
 */
static void TestBeforePinyin(void) {
    static const char toneRules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* short syllables with and without a macron */
    static const char *syllables[] = {
        "l\\u0101",
        "la",
        "l\\u0101n",
        "lan ",
        "l\\u0113",
        "le",
        "l\\u0113n",
        "len"
    };

    /* tone-marked variants of a/A in combination with case differences */
    static const char *toneAndCase[] = {
        "x\\u0101",
        "x\\u0100",
        "X\\u0101",
        "X\\u0100",
        "x\\u00E1",
        "x\\u00C1",
        "X\\u00E1",
        "X\\u00C1",
        "x\\u01CE",
        "x\\u01CD",
        "X\\u01CE",
        "X\\u01CD",
        "x\\u00E0",
        "x\\u00C0",
        "X\\u00E0",
        "X\\u00C0",
        "xa",
        "xA",
        "Xa",
        "XA",
        "x\\u0101x",
        "x\\u0100x",
        "x\\u00E1x",
        "x\\u00C1x",
        "x\\u01CEx",
        "x\\u01CDx",
        "x\\u00E0x",
        "x\\u00C0x",
        "xax",
        "xAx"
    };

    genericRulesStarter(toneRules, syllables, LEN(syllables));
    genericLocaleStarter("zh", syllables, LEN(syllables));
    genericRulesStarter(toneRules, toneAndCase, LEN(toneAndCase));
    genericLocaleStarter("zh", toneAndCase, LEN(toneAndCase));
}
4626
4627static void TestBeforeTightening(void) {
4628    static const struct {
4629        const char *rules;
4630        UErrorCode expectedStatus;
4631    } tests[] = {
4632        { "&[before 1]a<x", U_ZERO_ERROR },
4633        { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },
4634        { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },
4635        { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },
4636        { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },
4637        { "&[before 2]a<<x",U_ZERO_ERROR },
4638        { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },
4639        { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },
4640        { "&[before 3]a<x",U_INVALID_FORMAT_ERROR  },
4641        { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR  },
4642        { "&[before 3]a<<<x",U_ZERO_ERROR },
4643        { "&[before 3]a=x",U_INVALID_FORMAT_ERROR  },
4644        { "&[before I]a = x",U_INVALID_FORMAT_ERROR }
4645    };
4646
4647    int32_t i = 0;
4648
4649    UErrorCode status = U_ZERO_ERROR;
4650    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4651    uint32_t rlen = 0;
4652
4653    UCollator *coll = NULL;
4654
4655
4656    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
4657        rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);
4658        coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4659        if(status != tests[i].expectedStatus) {
4660            log_err_status(status, "Opening a collator with rules %s returned error code %s, expected %s\n",
4661                tests[i].rules, u_errorName(status), u_errorName(tests[i].expectedStatus));
4662        }
4663        ucol_close(coll);
4664        status = U_ZERO_ERROR;
4665    }
4666
4667}
4668
4669#if 0
4670&m < a
4671&[before 1] a < x <<< X << q <<< Q < z
4672assert: m <<< M < x <<< X << q <<< Q < z < a < n
4673
4674&m < a
4675&[before 2] a << x <<< X << q <<< Q < z
4676assert: m <<< M < x <<< X << q <<< Q << a < z < n
4677
4678&m < a
4679&[before 3] a <<< x <<< X << q <<< Q < z
4680assert: m <<< M < x <<< X <<< a << q <<< Q < z < n
4681
4682
4683&m << a
4684&[before 1] a < x <<< X << q <<< Q < z
4685assert: x <<< X << q <<< Q < z < m <<< M << a < n
4686
4687&m << a
4688&[before 2] a << x <<< X << q <<< Q < z
4689assert: m <<< M << x <<< X << q <<< Q << a < z < n
4690
4691&m << a
4692&[before 3] a <<< x <<< X << q <<< Q < z
4693assert: m <<< M << x <<< X <<< a << q <<< Q < z < n
4694
4695
4696&m <<< a
4697&[before 1] a < x <<< X << q <<< Q < z
4698assert: x <<< X << q <<< Q < z < n < m <<< a <<< M
4699
4700&m <<< a
4701&[before 2] a << x <<< X << q <<< Q < z
4702assert:  x <<< X << q <<< Q << m <<< a <<< M < z < n
4703
4704&m <<< a
4705&[before 3] a <<< x <<< X << q <<< Q < z
4706assert: m <<< x <<< X <<< a <<< M  << q <<< Q < z < n
4707
4708
4709&[before 1] s < x <<< X << q <<< Q < z
4710assert: r <<< R < x <<< X << q <<< Q < z < s < n
4711
4712&[before 2] s << x <<< X << q <<< Q < z
4713assert: r <<< R < x <<< X << q <<< Q << s < z < n
4714
4715&[before 3] s <<< x <<< X << q <<< Q < z
4716assert: r <<< R < x <<< X <<< s << q <<< Q < z < n
4717
4718
4719&[before 1] \u24DC < x <<< X << q <<< Q < z
4720assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M
4721
4722&[before 2] \u24DC << x <<< X << q <<< Q < z
4723assert:  x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n
4724
4725&[before 3] \u24DC <<< x <<< X << q <<< Q < z
4726assert: m <<< x <<< X <<< \u24DC <<< M  << q <<< Q < z < n
4727#endif
4728
4729
4730#if 0
/*
 * requires features not yet supported
 *
 * Disabled (compiled out by the surrounding #if 0).
 * Each table entry pairs a rule string containing a "[before N]" reset with
 * a list of strings and the number of strings in that list; every entry is
 * handed to genericRulesStarter().
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
        { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
        { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
        { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
        { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
        { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
        { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
        { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    int32_t i = 0;

    /* run every rule/list pair through the generic rules driver */
    for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
        genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size);
    }
}
4776#endif
4777
4778static void TestTailorNULL( void ) {
4779    const static char* rule = "&a <<< '\\u0000'";
4780    UErrorCode status = U_ZERO_ERROR;
4781    UChar rlz[RULE_BUFFER_LEN] = { 0 };
4782    uint32_t rlen = 0;
4783    UChar a = 1, null = 0;
4784    UCollationResult res = UCOL_EQUAL;
4785
4786    UCollator *coll = NULL;
4787
4788
4789    rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);
4790    coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);
4791
4792    if(U_FAILURE(status)) {
4793        log_err_status(status, "Could not open default collator! -> %s\n", u_errorName(status));
4794    } else {
4795        res = ucol_strcoll(coll, &a, 1, &null, 1);
4796
4797        if(res != UCOL_LESS) {
4798            log_err("NULL was not tailored properly!\n");
4799        }
4800    }
4801
4802    ucol_close(coll);
4803}
4804
4805static void
4806TestUpperFirstQuaternary(void)
4807{
4808  const char* tests[] = { "B", "b", "Bb", "bB" };
4809  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };
4810  UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };
4811  genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4812}
4813
4814static void
4815TestJ4960(void)
4816{
4817  const char* tests[] = { "\\u00e2T", "aT" };
4818  UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };
4819  UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };
4820  const char* tests2[] = { "a", "A" };
4821  const char* rule = "&[first tertiary ignorable]=A=a";
4822  UColAttribute att2[] = { UCOL_CASE_LEVEL };
4823  UColAttributeValue attVals2[] = { UCOL_ON };
4824  /* Test whether we correctly ignore primary ignorables on case level when */
4825  /* we have only primary & case level */
4826  genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);
4827  /* Test whether ICU4J will make case level for sortkeys that have primary strength */
4828  /* and case level */
4829  genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0]), att, attVals, sizeof(att)/sizeof(att[0]));
4830  /* Test whether completely ignorable letters have case level info (they shouldn't) */
4831  genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(tests2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);
4832}
4833
4834static void
4835TestJ5223(void)
4836{
4837  static const char *test = "this is a test string";
4838  UChar ustr[256];
4839  int32_t ustr_length = u_unescape(test, ustr, 256);
4840  unsigned char sortkey[256];
4841  int32_t sortkey_length;
4842  UErrorCode status = U_ZERO_ERROR;
4843  static UCollator *coll = NULL;
4844  coll = ucol_open("root", &status);
4845  if(U_FAILURE(status)) {
4846    log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
4847    return;
4848  }
4849  ucol_setStrength(coll, UCOL_PRIMARY);
4850  ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
4851  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
4852  if (U_FAILURE(status)) {
4853    log_err("Failed setting atributes\n");
4854    return;
4855  }
4856  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);
4857  if (sortkey_length > 256) return;
4858
4859  /* we mark the position where the null byte should be written in advance */
4860  sortkey[sortkey_length-1] = 0xAA;
4861
4862  /* we set the buffer size one byte higher than needed */
4863  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4864    sortkey_length+1);
4865
4866  /* no error occurs (for me) */
4867  if (sortkey[sortkey_length-1] == 0xAA) {
4868    log_err("Hit bug at first try\n");
4869  }
4870
4871  /* we mark the position where the null byte should be written again */
4872  sortkey[sortkey_length-1] = 0xAA;
4873
4874  /* this time we set the buffer size to the exact amount needed */
4875  sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,
4876    sortkey_length);
4877
4878  /* now the trailing null byte is not written */
4879  if (sortkey[sortkey_length-1] == 0xAA) {
4880    log_err("Hit bug at second try\n");
4881  }
4882
4883  ucol_close(coll);
4884}
4885
/*
 * Regression test for the Thai partial sort key problem: two Thai strings
 * that differ only in their second-to-last code unit (U+0E47 vs U+0E48) are
 * handed to the generic locale driver for "th".
 */
static void
TestJ5232(void)
{
    static const char *thaiCases[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiCases, LEN(thaiCases));
}
4897
/*
 * Runs the pair "a", "y" through a collator built from a rule that tailors
 * a contraction-like reset (&Ny << Y) and places 'a' after the first
 * secondary ignorable.
 */
static void
TestJ5367(void)
{
    static const char *pair[] = { "a", "y" };
    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, pair, LEN(pair));
}
4905
4906static void
4907TestVI5913(void)
4908{
4909    UErrorCode status = U_ZERO_ERROR;
4910    int32_t i, j;
4911    UCollator *coll =NULL;
4912    uint8_t  resColl[100], expColl[100];
4913    int32_t  rLen, tLen, ruleLen, sLen, kLen;
4914    UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0};  /* &a<0x1FF3-omega with Ypogegrammeni*/
4915    UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0};  /* &z<s with caron*/
4916    UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0};  /* &z<a+e with circumflex.*/
4917    static const UChar tData[][20]={
4918        {0x1EAC, 0},
4919        {0x0041, 0x0323, 0x0302, 0},
4920        {0x1EA0, 0x0302, 0},
4921        {0x00C2, 0x0323, 0},
4922        {0x1ED8, 0},  /* O with dot and circumflex */
4923        {0x1ECC, 0x0302, 0},
4924        {0x1EB7, 0},
4925        {0x1EA1, 0x0306, 0},
4926    };
4927    static const UChar tailorData[][20]={
4928        {0x1FA2, 0},  /* Omega with 3 combining marks */
4929        {0x03C9, 0x0313, 0x0300, 0x0345, 0},
4930        {0x1FF3, 0x0313, 0x0300, 0},
4931        {0x1F60, 0x0300, 0x0345, 0},
4932        {0x1F62, 0x0345, 0},
4933        {0x1FA0, 0x0300, 0},
4934    };
4935    static const UChar tailorData2[][20]={
4936        {0x1E63, 0x030C, 0},  /* s with dot below + caron */
4937        {0x0073, 0x0323, 0x030C, 0},
4938        {0x0073, 0x030C, 0x0323, 0},
4939    };
4940    static const UChar tailorData3[][20]={
4941        {0x007a, 0},  /*  z */
4942        {0x0061, 0x0065, 0},  /*  a + e */
4943        {0x0061, 0x00ea, 0}, /* a + e with circumflex */
4944        {0x0061, 0x1EC7, 0},  /* a+ e with dot below and circumflex */
4945        {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumflex */
4946        {0x0061, 0x00EA, 0x0323, 0},  /* a + e with circumflex + combining dot below */
4947        {0x00EA, 0x0323, 0},  /* e with circumflex + combining dot below */
4948        {0x00EA, 0},  /* e with circumflex  */
4949    };
4950
4951    /* Test Vietnamese sort. */
4952    coll = ucol_open("vi", &status);
4953    if(U_FAILURE(status)) {
4954        log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
4955        return;
4956    }
4957    log_verbose("\n\nVI collation:");
4958    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tData[2])) ) {
4959        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4960    }
4961    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tData[3])) ) {
4962        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4963    }
4964    if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tData[4])) ) {
4965        log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");
4966    }
4967    if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tData[6])) ) {
4968        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4969    }
4970
4971    for (j=0; j<8; j++) {
4972        tLen = u_strlen(tData[j]);
4973        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4974        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4975        for(i = 0; i<rLen; i++) {
4976            log_verbose(" %02X", resColl[i]);
4977        }
4978    }
4979
4980    ucol_close(coll);
4981
4982    /* Test Romanian sort. */
4983    coll = ucol_open("ro", &status);
4984    log_verbose("\n\nRO collation:");
4985    if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tData[1])) ) {
4986        log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");
4987    }
4988    if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tData[5])) ) {
4989        log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");
4990    }
4991    if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tData[7])) ) {
4992        log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");
4993    }
4994
4995    for (j=4; j<8; j++) {
4996        tLen = u_strlen(tData[j]);
4997        log_verbose("\n Data :%s  \tlen: %d key: ", tData[j], tLen);
4998        rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);
4999        for(i = 0; i<rLen; i++) {
5000            log_verbose(" %02X", resColl[i]);
5001        }
5002    }
5003    ucol_close(coll);
5004
5005    /* Test the precomposed Greek character with 3 combining marks. */
5006    log_verbose("\n\nTailoring test: Greek character with 3 combining marks");
5007    ruleLen = u_strlen(rule);
5008    coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5009    if (U_FAILURE(status)) {
5010        log_err("ucol_openRules failed with %s\n", u_errorName(status));
5011        return;
5012    }
5013    sLen = u_strlen(tailorData[0]);
5014    for (j=1; j<6; j++) {
5015        tLen = u_strlen(tailorData[j]);
5016        if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen))  {
5017            log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);
5018        }
5019    }
5020    /* Test getSortKey. */
5021    tLen = u_strlen(tailorData[0]);
5022    kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);
5023    for (j=0; j<6; j++) {
5024        tLen = u_strlen(tailorData[j]);
5025        rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);
5026        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5027            log_err("\n Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5028            for(i = 0; i<rLen; i++) {
5029                log_err(" %02X", resColl[i]);
5030            }
5031        }
5032    }
5033    ucol_close(coll);
5034
5035    log_verbose("\n\nTailoring test for s with caron:");
5036    ruleLen = u_strlen(rule2);
5037    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5038    tLen = u_strlen(tailorData2[0]);
5039    kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);
5040    for (j=1; j<3; j++) {
5041        tLen = u_strlen(tailorData2[j]);
5042        rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);
5043        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5044            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5045            for(i = 0; i<rLen; i++) {
5046                log_err(" %02X", resColl[i]);
5047            }
5048        }
5049    }
5050    ucol_close(coll);
5051
5052    log_verbose("\n\nTailoring test for &z< ae with circumflex:");
5053    ruleLen = u_strlen(rule3);
5054    coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5055    tLen = u_strlen(tailorData3[3]);
5056    kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);
5057    for (j=4; j<6; j++) {
5058        tLen = u_strlen(tailorData3[j]);
5059        rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);
5060
5061        if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=0 ) {
5062            log_err("\n After tailoring Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5063            for(i = 0; i<rLen; i++) {
5064                log_err(" %02X", resColl[i]);
5065            }
5066        }
5067
5068        log_verbose("\n Test Data[%d] :%s  \tlen: %d key: ", j, tailorData[j], tLen);
5069         for(i = 0; i<rLen; i++) {
5070             log_verbose(" %02X", resColl[i]);
5071         }
5072    }
5073    ucol_close(coll);
5074}
5075
5076static void
5077TestTailor6179(void)
5078{
5079    UErrorCode status = U_ZERO_ERROR;
5080    int32_t i;
5081    UCollator *coll =NULL;
5082    uint8_t  resColl[100];
5083    int32_t  rLen, tLen, ruleLen;
5084    /* &[last primary ignorable]<< a  &[first primary ignorable]<<b */
5085    UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,
5086            0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x20,0x61,0x20,
5087            0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x79,0x20,
5088            0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x20, 0};
5089    /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */
5090    UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x61,
5091            0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x3C,
5092            0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,
5093            0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,
5094            0x3C,0x3C,0x20,0x62,0};
5095
5096    UChar tData1[][20]={
5097        {0x61, 0},
5098        {0x62, 0},
5099        { 0xFDD0,0x009E, 0}
5100    };
5101    UChar tData2[][20]={
5102            {0x61, 0},
5103            {0x62, 0},
5104            { 0xFDD0,0x009E, 0}
5105     };
5106
5107    /*
5108     * These values from FractionalUCA.txt will change,
5109     * and need to be updated here.
5110     */
5111    uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};
5112    uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};
5113    uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5114    uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};
5115
5116    /* Test [Last Primary ignorable] */
5117
5118    log_verbose("\n\nTailoring test: &[last primary ignorable]<<a  &[first primary ignorable]<<b ");
5119    ruleLen = u_strlen(rule1);
5120    coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5121    if (U_FAILURE(status)) {
5122        log_err_status(status, "Tailoring test: &[last primary ignorable] failed! -> %s\n", u_errorName(status));
5123        return;
5124    }
5125    tLen = u_strlen(tData1[0]);
5126    rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);
5127    if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {
5128        log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData1[0], rLen);
5129        for(i = 0; i<rLen; i++) {
5130            log_err(" %02X", resColl[i]);
5131        }
5132    }
5133    tLen = u_strlen(tData1[1]);
5134    rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);
5135    if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {
5136        log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData1[1], rLen);
5137        for(i = 0; i<rLen; i++) {
5138            log_err(" %02X", resColl[i]);
5139        }
5140    }
5141    ucol_close(coll);
5142
5143
5144    /* Test [Last Secondary ignorable] */
5145    log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a  &[first secondary ignorable]<<<b ");
5146    ruleLen = u_strlen(rule1);
5147    coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5148    if (U_FAILURE(status)) {
5149        log_err("Tailoring test: &[last primary ignorable] failed!");
5150        return;
5151    }
5152    tLen = u_strlen(tData2[0]);
5153    rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);
5154    log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5155    for(i = 0; i<rLen; i++) {
5156        log_verbose(" %02X", resColl[i]);
5157    }
5158    if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {
5159        log_err("\n Data[%d] :%s  \tlen: %d key: ", 0, tData2[0], rLen);
5160        for(i = 0; i<rLen; i++) {
5161            log_err(" %02X", resColl[i]);
5162        }
5163    }
5164    tLen = u_strlen(tData2[1]);
5165    rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);
5166    log_verbose("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5167    for(i = 0; i<rLen; i++) {
5168        log_verbose(" %02X", resColl[i]);
5169    }
5170    if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {
5171        log_err("\n Data[%d] :%s  \tlen: %d key: ", 1, tData2[1], rLen);
5172        for(i = 0; i<rLen; i++) {
5173            log_err(" %02X", resColl[i]);
5174        }
5175    }
5176    ucol_close(coll);
5177}
5178
5179static void
5180TestUCAPrecontext(void)
5181{
5182    UErrorCode status = U_ZERO_ERROR;
5183    int32_t i, j;
5184    UCollator *coll =NULL;
5185    uint8_t  resColl[100], prevColl[100];
5186    int32_t  rLen, tLen, ruleLen;
5187    UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */
5188    UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};
5189    /* & l middle-dot << a  a is an expansion. */
5190
5191    UChar tData1[][20]={
5192            { 0xb7, 0},  /* standalone middle dot(0xb7) */
5193            { 0x387, 0}, /* standalone middle dot(0x387) */
5194            { 0x61, 0},  /* a */
5195            { 0x6C, 0},  /* l */
5196            { 0x4C, 0x0332, 0},  /* l with [first primary ignorable] */
5197            { 0x6C, 0xb7, 0},  /* l with middle dot(0xb7) */
5198            { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */
5199            { 0x4C, 0xb7, 0},  /* L with middle dot(0xb7) */
5200            { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */
5201            { 0x6C, 0x61, 0x387, 0}, /* la  with middle dot(0x387) */
5202            { 0x4C, 0x61, 0xb7, 0},  /* La with middle dot(0xb7) */
5203     };
5204
5205    log_verbose("\n\nEN collation:");
5206    coll = ucol_open("en", &status);
5207    if (U_FAILURE(status)) {
5208        log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_errorName(status));
5209        return;
5210    }
5211    for (j=0; j<11; j++) {
5212        tLen = u_strlen(tData1[j]);
5213        rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5214        if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5215            log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5216                    j, tData1[j]);
5217        }
5218        log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5219        for(i = 0; i<rLen; i++) {
5220            log_verbose(" %02X", resColl[i]);
5221        }
5222        uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5223     }
5224     ucol_close(coll);
5225
5226
5227     log_verbose("\n\nJA collation:");
5228     coll = ucol_open("ja", &status);
5229     if (U_FAILURE(status)) {
5230         log_err("Tailoring test: &z <<a|- failed!");
5231         return;
5232     }
5233     for (j=0; j<11; j++) {
5234         tLen = u_strlen(tData1[j]);
5235         rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5236         if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5237             log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5238                     j, tData1[j]);
5239         }
5240         log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5241         for(i = 0; i<rLen; i++) {
5242             log_verbose(" %02X", resColl[i]);
5243         }
5244         uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5245      }
5246      ucol_close(coll);
5247
5248
5249      log_verbose("\n\nTailoring test: & middle dot < a ");
5250      ruleLen = u_strlen(rule1);
5251      coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5252      if (U_FAILURE(status)) {
5253          log_err("Tailoring test: & middle dot < a failed!");
5254          return;
5255      }
5256      for (j=0; j<11; j++) {
5257          tLen = u_strlen(tData1[j]);
5258          rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5259          if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5260              log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5261                      j, tData1[j]);
5262          }
5263          log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5264          for(i = 0; i<rLen; i++) {
5265              log_verbose(" %02X", resColl[i]);
5266          }
5267          uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5268       }
5269       ucol_close(coll);
5270
5271
5272       log_verbose("\n\nTailoring test: & l middle-dot << a ");
5273       ruleLen = u_strlen(rule2);
5274       coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);
5275       if (U_FAILURE(status)) {
5276           log_err("Tailoring test: & l middle-dot << a failed!");
5277           return;
5278       }
5279       for (j=0; j<11; j++) {
5280           tLen = u_strlen(tData1[j]);
5281           rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);
5282           if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0)) {
5283               log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",
5284                       j, tData1[j]);
5285           }
5286           if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) {
5287               log_err("\n Expecting smaller key than previous test case: Data[%d] :%s.",
5288                       j, tData1[j]);
5289           }
5290           log_verbose("\n Data[%d] :%s  \tlen: %d key: ", j, tData1[j], rLen);
5291           for(i = 0; i<rLen; i++) {
5292               log_verbose(" %02X", resColl[i]);
5293           }
5294           uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));
5295        }
5296        ucol_close(coll);
5297}
5298
5299static void
5300TestOutOfBuffer5468(void)
5301{
5302    static const char *test = "\\u4e00";
5303    UChar ustr[256];
5304    int32_t ustr_length = u_unescape(test, ustr, 256);
5305    unsigned char shortKeyBuf[1];
5306    int32_t sortkey_length;
5307    UErrorCode status = U_ZERO_ERROR;
5308    static UCollator *coll = NULL;
5309
5310    coll = ucol_open("root", &status);
5311    if(U_FAILURE(status)) {
5312      log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));
5313      return;
5314    }
5315    ucol_setStrength(coll, UCOL_PRIMARY);
5316    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
5317    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5318    if (U_FAILURE(status)) {
5319      log_err("Failed setting atributes\n");
5320      return;
5321    }
5322
5323    sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeof(shortKeyBuf));
5324    if (sortkey_length != 4) {
5325        log_err("expecting length of sortKey is 4  got:%d ", sortkey_length);
5326    }
5327    log_verbose("length of sortKey is %d", sortkey_length);
5328    ucol_close(coll);
5329}
5330
5331#define TSKC_DATA_SIZE 5
5332#define TSKC_BUF_SIZE  50
5333static void
5334TestSortKeyConsistency(void)
5335{
5336    UErrorCode icuRC = U_ZERO_ERROR;
5337    UCollator* ucol;
5338    UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};
5339
5340    uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5341    uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];
5342    int32_t i, j, i2;
5343
5344    ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);
5345    if (U_FAILURE(icuRC))
5346    {
5347        log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_errorName(icuRC));
5348        return;
5349    }
5350
5351    for (i = 0; i < TSKC_DATA_SIZE; i++)
5352    {
5353        UCharIterator uiter;
5354        uint32_t state[2] = { 0, 0 };
5355        int32_t dataLen = i+1;
5356        for (j=0; j<TSKC_BUF_SIZE; j++)
5357            bufFull[i][j] = bufPart[i][j] = 0;
5358
5359        /* Full sort key */
5360        ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);
5361
5362        /* Partial sort key */
5363        uiter_setString(&uiter, data, dataLen);
5364        ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &icuRC);
5365        if (U_FAILURE(icuRC))
5366        {
5367            log_err("ucol_nextSortKeyPart failed\n");
5368            ucol_close(ucol);
5369            return;
5370        }
5371
5372        for (i2=0; i2<i; i2++)
5373        {
5374            UBool fullMatch = TRUE;
5375            UBool partMatch = TRUE;
5376            for (j=0; j<TSKC_BUF_SIZE; j++)
5377            {
5378                fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);
5379                partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);
5380            }
5381            if (fullMatch != partMatch) {
5382                log_err(fullMatch ? "full key was consistent, but partial key changed\n"
5383                                  : "partial key was consistent, but full key changed\n");
5384                ucol_close(ucol);
5385                return;
5386            }
5387        }
5388    }
5389
5390    /*=============================================*/
5391   ucol_close(ucol);
5392}
5393
5394/* ticket: 6101 */
5395static void TestCroatianSortKey(void) {
5396    const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";
5397    UErrorCode status = U_ZERO_ERROR;
5398    UCollator *ucol;
5399    UCharIterator iter;
5400
5401    static const UChar text[] = { 0x0044, 0xD81A };
5402
5403    size_t length = sizeof(text)/sizeof(*text);
5404
5405    uint8_t textSortKey[32];
5406    size_t lenSortKey = 32;
5407    size_t actualSortKeyLen;
5408    uint32_t uStateInfo[2] = { 0, 0 };
5409
5410    ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);
5411    if (U_FAILURE(status)) {
5412        log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));
5413        return;
5414    }
5415
5416    uiter_setString(&iter, text, length);
5417
5418    actualSortKeyLen = ucol_nextSortKeyPart(
5419        ucol, &iter, (uint32_t*)uStateInfo,
5420        textSortKey, lenSortKey, &status
5421        );
5422
5423    if (actualSortKeyLen == lenSortKey) {
5424        log_err("ucol_nextSortKeyPart did not give correct result in Croatian test.\n");
5425    }
5426
5427    ucol_close(ucol);
5428}
5429
5430/* ticket: 6140 */
5431/* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since
5432 * they are both Hiragana and Katakana
5433 */
5434#define SORTKEYLEN 50
5435static void TestHiragana(void) {
5436    UErrorCode status = U_ZERO_ERROR;
5437    UCollator* ucol;
5438    UCollationResult strcollresult;
5439    UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */
5440    UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };
5441    int32_t data1Len = sizeof(data1)/sizeof(*data1);
5442    int32_t data2Len = sizeof(data2)/sizeof(*data2);
5443    int32_t i, j;
5444    uint8_t sortKey1[SORTKEYLEN];
5445    uint8_t sortKey2[SORTKEYLEN];
5446
5447    UCharIterator uiter1;
5448    UCharIterator uiter2;
5449    uint32_t state1[2] = { 0, 0 };
5450    uint32_t state2[2] = { 0, 0 };
5451    int32_t keySize1;
5452    int32_t keySize2;
5453
5454    ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,
5455            &status);
5456    if (U_FAILURE(status)) {
5457        log_err_status(status, "Error status: %s; Unable to open collator from short string.\n", u_errorName(status));
5458        return;
5459    }
5460
5461    /* Start of full sort keys */
5462    /* Full sort key1 */
5463    keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);
5464    /* Full sort key2 */
5465    keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);
5466    if (keySize1 == keySize2) {
5467        for (i = 0; i < keySize1; i++) {
5468            if (sortKey1[i] != sortKey2[i]) {
5469                log_err("Full sort keys are different. Should be equal.");
5470            }
5471        }
5472    } else {
5473        log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2);
5474    }
5475    /* End of full sort keys */
5476
5477    /* Start of partial sort keys */
5478    /* Partial sort key1 */
5479    uiter_setString(&uiter1, data1, data1Len);
5480    keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);
5481    /* Partial sort key2 */
5482    uiter_setString(&uiter2, data2, data2Len);
5483    keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);
5484    if (U_SUCCESS(status) && keySize1 == keySize2) {
5485        for (j = 0; j < keySize1; j++) {
5486            if (sortKey1[j] != sortKey2[j]) {
5487                log_err("Partial sort keys are different. Should be equal");
5488            }
5489        }
5490    } else {
5491        log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %d", u_errorName(status), keySize1, keySize2);
5492    }
5493    /* End of partial sort keys */
5494
5495    /* Start of strcoll */
5496    /* Use ucol_strcoll() to determine ordering */
5497    strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);
5498    if (strcollresult != UCOL_EQUAL) {
5499        log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");
5500    }
5501
5502    ucol_close(ucol);
5503}
5504
5505/* Convenient struct for running collation tests */
5506typedef struct {
5507  const UChar source[MAX_TOKEN_LEN];  /* String on left */
5508  const UChar target[MAX_TOKEN_LEN];  /* String on right */
5509  UCollationResult result;            /* -1, 0 or +1, depending on collation */
5510} OneTestCase;
5511
5512/*
5513 * Utility function to test one collation test case.
5514 * @param testcases Array of test cases.
5515 * @param n_testcases Size of the array testcases.
5516 * @param str_rules Array of rules.  These rules should be specifying the same rule in different formats.
5517 * @param n_rules Size of the array str_rules.
5518 */
5519static void doTestOneTestCase(const OneTestCase testcases[],
5520                              int n_testcases,
5521                              const char* str_rules[],
5522                              int n_rules)
5523{
5524  int rule_no, testcase_no;
5525  UChar rule[500];
5526  int32_t length = 0;
5527  UErrorCode status = U_ZERO_ERROR;
5528  UParseError parse_error;
5529  UCollator  *myCollation;
5530
5531  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5532
5533    length = u_unescape(str_rules[rule_no], rule, 500);
5534    if (length == 0) {
5535        log_err("ERROR: The rule cannot be unescaped: %s\n");
5536        return;
5537    }
5538    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5539    if(U_FAILURE(status)){
5540        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5541        return;
5542    }
5543    log_verbose("Testing the <<* syntax\n");
5544    ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
5545    ucol_setStrength(myCollation, UCOL_TERTIARY);
5546    for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {
5547      doTest(myCollation,
5548             testcases[testcase_no].source,
5549             testcases[testcase_no].target,
5550             testcases[testcase_no].result
5551             );
5552    }
5553    ucol_close(myCollation);
5554  }
5555}
5556
5557const static OneTestCase rangeTestcases[] = {
5558  { {0x0061},                            {0x0062},                          UCOL_LESS }, /* "a" < "b" */
5559  { {0x0062},                            {0x0063},                          UCOL_LESS }, /* "b" < "c" */
5560  { {0x0061},                            {0x0063},                          UCOL_LESS }, /* "a" < "c" */
5561
5562  { {0x0062},                            {0x006b},                          UCOL_LESS }, /* "b" << "k" */
5563  { {0x006b},                            {0x006c},                          UCOL_LESS }, /* "k" << "l" */
5564  { {0x0062},                            {0x006c},                          UCOL_LESS }, /* "b" << "l" */
5565  { {0x0061},                            {0x006c},                          UCOL_LESS }, /* "a" < "l" */
5566  { {0x0061},                            {0x006d},                          UCOL_LESS },  /* "a" < "m" */
5567
5568  { {0x0079},                            {0x006d},                          UCOL_LESS },  /* "y" < "f" */
5569  { {0x0079},                            {0x0067},                          UCOL_LESS },  /* "y" < "g" */
5570  { {0x0061},                            {0x0068},                          UCOL_LESS },  /* "y" < "h" */
5571  { {0x0061},                            {0x0065},                          UCOL_LESS },  /* "g" < "e" */
5572
5573  { {0x0061},                            {0x0031},                          UCOL_EQUAL }, /* "a" = "1" */
5574  { {0x0061},                            {0x0032},                          UCOL_EQUAL }, /* "a" = "2" */
5575  { {0x0061},                            {0x0033},                          UCOL_EQUAL }, /* "a" = "3" */
5576  { {0x0061},                            {0x0066},                          UCOL_LESS }, /* "a" < "f" */
5577  { {0x006c, 0x0061},                    {0x006b, 0x0062},                  UCOL_LESS },  /* "la" < "123" */
5578  { {0x0061, 0x0061, 0x0061},            {0x0031, 0x0032, 0x0033},          UCOL_EQUAL }, /* "aaa" = "123" */
5579  { {0x0062},                            {0x007a},                          UCOL_LESS },  /* "b" < "z" */
5580  { {0x0061, 0x007a, 0x0062},            {0x0032, 0x0079, 0x006d},          UCOL_LESS }, /* "azm" = "2yc" */
5581};
5582
5583static int nRangeTestcases = LEN(rangeTestcases);
5584
5585const static OneTestCase rangeTestcasesSupplemental[] = {
5586  { {0xfffe},                            {0xffff},                          UCOL_LESS }, /* U+FFFE < U+FFFF */
5587  { {0xffff},                            {0xd800, 0xdc00},                  UCOL_LESS }, /* U+FFFF < U+10000 */
5588  { {0xd800, 0xdc00},                    {0xd800, 0xdc01},                  UCOL_LESS }, /* U+10000 < U+10001 */
5589  { {0xfffe},                            {0xd800, 0xdc01},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5590  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5591  { {0xd800, 0xdc01},                    {0xd800, 0xdc02},                  UCOL_LESS }, /* U+10000 < U+10001 */
5592  { {0xfffe},                            {0xd800, 0xdc02},                  UCOL_LESS }, /* U+FFFE < U+10001 */
5593};
5594
5595static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);
5596
5597const static OneTestCase rangeTestcasesQwerty[] = {
5598  { {0x0071},                            {0x0077},                          UCOL_LESS }, /* "q" < "w" */
5599  { {0x0077},                            {0x0065},                          UCOL_LESS }, /* "w" < "e" */
5600
5601  { {0x0079},                            {0x0075},                          UCOL_LESS }, /* "y" < "u" */
5602  { {0x0071},                            {0x0075},                          UCOL_LESS }, /* "q" << "u" */
5603
5604  { {0x0074},                            {0x0069},                          UCOL_LESS }, /* "t" << "i" */
5605  { {0x006f},                            {0x0070},                          UCOL_LESS }, /* "o" << "p" */
5606
5607  { {0x0079},                            {0x0065},                          UCOL_LESS },  /* "y" < "e" */
5608  { {0x0069},                            {0x0075},                          UCOL_LESS },  /* "i" < "u" */
5609
5610  { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},
5611    {0x0077, 0x0065, 0x0072, 0x0065},                                       UCOL_LESS }, /* "quest" < "were" */
5612  { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},
5613    {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},                               UCOL_LESS }, /* "quack" < "quest" */
5614};
5615
5616static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);
5617
5618static void TestSameStrengthList(void)
5619{
5620  const char* strRules[] = {
5621    /* Normal */
5622    "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z  &y<f<g<h<e &a=1=2=3",
5623
5624    /* Lists */
5625    "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123",
5626  };
5627  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5628}
5629
5630static void TestSameStrengthListQuoted(void)
5631{
5632  const char* strRules[] = {
5633    /* Lists with quoted characters */
5634    "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123",
5635    "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123",
5636
5637    "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0032\\u0033",
5638    "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\u0031\\u0032\\u0033'",
5639
5640    "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz  &y<*fghe &a=*\\u0031\\u0032\\u0033",
5641    "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz  &y<*fghe &a=*'\\u0031\\u0032\\u0033'",
5642  };
5643  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5644}
5645
5646static void TestSameStrengthListSupplemental(void)
5647{
5648  const char* strRules[] = {
5649    "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",
5650    "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",
5651    "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",
5652    "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",
5653  };
5654  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5655}
5656
5657static void TestSameStrengthListQwerty(void)
5658{
5659  const char* strRules[] = {
5660    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5661    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5662    "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",
5663    "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",
5664    "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064",
5665
5666    /* Quoted characters also will work if two quoted characters are not consecutive.  */
5667    "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u0074<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
5668
5669    /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */
5670    /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",*/
5671
5672 };
5673  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5674}
5675
5676static void TestSameStrengthListQuotedQwerty(void)
5677{
5678  const char* strRules[] = {
5679    "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d",   /* Normal */
5680    "&q<*wer &w<<*tyu &t<<<*iop &o=*asd",             /* Lists  */
5681    "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'",   /* Lists with quotes */
5682
5683    /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */
5684    /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */
5685   };
5686  doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(strRules));
5687}
5688
5689static void TestSameStrengthListRanges(void)
5690{
5691  const char* strRules[] = {
5692    "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3",
5693  };
5694  doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));
5695}
5696
5697static void TestSameStrengthListSupplementalRanges(void)
5698{
5699  const char* strRules[] = {
5700    "&\\ufffe<*\\uffff-\\U00010002",
5701  };
5702  doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, strRules, LEN(strRules));
5703}
5704
5705static void TestSpecialCharacters(void)
5706{
5707  const char* strRules[] = {
5708    /* Normal */
5709    "&';'<'+'<','<'-'<'&'<'*'",
5710
5711    /* List */
5712    "&';'<*'+,-&*'",
5713
5714    /* Range */
5715    "&';'<*'+'-'-&*'",
5716  };
5717
5718  const static OneTestCase specialCharacterStrings[] = {
5719    { {0x003b}, {0x002b}, UCOL_LESS },  /* ; < + */
5720    { {0x002b}, {0x002c}, UCOL_LESS },  /* + < , */
5721    { {0x002c}, {0x002d}, UCOL_LESS },  /* , < - */
5722    { {0x002d}, {0x0026}, UCOL_LESS },  /* - < & */
5723  };
5724  doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRules, LEN(strRules));
5725}
5726
5727static void TestPrivateUseCharacters(void)
5728{
5729  const char* strRules[] = {
5730    /* Normal */
5731    "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",
5732    "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",
5733  };
5734
5735  const static OneTestCase privateUseCharacterStrings[] = {
5736    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5737    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5738    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5739    { {0xe2da}, {0xe2db}, UCOL_LESS },
5740    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5741    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5742  };
5743  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5744}
5745
5746static void TestPrivateUseCharactersInList(void)
5747{
5748  const char* strRules[] = {
5749    /* List */
5750    "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",
5751    /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */
5752    "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",
5753  };
5754
5755  const static OneTestCase privateUseCharacterStrings[] = {
5756    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5757    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5758    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5759    { {0xe2da}, {0xe2db}, UCOL_LESS },
5760    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5761    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5762  };
5763  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5764}
5765
5766static void TestPrivateUseCharactersInRange(void)
5767{
5768  const char* strRules[] = {
5769    /* Range */
5770    "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",
5771    "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",
5772    /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */
5773  };
5774
5775  const static OneTestCase privateUseCharacterStrings[] = {
5776    { {0x5ea7}, {0xe2d8}, UCOL_LESS },
5777    { {0xe2d8}, {0xe2d9}, UCOL_LESS },
5778    { {0xe2d9}, {0xe2da}, UCOL_LESS },
5779    { {0xe2da}, {0xe2db}, UCOL_LESS },
5780    { {0xe2db}, {0xe2dc}, UCOL_LESS },
5781    { {0xe2dc}, {0x4e8d}, UCOL_LESS },
5782  };
5783  doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
5784}
5785
5786static void TestInvalidListsAndRanges(void)
5787{
5788  const char* invalidRules[] = {
5789    /* Range not in starred expression */
5790    "&\\ufffe<\\uffff-\\U00010002",
5791
5792    /* Range without start */
5793    "&a<*-c",
5794
5795    /* Range without end */
5796    "&a<*b-",
5797
5798    /* More than one hyphen */
5799    "&a<*b-g-l",
5800
5801    /* Range in the wrong order */
5802    "&a<*k-b",
5803
5804  };
5805
5806  UChar rule[500];
5807  UErrorCode status = U_ZERO_ERROR;
5808  UParseError parse_error;
5809  int n_rules = LEN(invalidRules);
5810  int rule_no;
5811  int length;
5812  UCollator  *myCollation;
5813
5814  for (rule_no = 0; rule_no < n_rules; ++rule_no) {
5815
5816    length = u_unescape(invalidRules[rule_no], rule, 500);
5817    if (length == 0) {
5818        log_err("ERROR: The rule cannot be unescaped: %s\n");
5819        return;
5820    }
5821    myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_error, &status);
5822    if(!U_FAILURE(status)){
5823      log_err("ERROR: Could not cause a failure as expected: \n");
5824    }
5825    status = U_ZERO_ERROR;
5826  }
5827}
5828
5829/*
5830 * This test ensures that characters placed before a character in a different script have the same lead byte
5831 * in their collation key before and after script reordering.
5832 */
5833static void TestBeforeRuleWithScriptReordering(void)
5834{
5835    UParseError error;
5836    UErrorCode status = U_ZERO_ERROR;
5837    UCollator  *myCollation;
5838    char srules[500] = "&[before 1]\\u03b1 < \\u0e01";
5839    UChar rules[500];
5840    uint32_t rulesLength = 0;
5841    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5842    UCollationResult collResult;
5843
5844    uint8_t baseKey[256];
5845    uint32_t baseKeyLength;
5846    uint8_t beforeKey[256];
5847    uint32_t beforeKeyLength;
5848
5849    UChar base[] = { 0x03b1 }; /* base */
5850    int32_t baseLen = sizeof(base)/sizeof(*base);
5851
5852    UChar before[] = { 0x0e01 }; /* ko kai */
5853    int32_t beforeLen = sizeof(before)/sizeof(*before);
5854
5855    /*UChar *data[] = { before, base };
5856    genericRulesStarter(srules, data, 2);*/
5857
5858    log_verbose("Testing the &[before 1] rule with [reorder grek]\n");
5859
5860
5861    /* build collator */
5862    log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");
5863
5864    rulesLength = u_unescape(srules, rules, LEN(rules));
5865    myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &error, &status);
5866    if(U_FAILURE(status)) {
5867        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
5868        return;
5869    }
5870
5871    /* check collation results - before rule applied but not script reordering */
5872    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5873    if (collResult != UCOL_GREATER) {
5874        log_err("Collation result not correct before script reordering = %d\n", collResult);
5875    }
5876
5877    /* check the lead byte of the collation keys before script reordering */
5878    baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5879    beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5880    if (baseKey[0] != beforeKey[0]) {
5881      log_err("Different lead byte for sort keys using before rule and before script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5882   }
5883
5884    /* reorder the scripts */
5885    ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);
5886    if(U_FAILURE(status)) {
5887        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
5888        return;
5889    }
5890
5891    /* check collation results - before rule applied and after script reordering */
5892    collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);
5893    if (collResult != UCOL_GREATER) {
5894        log_err("Collation result not correct after script reordering = %d\n", collResult);
5895    }
5896
5897    /* check the lead byte of the collation keys after script reordering */
5898    ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);
5899    ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);
5900    if (baseKey[0] != beforeKey[0]) {
5901        log_err("Different lead byte for sort keys using before fule and after script reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);
5902    }
5903
5904    ucol_close(myCollation);
5905}
5906
5907/*
5908 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.
5909 */
5910static void TestNonLeadBytesDuringCollationReordering(void)
5911{
5912    UErrorCode status = U_ZERO_ERROR;
5913    UCollator  *myCollation;
5914    int32_t reorderCodes[1] = {USCRIPT_GREEK};
5915
5916    uint8_t baseKey[256];
5917    uint32_t baseKeyLength;
5918    uint8_t reorderKey[256];
5919    uint32_t reorderKeyLength;
5920
5921    UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };
5922
5923    uint32_t i;
5924
5925
5926    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
5927
5928    /* build collator tertiary */
5929    myCollation = ucol_open("", &status);
5930    ucol_setStrength(myCollation, UCOL_TERTIARY);
5931    if(U_FAILURE(status)) {
5932        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5933        return;
5934    }
5935    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5936
5937    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5938    if(U_FAILURE(status)) {
5939        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5940        return;
5941    }
5942    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5943
5944    if (baseKeyLength != reorderKeyLength) {
5945        log_err("Key lengths not the same during reordering.\n");
5946        return;
5947    }
5948
5949    for (i = 1; i < baseKeyLength; i++) {
5950        if (baseKey[i] != reorderKey[i]) {
5951            log_err("Collation key bytes not the same at position %d.\n", i);
5952            return;
5953        }
5954    }
5955    ucol_close(myCollation);
5956
5957    /* build collator quaternary */
5958    myCollation = ucol_open("", &status);
5959    ucol_setStrength(myCollation, UCOL_QUATERNARY);
5960    if(U_FAILURE(status)) {
5961        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
5962        return;
5963    }
5964    baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), baseKey, 256);
5965
5966    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
5967    if(U_FAILURE(status)) {
5968        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
5969        return;
5970    }
5971    reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);
5972
5973    if (baseKeyLength != reorderKeyLength) {
5974        log_err("Key lengths not the same during reordering.\n");
5975        return;
5976    }
5977
5978    for (i = 1; i < baseKeyLength; i++) {
5979        if (baseKey[i] != reorderKey[i]) {
5980            log_err("Collation key bytes not the same at position %d.\n", i);
5981            return;
5982        }
5983    }
5984    ucol_close(myCollation);
5985}
5986
5987/*
5988 * Test reordering API.
5989 */
5990static void TestReorderingAPI(void)
5991{
5992    UErrorCode status = U_ZERO_ERROR;
5993    UCollator  *myCollation;
5994    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5995    int32_t duplicateReorderCodes[] = {USCRIPT_CUNEIFORM, USCRIPT_GREEK, UCOL_REORDER_CODE_CURRENCY, USCRIPT_EGYPTIAN_HIEROGLYPHS};
5996    int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
5997    UCollationResult collResult;
5998    int32_t retrievedReorderCodesLength;
5999    int32_t retrievedReorderCodes[10];
6000    UChar greekString[] = { 0x03b1 };
6001    UChar punctuationString[] = { 0x203e };
6002    int loopIndex;
6003
6004    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6005
6006    /* build collator tertiary */
6007    myCollation = ucol_open("", &status);
6008    ucol_setStrength(myCollation, UCOL_TERTIARY);
6009    if(U_FAILURE(status)) {
6010        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6011        return;
6012    }
6013
6014    /* set the reorderding */
6015    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6016    if (U_FAILURE(status)) {
6017        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6018        return;
6019    }
6020
6021    /* get the reordering */
6022    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6023    if (status != U_BUFFER_OVERFLOW_ERROR) {
6024        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6025        return;
6026    }
6027    status = U_ZERO_ERROR;
6028    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6029        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6030        return;
6031    }
6032    /* now let's really get it */
6033    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6034    if (U_FAILURE(status)) {
6035        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6036        return;
6037    }
6038    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6039        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6040        return;
6041    }
6042    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6043        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6044            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6045            return;
6046        }
6047    }
6048    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6049    if (collResult != UCOL_LESS) {
6050        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6051        return;
6052    }
6053
6054    /* clear the reordering */
6055    ucol_setReorderCodes(myCollation, NULL, 0, &status);
6056    if (U_FAILURE(status)) {
6057        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6058        return;
6059    }
6060
6061    /* get the reordering again */
6062    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6063    if (retrievedReorderCodesLength != 0) {
6064        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6065        return;
6066    }
6067
6068    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6069    if (collResult != UCOL_GREATER) {
6070        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6071        return;
6072    }
6073
6074    /* test for error condition on duplicate reorder codes */
6075    ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorderCodes), &status);
6076    if (!U_FAILURE(status)) {
6077        log_err_status(status, "ERROR: setting duplicate reorder codes did not generate a failure\n");
6078        return;
6079    }
6080
6081    status = U_ZERO_ERROR;
6082    /* test for reorder codes after a reset code */
6083    ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reorderCodesStartingWithDefault), &status);
6084    if (!U_FAILURE(status)) {
6085        log_err_status(status, "ERROR: reorderd code sequence starting with default and having following codes didn't cause an error\n");
6086        return;
6087    }
6088
6089    ucol_close(myCollation);
6090}
6091
6092/*
6093 * Test reordering API.
6094 */
6095static void TestReorderingAPIWithRuleCreatedCollator(void)
6096{
6097    UErrorCode status = U_ZERO_ERROR;
6098    UCollator  *myCollation;
6099    UChar rules[90];
6100    int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};
6101    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6102    UCollationResult collResult;
6103    int32_t retrievedReorderCodesLength;
6104    int32_t retrievedReorderCodes[10];
6105    UChar greekString[] = { 0x03b1 };
6106    UChar punctuationString[] = { 0x203e };
6107    UChar hanString[] = { 0x65E5, 0x672C };
6108    int loopIndex;
6109
6110    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6111
6112    /* build collator from rules */
6113    u_uastrcpy(rules, "[reorder Hani Grek]");
6114    myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERTIARY, NULL, &status);
6115    if(U_FAILURE(status)) {
6116        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6117        return;
6118    }
6119
6120    /* get the reordering */
6121    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6122    if (U_FAILURE(status)) {
6123        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6124        return;
6125    }
6126    if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {
6127        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));
6128        return;
6129    }
6130    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6131        if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {
6132            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6133            return;
6134        }
6135    }
6136    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanString, LEN(hanString));
6137    if (collResult != UCOL_GREATER) {
6138        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6139        return;
6140    }
6141
6142
6143    /* set the reorderding */
6144    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6145    if (U_FAILURE(status)) {
6146        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6147        return;
6148    }
6149
6150    /* get the reordering */
6151    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6152    if (status != U_BUFFER_OVERFLOW_ERROR) {
6153        log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));
6154        return;
6155    }
6156    status = U_ZERO_ERROR;
6157    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6158        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6159        return;
6160    }
6161    /* now let's really get it */
6162    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6163    if (U_FAILURE(status)) {
6164        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6165        return;
6166    }
6167    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6168        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6169        return;
6170    }
6171    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6172        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6173            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6174            return;
6175        }
6176    }
6177    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6178    if (collResult != UCOL_LESS) {
6179        log_err_status(status, "ERROR: collation result should have been UCOL_LESS\n");
6180        return;
6181    }
6182
6183    /* clear the reordering */
6184    ucol_setReorderCodes(myCollation, NULL, 0, &status);
6185    if (U_FAILURE(status)) {
6186        log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myErrorName(status));
6187        return;
6188    }
6189
6190    /* get the reordering again */
6191    retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &status);
6192    if (retrievedReorderCodesLength != 0) {
6193        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);
6194        return;
6195    }
6196
6197    collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctuationString, LEN(punctuationString));
6198    if (collResult != UCOL_GREATER) {
6199        log_err_status(status, "ERROR: collation result should have been UCOL_GREATER\n");
6200        return;
6201    }
6202
6203    ucol_close(myCollation);
6204}
6205
/*
 * qsort() comparator that orders int32_t codes in ascending order.
 *
 * @param a Pointer to the first int32_t.
 * @param b Pointer to the second int32_t.
 * @return A negative value, zero, or a positive value when *a is less than,
 *         equal to, or greater than *b.
 */
static int compareUScriptCodes(const void * a, const void * b)
{
    const int32_t lhs = *(const int32_t *)a;
    const int32_t rhs = *(const int32_t *)b;

    /* compare instead of subtracting: lhs - rhs can overflow for values far apart */
    return (lhs > rhs) - (lhs < rhs);
}
6210
6211static void TestEquivalentReorderingScripts(void) {
6212    UErrorCode status = U_ZERO_ERROR;
6213    int32_t equivalentScripts[50];
6214    int32_t equivalentScriptsLength;
6215    int loopIndex;
6216    int32_t equivalentScriptsResult[] = {
6217        USCRIPT_BOPOMOFO,
6218        USCRIPT_LISU,
6219        USCRIPT_LYCIAN,
6220        USCRIPT_CARIAN,
6221        USCRIPT_LYDIAN,
6222        USCRIPT_YI,
6223        USCRIPT_OLD_ITALIC,
6224        USCRIPT_GOTHIC,
6225        USCRIPT_DESERET,
6226        USCRIPT_SHAVIAN,
6227        USCRIPT_OSMANYA,
6228        USCRIPT_LINEAR_B,
6229        USCRIPT_CYPRIOT,
6230        USCRIPT_OLD_SOUTH_ARABIAN,
6231        USCRIPT_AVESTAN,
6232        USCRIPT_IMPERIAL_ARAMAIC,
6233        USCRIPT_INSCRIPTIONAL_PARTHIAN,
6234        USCRIPT_INSCRIPTIONAL_PAHLAVI,
6235        USCRIPT_UGARITIC,
6236        USCRIPT_OLD_PERSIAN,
6237        USCRIPT_CUNEIFORM,
6238        USCRIPT_EGYPTIAN_HIEROGLYPHS
6239    };
6240
6241    qsort(equivalentScriptsResult, LEN(equivalentScriptsResult), sizeof(int32_t), compareUScriptCodes);
6242
6243    /* UScript.GOTHIC */
6244    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);
6245    if (U_FAILURE(status)) {
6246        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6247        return;
6248    }
6249    /*
6250    fprintf(stdout, "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
6251    fprintf(stdout, "equivalentScriptsLength = %d\n", equivalentScriptsLength);
6252    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6253        fprintf(stdout, "%d = %x\n", loopIndex, equivalentScripts[loopIndex]);
6254    }
6255    */
6256    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6257        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6258        return;
6259    }
6260    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6261        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6262            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6263            return;
6264        }
6265    }
6266
6267    /* UScript.SHAVIAN */
6268    equivalentScriptsLength = ucol_getEquivalentReorderCodes(USCRIPT_SHAVIAN, equivalentScripts, LEN(equivalentScripts), &status);
6269    if (U_FAILURE(status)) {
6270        log_err_status(status, "ERROR: retrieving equivalent reorder codes: %s\n", myErrorName(status));
6271        return;
6272    }
6273    if (equivalentScriptsLength != LEN(equivalentScriptsResult)) {
6274        log_err_status(status, "ERROR: retrieved equivalent script length wrong: expected = %d, was = %d\n", LEN(equivalentScriptsResult), equivalentScriptsLength);
6275        return;
6276    }
6277    for (loopIndex = 0; loopIndex < equivalentScriptsLength; loopIndex++) {
6278        if (equivalentScriptsResult[loopIndex] != equivalentScripts[loopIndex]) {
6279            log_err_status(status, "ERROR: equivalent scripts results don't match: expected = %d, was = %d\n", equivalentScriptsResult[loopIndex], equivalentScripts[loopIndex]);
6280            return;
6281        }
6282    }
6283}
6284
6285static void TestReorderingAcrossCloning(void)
6286{
6287    UErrorCode status = U_ZERO_ERROR;
6288    UCollator  *myCollation;
6289    int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};
6290    UCollator *clonedCollation;
6291    int32_t bufferSize;
6292    int32_t retrievedReorderCodesLength;
6293    int32_t retrievedReorderCodes[10];
6294    int loopIndex;
6295
6296    log_verbose("Testing non-lead bytes in a sort key with and without reordering\n");
6297
6298    /* build collator tertiary */
6299    myCollation = ucol_open("", &status);
6300    ucol_setStrength(myCollation, UCOL_TERTIARY);
6301    if(U_FAILURE(status)) {
6302        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6303        return;
6304    }
6305
6306    /* set the reorderding */
6307    ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);
6308    if (U_FAILURE(status)) {
6309        log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName(status));
6310        return;
6311    }
6312
6313    /* clone the collator */
6314    clonedCollation = ucol_safeClone(myCollation, NULL, &bufferSize, &status);
6315    if (U_FAILURE(status)) {
6316        log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(status));
6317        return;
6318    }
6319
6320    /* get the reordering */
6321    retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrievedReorderCodes, LEN(retrievedReorderCodes), &status);
6322    if (U_FAILURE(status)) {
6323        log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName(status));
6324        return;
6325    }
6326    if (retrievedReorderCodesLength != LEN(reorderCodes)) {
6327        log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));
6328        return;
6329    }
6330    for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {
6331        if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {
6332            log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);
6333            return;
6334        }
6335    }
6336
6337    /*uprv_free(buffer);*/
6338    ucol_close(myCollation);
6339    ucol_close(clonedCollation);
6340}
6341
6342/*
6343 * Utility function to test one collation reordering test case set.
6344 * @param testcases Array of test cases.
6345 * @param n_testcases Size of the array testcases.
6346 * @param reorderTokens Array of reordering codes.
6347 * @param reorderTokensLen Size of the array reorderTokens.
6348 */
6349static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)
6350{
6351    uint32_t testCaseNum;
6352    UErrorCode status = U_ZERO_ERROR;
6353    UCollator  *myCollation;
6354
6355    myCollation = ucol_open("", &status);
6356    if (U_FAILURE(status)) {
6357        log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorName(status));
6358        return;
6359    }
6360    ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);
6361    if(U_FAILURE(status)) {
6362        log_err_status(status, "ERROR: while setting script order: %s\n", myErrorName(status));
6363        return;
6364    }
6365
6366    for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {
6367        doTest(myCollation,
6368            testCases[testCaseNum].source,
6369            testCases[testCaseNum].target,
6370            testCases[testCaseNum].result
6371        );
6372    }
6373    ucol_close(myCollation);
6374}
6375
6376static void TestGreekFirstReorder(void)
6377{
6378    const char* strRules[] = {
6379        "[reorder Grek]"
6380    };
6381
6382    const int32_t apiRules[] = {
6383        USCRIPT_GREEK
6384    };
6385
6386    const static OneTestCase privateUseCharacterStrings[] = {
6387        { {0x0391}, {0x0391}, UCOL_EQUAL },
6388        { {0x0041}, {0x0391}, UCOL_GREATER },
6389        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },
6390        { {0x0060}, {0x0391}, UCOL_LESS },
6391        { {0x0391}, {0xe2dc}, UCOL_LESS },
6392        { {0x0391}, {0x0060}, UCOL_GREATER },
6393    };
6394
6395    /* Test rules creation */
6396    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6397
6398    /* Test collation reordering API */
6399    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6400}
6401
6402static void TestGreekLastReorder(void)
6403{
6404    const char* strRules[] = {
6405        "[reorder Zzzz Grek]"
6406    };
6407
6408    const int32_t apiRules[] = {
6409        USCRIPT_UNKNOWN, USCRIPT_GREEK
6410    };
6411
6412    const static OneTestCase privateUseCharacterStrings[] = {
6413        { {0x0391}, {0x0391}, UCOL_EQUAL },
6414        { {0x0041}, {0x0391}, UCOL_LESS },
6415        { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },
6416        { {0x0060}, {0x0391}, UCOL_LESS },
6417        { {0x0391}, {0xe2dc}, UCOL_GREATER },
6418    };
6419
6420    /* Test rules creation */
6421    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6422
6423    /* Test collation reordering API */
6424    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6425}
6426
6427static void TestNonScriptReorder(void)
6428{
6429    const char* strRules[] = {
6430        "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"
6431    };
6432
6433    const int32_t apiRules[] = {
6434        USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
6435        UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
6436        UCOL_REORDER_CODE_CURRENCY
6437    };
6438
6439    const static OneTestCase privateUseCharacterStrings[] = {
6440        { {0x0391}, {0x0041}, UCOL_LESS },
6441        { {0x0041}, {0x0391}, UCOL_GREATER },
6442        { {0x0060}, {0x0041}, UCOL_LESS },
6443        { {0x0060}, {0x0391}, UCOL_GREATER },
6444        { {0x0024}, {0x0041}, UCOL_GREATER },
6445    };
6446
6447    /* Test rules creation */
6448    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6449
6450    /* Test collation reordering API */
6451    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6452}
6453
6454static void TestHaniReorder(void)
6455{
6456    const char* strRules[] = {
6457        "[reorder Hani]"
6458    };
6459    const int32_t apiRules[] = {
6460        USCRIPT_HAN
6461    };
6462
6463    const static OneTestCase privateUseCharacterStrings[] = {
6464        { {0x4e00}, {0x0041}, UCOL_LESS },
6465        { {0x4e00}, {0x0060}, UCOL_GREATER },
6466        { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },
6467        { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },
6468        { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },
6469        { {0xfa27}, {0x0041}, UCOL_LESS },
6470        { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },
6471    };
6472
6473    /* Test rules creation */
6474    doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));
6475
6476    /* Test collation reordering API */
6477    doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), apiRules, LEN(apiRules));
6478}
6479
6480static void TestMultipleReorder(void)
6481{
6482    const char* strRules[] = {
6483        "[reorder Grek Zzzz DIGIT Latn Hani]"
6484    };
6485
6486    const int32_t apiRules[] = {
6487        USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN
6488    };
6489
6490    const static OneTestCase collationTestCases[] = {
6491        { {0x0391}, {0x0041}, UCOL_LESS},
6492        { {0x0031}, {0x0041}, UCOL_LESS},
6493        { {0x0041}, {0x4e00}, UCOL_LESS},
6494    };
6495
6496    /* Test rules creation */
6497    doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN(strRules));
6498
6499    /* Test collation reordering API */
6500    doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));
6501}
6502
/*
 * Compares two zero-terminated byte sequences, byte by byte.
 *
 * @param a First sequence; must contain a 0 byte.
 * @param b Second sequence; must contain a 0 byte.
 * @return 0 if both sequences are identical up to and including the first
 *         0 byte, -1 if the first differing byte of a is smaller, 1 otherwise.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    const uint8_t* lhs = a;
    const uint8_t* rhs = b;

    /* walk both sequences in step while they agree */
    while (*lhs == *rhs) {
        if (*lhs == 0) {
            /* reached the terminator on both sides without a difference */
            return 0;
        }
        ++lhs;
        ++rhs;
    }

    if (*lhs < *rhs) {
        return -1;
    }
    return 1;
}
6512
6513static void TestImport(void)
6514{
6515    UCollator* vicoll;
6516    UCollator* escoll;
6517    UCollator* viescoll;
6518    UCollator* importviescoll;
6519    UParseError error;
6520    UErrorCode status = U_ZERO_ERROR;
6521    UChar* virules;
6522    int32_t viruleslength;
6523    UChar* esrules;
6524    int32_t esruleslength;
6525    UChar* viesrules;
6526    int32_t viesruleslength;
6527    char srules[500] = "[import vi][import es]";
6528    UChar rules[500];
6529    uint32_t length = 0;
6530    int32_t itemCount;
6531    int32_t i, k;
6532    UChar32 start;
6533    UChar32 end;
6534    UChar str[500];
6535    int32_t strLength;
6536
6537    uint8_t sk1[500];
6538    uint8_t sk2[500];
6539
6540    UBool b;
6541    USet* tailoredSet;
6542    USet* importTailoredSet;
6543
6544
6545    vicoll = ucol_open("vi", &status);
6546    if(U_FAILURE(status)){
6547        log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErrorName(status));
6548        return;
6549    }
6550
6551    virules = (UChar*) ucol_getRules(vicoll, &viruleslength);
6552    escoll = ucol_open("es", &status);
6553    esrules = (UChar*) ucol_getRules(escoll, &esruleslength);
6554    viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar*));
6555    viesrules[0] = 0;
6556    u_strcat(viesrules, virules);
6557    u_strcat(viesrules, esrules);
6558    viesruleslength = viruleslength + esruleslength;
6559    viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6560
6561    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6562    length = u_unescape(srules, rules, 500);
6563    importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6564    if(U_FAILURE(status)){
6565        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6566        return;
6567    }
6568
6569    tailoredSet = ucol_getTailoredSet(viescoll, &status);
6570    importTailoredSet = ucol_getTailoredSet(importviescoll, &status);
6571
6572    if(!uset_equals(tailoredSet, importTailoredSet)){
6573        log_err("Tailored sets not equal");
6574    }
6575
6576    uset_close(importTailoredSet);
6577
6578    itemCount = uset_getItemCount(tailoredSet);
6579
6580    for( i = 0; i < itemCount; i++){
6581        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6582        if(strLength < 2){
6583            for (; start <= end; start++){
6584                k = 0;
6585                U16_APPEND(str, k, 500, start, b);
6586                ucol_getSortKey(viescoll, str, 1, sk1, 500);
6587                ucol_getSortKey(importviescoll, str, 1, sk2, 500);
6588                if(compare_uint8_t_arrays(sk1, sk2) != 0){
6589                    log_err("Sort key for %s not equal\n", str);
6590                    break;
6591                }
6592            }
6593        }else{
6594            ucol_getSortKey(viescoll, str, strLength, sk1, 500);
6595            ucol_getSortKey(importviescoll, str, strLength, sk2, 500);
6596            if(compare_uint8_t_arrays(sk1, sk2) != 0){
6597                log_err("ZZSort key for %s not equal\n", str);
6598                break;
6599            }
6600
6601        }
6602    }
6603
6604    uset_close(tailoredSet);
6605
6606    uprv_free(viesrules);
6607
6608    ucol_close(vicoll);
6609    ucol_close(escoll);
6610    ucol_close(viescoll);
6611    ucol_close(importviescoll);
6612}
6613
6614static void TestImportWithType(void)
6615{
6616    UCollator* vicoll;
6617    UCollator* decoll;
6618    UCollator* videcoll;
6619    UCollator* importvidecoll;
6620    UParseError error;
6621    UErrorCode status = U_ZERO_ERROR;
6622    const UChar* virules;
6623    int32_t viruleslength;
6624    const UChar* derules;
6625    int32_t deruleslength;
6626    UChar* viderules;
6627    int32_t videruleslength;
6628    const char srules[500] = "[import vi][import de-u-co-phonebk]";
6629    UChar rules[500];
6630    uint32_t length = 0;
6631    int32_t itemCount;
6632    int32_t i, k;
6633    UChar32 start;
6634    UChar32 end;
6635    UChar str[500];
6636    int32_t strLength;
6637
6638    uint8_t sk1[500];
6639    uint8_t sk2[500];
6640
6641    USet* tailoredSet;
6642    USet* importTailoredSet;
6643
6644    vicoll = ucol_open("vi", &status);
6645    if(U_FAILURE(status)){
6646        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6647        return;
6648    }
6649    virules = ucol_getRules(vicoll, &viruleslength);
6650    /* decoll = ucol_open("de@collation=phonebook", &status); */
6651    decoll = ucol_open("de-u-co-phonebk", &status);
6652    if(U_FAILURE(status)){
6653        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6654        return;
6655    }
6656
6657
6658    derules = ucol_getRules(decoll, &deruleslength);
6659    viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar*));
6660    viderules[0] = 0;
6661    u_strcat(viderules, virules);
6662    u_strcat(viderules, derules);
6663    videruleslength = viruleslength + deruleslength;
6664    videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY, &error, &status);
6665
6666    /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */
6667    length = u_unescape(srules, rules, 500);
6668    importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &error, &status);
6669    if(U_FAILURE(status)){
6670        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
6671        return;
6672    }
6673
6674    tailoredSet = ucol_getTailoredSet(videcoll, &status);
6675    importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);
6676
6677    if(!uset_equals(tailoredSet, importTailoredSet)){
6678        log_err("Tailored sets not equal");
6679    }
6680
6681    uset_close(importTailoredSet);
6682
6683    itemCount = uset_getItemCount(tailoredSet);
6684
6685    for( i = 0; i < itemCount; i++){
6686        strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status);
6687        if(strLength < 2){
6688            for (; start <= end; start++){
6689                k = 0;
6690                U16_APPEND_UNSAFE(str, k, start);
6691                ucol_getSortKey(videcoll, str, 1, sk1, 500);
6692                ucol_getSortKey(importvidecoll, str, 1, sk2, 500);
6693                if(compare_uint8_t_arrays(sk1, sk2) != 0){
6694                    log_err("Sort key for %s not equal\n", str);
6695                    break;
6696                }
6697            }
6698        }else{
6699            ucol_getSortKey(videcoll, str, strLength, sk1, 500);
6700            ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);
6701            if(compare_uint8_t_arrays(sk1, sk2) != 0){
6702                log_err("Sort key for %s not equal\n", str);
6703                break;
6704            }
6705
6706        }
6707    }
6708
6709    uset_close(tailoredSet);
6710
6711    uprv_free(viderules);
6712
6713    ucol_close(videcoll);
6714    ucol_close(importvidecoll);
6715    ucol_close(vicoll);
6716    ucol_close(decoll);
6717}
6718
/*
 * Test string 1, referenced from longUpperStrItems. UTF-16 code units
 * (all ASCII, mostly upper case) spelling:
 * 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMICS AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia'
 * The array is not NUL-terminated; its length is taken from the array size.
 */
static const UChar longUpperStr1[]= { /* 155 chars */
    0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C,
    0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D, 0x20, 0x50, 0x52,
    0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45, 0x52, 0x45, 0x4E,
    0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49, 0x43, 0x53, 0x2C,
    0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53, 0x20, 0x41, 0x4E,
    0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x41, 0x4C, 0x20,
    0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F, 0x42, 0x4C, 0x45,
    0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E, 0x65, 0x20, 0x32,
    0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65, 0x72, 0x73, 0x62,
    0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61
};
6732
/*
 * Test string 2, referenced from longUpperStrItems:
 * 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times.
 * Each row below is one 25-unit repetition; the accented vowels are stored
 * as single code units in the range 0xC0..0xDA.
 * The array is not NUL-terminated; its length is taken from the array size.
 */
static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,
    0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20
};
6741
/*
 * Test string 3, referenced from longUpperStrItems:
 * 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times.
 * Each row below is one 27-unit repetition (A-Z followed by a space).
 * The array is not NUL-terminated; its length is taken from the array size.
 */
static const UChar longUpperStr3[]= { /* 324 chars */
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,
    0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20
};
6757
/* Element count of a true array (not a pointer). The parameter is
   parenthesized so that any array-valued expression expands safely. */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
6759
6760typedef struct {
6761    const UChar * longUpperStrPtr;
6762    int32_t       longUpperStrLen;
6763} LongUpperStrItem;
6764
6765/* String pointers must be in reverse collation order of the corresponding strings */
6766static const LongUpperStrItem longUpperStrItems[] = {
6767    { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },
6768    { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },
6769    { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },
6770    { NULL,          0                           }
6771};
6772
6773enum { kCollKeyLenMax = 800 }; /* longest expected is 749, but may change with collation changes */
6774
6775/* Text fix for #8445; without fix, could have crash due to stack or heap corruption */
6776static void TestCaseLevelBufferOverflow(void)
6777{
6778    UErrorCode status = U_ZERO_ERROR;
6779    UCollator * ucol = ucol_open("root", &status);
6780    if ( U_SUCCESS(status) ) {
6781        ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);
6782        if ( U_SUCCESS(status) ) {
6783            const LongUpperStrItem * itemPtr;
6784            uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];
6785            for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {
6786                int32_t sortKeyLen;
6787                if (itemPtr > longUpperStrItems) {
6788                    uprv_strcpy((char *)sortKeyB, (char *)sortKeyA);
6789                }
6790                sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, itemPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);
6791                if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) {
6792                    log_err("ERROR sort key length from ucol_getSortKey is %d\n", sortKeyLen);
6793                    break;
6794                }
6795                if ( itemPtr > longUpperStrItems ) {
6796                    int compareResult = uprv_strcmp((char *)sortKeyA, (char *)sortKeyB);
6797                    if (compareResult >= 0) {
6798                        log_err("ERROR in sort key comparison result, expected -1, got %d\n", compareResult);
6799                    }
6800                }
6801            }
6802        } else {
6803            log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL on: %s\n", myErrorName(status));
6804        }
6805        ucol_close(ucol);
6806    } else {
6807        log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(status));
6808    }
6809}
6810
6811
/* Registers test function x under the path "tscoll/cmsccoll/<x>".
   Expects a variable named `root` to be in scope at the point of use. */
#define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

/**
 * Adds the miscellaneous collation tests of this file to the test tree.
 * Tests are registered in the order listed below; entries that appear only
 * inside comments are intentionally not registered.
 *
 * @param root the test tree root passed through to addTest by TEST()
 */
void addMiscCollTest(TestNode** root)
{
    TEST(TestRuleOptions);
    TEST(TestBeforePrefixFailure);
    TEST(TestContractionClosure);
    TEST(TestPrefixCompose);
    TEST(TestStrCollIdenticalPrefix);
    TEST(TestPrefix);
    TEST(TestNewJapanese);
    /*TEST(TestLimitations);*/
    TEST(TestNonChars);
    TEST(TestExtremeCompression);
    TEST(TestSurrogates);
    /* BEGIN android-removed
       To save space, Android does not include the collation tailoring rules.
       We skip the tailoring tests for collations. */
    /* TEST(TestVariableTopSetting); */
    /* END android-removed */
    TEST(TestBocsuCoverage);
    TEST(TestCyrillicTailoring);
    TEST(TestCase);
    TEST(IncompleteCntTest);
    TEST(BlackBirdTest);
    TEST(FunkyATest);
    TEST(BillFairmanTest);
    TEST(RamsRulesTest);
    TEST(IsTailoredTest);
    TEST(TestCollations);
    TEST(TestChMove);
    TEST(TestImplicitTailoring);
    TEST(TestFCDProblem);
    TEST(TestEmptyRule);
    /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */
    TEST(TestJ815);
    /*TEST(TestJ831);*/ /* we changed lv locale */
    TEST(TestBefore);
    TEST(TestRedundantRules);
    TEST(TestExpansionSyntax);
    TEST(TestHangulTailoring);
    TEST(TestUCARules);
    TEST(TestIncrementalNormalize);
    TEST(TestComposeDecompose);
    TEST(TestCompressOverlap);
    TEST(TestContraction);
    TEST(TestExpansion);
    /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys */
    /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported */
    TEST(TestOptimize);
    TEST(TestSuppressContractions);
    TEST(Alexis2);
    TEST(TestHebrewUCA);
    TEST(TestPartialSortKeyTermination);
    TEST(TestSettings);
    TEST(TestEquals);
    TEST(TestJ2726);
    TEST(NullRule);
    TEST(TestNumericCollation);
    TEST(TestTibetanConformance);
    TEST(TestPinyinProblem);
    TEST(TestImplicitGeneration);
    TEST(TestSeparateTrees);
    TEST(TestBeforePinyin);
    TEST(TestBeforeTightening);
    /*TEST(TestMoreBefore);*/
    TEST(TestTailorNULL);
    TEST(TestUpperFirstQuaternary);
    TEST(TestJ4960);
    TEST(TestJ5223);
    TEST(TestJ5232);
    TEST(TestJ5367);
    TEST(TestHiragana);
    TEST(TestSortKeyConsistency);
    TEST(TestVI5913);  /* VI, RO tailored rules */
    TEST(TestCroatianSortKey);
    TEST(TestTailor6179);
    TEST(TestUCAPrecontext);
    TEST(TestOutOfBuffer5468);
    TEST(TestSameStrengthList);

    TEST(TestSameStrengthListQuoted);
    TEST(TestSameStrengthListSupplemental);
    TEST(TestSameStrengthListQwerty);
    TEST(TestSameStrengthListQuotedQwerty);
    TEST(TestSameStrengthListRanges);
    TEST(TestSameStrengthListSupplementalRanges);
    TEST(TestSpecialCharacters);
    TEST(TestPrivateUseCharacters);
    TEST(TestPrivateUseCharactersInList);
    TEST(TestPrivateUseCharactersInRange);
    TEST(TestInvalidListsAndRanges);
    /* BEGIN android-removed: Due to Android does not include reverse UCA table.
    TEST(TestImport);
    TEST(TestImportWithType);
    END android-removed */

    TEST(TestBeforeRuleWithScriptReordering);
    TEST(TestNonLeadBytesDuringCollationReordering);
    TEST(TestReorderingAPI);
    TEST(TestReorderingAPIWithRuleCreatedCollator);
    TEST(TestEquivalentReorderingScripts);
    TEST(TestGreekFirstReorder);
    TEST(TestGreekLastReorder);
    TEST(TestNonScriptReorder);
    TEST(TestHaniReorder);
    TEST(TestMultipleReorder);
    TEST(TestReorderingAcrossCloning);

    TEST(TestCaseLevelBufferOverflow);
}
6923
6924#endif /* #if !UCONFIG_NO_COLLATION */
6925