1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CITERTST.C
9*
10* Modification History:
11* Date      Name               Description
12*           Madhu Katragadda   Ported for C API
13* 02/19/01  synwee             Modified test case for new collation iterator
14*********************************************************************************/
15/*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/uloc.h"
26#include "unicode/uchar.h"
27#include "unicode/ustring.h"
28#include "unicode/putil.h"
29#include "callcoll.h"
30#include "cmemory.h"
31#include "cintltst.h"
32#include "citertst.h"
33#include "ccolltst.h"
34#include "filestrm.h"
35#include "cstring.h"
36#include "ucol_imp.h"
37#include "ucol_tok.h"
38#include <stdio.h>
39
/*
 * Local prototype for ucol_uprv_getCaseBits().
 * NOTE(review): no caller is visible in this part of the file, and the
 * definition is presumably in the collation library - confirm both before
 * changing or removing this declaration.
 */
extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
41
42void addCollIterTest(TestNode** root)
43{
44    addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
45    addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
46    addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
47    addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
48    addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
49    addTest(root, &TestNormalizedUnicodeChar,
50                                "tscoll/citertst/TestNormalizedUnicodeChar");
51    addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
52    addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
53    addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
54    addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
55    addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
56    addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
57    addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
58    addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
59    addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
60}
61
/*
 * Locale IDs opened by TestBug672() and TestBug672Normalize().  Both tests
 * index entries 0..2 of this table, so it must keep exactly three entries
 * unless those loops are changed as well.
 */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
65
66static void TestBug672() {
67    UErrorCode  status = U_ZERO_ERROR;
68    UChar       pattern[20];
69    UChar       text[50];
70    int         i;
71    int         result[3][3];
72
73    u_uastrcpy(pattern, "resume");
74    u_uastrcpy(text, "Time to resume updating my resume.");
75
76    for (i = 0; i < 3; ++ i) {
77        UCollator          *coll = ucol_open(LOCALES[i], &status);
78        UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
79                                                     &status);
80        UCollationElements *titer = ucol_openElements(coll, text, -1,
81                                                     &status);
82        if (U_FAILURE(status)) {
83            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
84                    myErrorName(status));
85            return;
86        }
87
88        log_verbose("locale tested %s\n", LOCALES[i]);
89
90        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
91               U_SUCCESS(status)) {
92        }
93        if (U_FAILURE(status)) {
94            log_err("ERROR: reversing collation iterator :%s\n",
95                    myErrorName(status));
96            return;
97        }
98        ucol_reset(pitr);
99
100        ucol_setOffset(titer, u_strlen(pattern), &status);
101        if (U_FAILURE(status)) {
102            log_err("ERROR: setting offset in collator :%s\n",
103                    myErrorName(status));
104            return;
105        }
106        result[i][0] = ucol_getOffset(titer);
107        log_verbose("Text iterator set to offset %d\n", result[i][0]);
108
109        /* Use previous() */
110        ucol_previous(titer, &status);
111        result[i][1] = ucol_getOffset(titer);
112        log_verbose("Current offset %d after previous\n", result[i][1]);
113
114        /* Add one to index */
115        log_verbose("Adding one to current offset...\n");
116        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
117        if (U_FAILURE(status)) {
118            log_err("ERROR: setting offset in collator :%s\n",
119                    myErrorName(status));
120            return;
121        }
122        result[i][2] = ucol_getOffset(titer);
123        log_verbose("Current offset in text = %d\n", result[i][2]);
124        ucol_closeElements(pitr);
125        ucol_closeElements(titer);
126        ucol_close(coll);
127    }
128
129    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
130        uprv_memcmp(result[1], result[2], 3) != 0) {
131        log_err("ERROR: Different locales have different offsets at the same character\n");
132    }
133}
134
135
136
137/*  Running this test with normalization enabled showed up a bug in the incremental
138    normalization code. */
139static void TestBug672Normalize() {
140    UErrorCode  status = U_ZERO_ERROR;
141    UChar       pattern[20];
142    UChar       text[50];
143    int         i;
144    int         result[3][3];
145
146    u_uastrcpy(pattern, "resume");
147    u_uastrcpy(text, "Time to resume updating my resume.");
148
149    for (i = 0; i < 3; ++ i) {
150        UCollator          *coll = ucol_open(LOCALES[i], &status);
151        UCollationElements *pitr = NULL;
152        UCollationElements *titer = NULL;
153
154        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
155
156        pitr = ucol_openElements(coll, pattern, -1, &status);
157        titer = ucol_openElements(coll, text, -1, &status);
158        if (U_FAILURE(status)) {
159            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
160                    myErrorName(status));
161            return;
162        }
163
164        log_verbose("locale tested %s\n", LOCALES[i]);
165
166        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
167               U_SUCCESS(status)) {
168        }
169        if (U_FAILURE(status)) {
170            log_err("ERROR: reversing collation iterator :%s\n",
171                    myErrorName(status));
172            return;
173        }
174        ucol_reset(pitr);
175
176        ucol_setOffset(titer, u_strlen(pattern), &status);
177        if (U_FAILURE(status)) {
178            log_err("ERROR: setting offset in collator :%s\n",
179                    myErrorName(status));
180            return;
181        }
182        result[i][0] = ucol_getOffset(titer);
183        log_verbose("Text iterator set to offset %d\n", result[i][0]);
184
185        /* Use previous() */
186        ucol_previous(titer, &status);
187        result[i][1] = ucol_getOffset(titer);
188        log_verbose("Current offset %d after previous\n", result[i][1]);
189
190        /* Add one to index */
191        log_verbose("Adding one to current offset...\n");
192        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
193        if (U_FAILURE(status)) {
194            log_err("ERROR: setting offset in collator :%s\n",
195                    myErrorName(status));
196            return;
197        }
198        result[i][2] = ucol_getOffset(titer);
199        log_verbose("Current offset in text = %d\n", result[i][2]);
200        ucol_closeElements(pitr);
201        ucol_closeElements(titer);
202        ucol_close(coll);
203    }
204
205    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
206        uprv_memcmp(result[1], result[2], 3) != 0) {
207        log_err("ERROR: Different locales have different offsets at the same character\n");
208    }
209}
210
211
212
213
214/**
215 * Test for CollationElementIterator previous and next for the whole set of
216 * unicode characters.
217 */
218static void TestUnicodeChar()
219{
220    UChar source[0x100];
221    UCollator *en_us;
222    UCollationElements *iter;
223    UErrorCode status = U_ZERO_ERROR;
224    UChar codepoint;
225
226    UChar *test;
227    en_us = ucol_open("en_US", &status);
228    if (U_FAILURE(status)){
229       log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
230              myErrorName(status));
231       return;
232    }
233
234    for (codepoint = 1; codepoint < 0xFFFE;)
235    {
236      test = source;
237
238      while (codepoint % 0xFF != 0)
239      {
240        if (u_isdefined(codepoint))
241          *(test ++) = codepoint;
242        codepoint ++;
243      }
244
245      if (u_isdefined(codepoint))
246        *(test ++) = codepoint;
247
248      if (codepoint != 0xFFFF)
249        codepoint ++;
250
251      *test = 0;
252      iter=ucol_openElements(en_us, source, u_strlen(source), &status);
253      if(U_FAILURE(status)){
254          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
255              myErrorName(status));
256          ucol_close(en_us);
257          return;
258      }
259      /* A basic test to see if it's working at all */
260      log_verbose("codepoint testing %x\n", codepoint);
261      backAndForth(iter);
262      ucol_closeElements(iter);
263
264      /* null termination test */
265      iter=ucol_openElements(en_us, source, -1, &status);
266      if(U_FAILURE(status)){
267          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
268              myErrorName(status));
269          ucol_close(en_us);
270          return;
271      }
272      /* A basic test to see if it's working at all */
273      backAndForth(iter);
274      ucol_closeElements(iter);
275    }
276
277    ucol_close(en_us);
278}
279
280/**
281 * Test for CollationElementIterator previous and next for the whole set of
282 * unicode characters with normalization on.
283 */
284static void TestNormalizedUnicodeChar()
285{
286    UChar source[0x100];
287    UCollator *th_th;
288    UCollationElements *iter;
289    UErrorCode status = U_ZERO_ERROR;
290    UChar codepoint;
291
292    UChar *test;
293    /* thai should have normalization on */
294    th_th = ucol_open("th_TH", &status);
295    if (U_FAILURE(status)){
296        log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
297              myErrorName(status));
298        return;
299    }
300
301    for (codepoint = 1; codepoint < 0xFFFE;)
302    {
303      test = source;
304
305      while (codepoint % 0xFF != 0)
306      {
307        if (u_isdefined(codepoint))
308          *(test ++) = codepoint;
309        codepoint ++;
310      }
311
312      if (u_isdefined(codepoint))
313        *(test ++) = codepoint;
314
315      if (codepoint != 0xFFFF)
316        codepoint ++;
317
318      *test = 0;
319      iter=ucol_openElements(th_th, source, u_strlen(source), &status);
320      if(U_FAILURE(status)){
321          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
322              myErrorName(status));
323            ucol_close(th_th);
324          return;
325      }
326
327      backAndForth(iter);
328      ucol_closeElements(iter);
329
330      iter=ucol_openElements(th_th, source, -1, &status);
331      if(U_FAILURE(status)){
332          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
333              myErrorName(status));
334            ucol_close(th_th);
335          return;
336      }
337
338      backAndForth(iter);
339      ucol_closeElements(iter);
340    }
341
342    ucol_close(th_th);
343}
344
345/**
346* Test the incremental normalization
347*/
348static void TestNormalization()
349{
350          UErrorCode          status = U_ZERO_ERROR;
351    const char               *str    =
352                            "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
353          UCollator          *coll;
354          UChar               rule[50];
355          int                 rulelen = u_unescape(str, rule, 50);
356          int                 count = 0;
357    const char                *testdata[] =
358                        {"\\u1ED9", "o\\u0323\\u0302",
359                        "\\u0300\\u0315", "\\u0315\\u0300",
360                        "A\\u0300\\u0315B", "A\\u0315\\u0300B",
361                        "A\\u0316\\u0315B", "A\\u0315\\u0316B",
362                        "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
363                        "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
364                        "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
365    int32_t   srclen;
366    UChar source[10];
367    UCollationElements *iter;
368
369    coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
370    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
371    if (U_FAILURE(status)){
372        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
373              myErrorName(status));
374        return;
375    }
376
377    srclen = u_unescape(testdata[0], source, 10);
378    iter = ucol_openElements(coll, source, srclen, &status);
379    backAndForth(iter);
380    ucol_closeElements(iter);
381
382    srclen = u_unescape(testdata[1], source, 10);
383    iter = ucol_openElements(coll, source, srclen, &status);
384    backAndForth(iter);
385    ucol_closeElements(iter);
386
387    while (count < 12) {
388        srclen = u_unescape(testdata[count], source, 10);
389        iter = ucol_openElements(coll, source, srclen, &status);
390
391        if (U_FAILURE(status)){
392            log_err("ERROR: in creation of collator element iterator\n %s\n",
393                  myErrorName(status));
394            return;
395        }
396        backAndForth(iter);
397        ucol_closeElements(iter);
398
399        iter = ucol_openElements(coll, source, -1, &status);
400
401        if (U_FAILURE(status)){
402            log_err("ERROR: in creation of collator element iterator\n %s\n",
403                  myErrorName(status));
404            return;
405        }
406        backAndForth(iter);
407        ucol_closeElements(iter);
408        count ++;
409    }
410    ucol_close(coll);
411}
412
413/**
414 * Test for CollationElementIterator.previous()
415 *
416 * @bug 4108758 - Make sure it works with contracting characters
417 *
418 */
419static void TestPrevious()
420{
421    UCollator *coll=NULL;
422    UChar rule[50];
423    UChar *source;
424    UCollator *c1, *c2, *c3;
425    UCollationElements *iter;
426    UErrorCode status = U_ZERO_ERROR;
427    UChar test1[50];
428    UChar test2[50];
429
430    u_uastrcpy(test1, "What subset of all possible test cases?");
431    u_uastrcpy(test2, "has the highest probability of detecting");
432    coll = ucol_open("en_US", &status);
433
434    iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
435    log_verbose("English locale testing back and forth\n");
436    if(U_FAILURE(status)){
437        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
438            myErrorName(status));
439        ucol_close(coll);
440        return;
441    }
442    /* A basic test to see if it's working at all */
443    backAndForth(iter);
444    ucol_closeElements(iter);
445    ucol_close(coll);
446
447    /* Test with a contracting character sequence */
448    u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
449    c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
450
451    log_verbose("Contraction rule testing back and forth with no normalization\n");
452
453    if (c1 == NULL || U_FAILURE(status))
454    {
455        log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
456            myErrorName(status));
457        return;
458    }
459    source=(UChar*)malloc(sizeof(UChar) * 20);
460    u_uastrcpy(source, "abchdcba");
461    iter=ucol_openElements(c1, source, u_strlen(source), &status);
462    if(U_FAILURE(status)){
463        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
464            myErrorName(status));
465        return;
466    }
467    backAndForth(iter);
468    ucol_closeElements(iter);
469    ucol_close(c1);
470
471    /* Test with an expanding character sequence */
472    u_uastrcpy(rule, "&a < b < c/abd < d");
473    c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
474    log_verbose("Expansion rule testing back and forth with no normalization\n");
475    if (c2 == NULL || U_FAILURE(status))
476    {
477        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
478            myErrorName(status));
479        return;
480    }
481    u_uastrcpy(source, "abcd");
482    iter=ucol_openElements(c2, source, u_strlen(source), &status);
483    if(U_FAILURE(status)){
484        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
485            myErrorName(status));
486        return;
487    }
488    backAndForth(iter);
489    ucol_closeElements(iter);
490    ucol_close(c2);
491    /* Now try both */
492    u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
493    c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
494    log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
495
496    if (c3 == NULL || U_FAILURE(status))
497    {
498        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
499            myErrorName(status));
500        return;
501    }
502    u_uastrcpy(source, "abcdbchdc");
503    iter=ucol_openElements(c3, source, u_strlen(source), &status);
504    if(U_FAILURE(status)){
505        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
506            myErrorName(status));
507        return;
508    }
509    backAndForth(iter);
510    ucol_closeElements(iter);
511    ucol_close(c3);
512    source[0] = 0x0e41;
513    source[1] = 0x0e02;
514    source[2] = 0x0e41;
515    source[3] = 0x0e02;
516    source[4] = 0x0e27;
517    source[5] = 0x61;
518    source[6] = 0x62;
519    source[7] = 0x63;
520    source[8] = 0;
521
522    coll = ucol_open("th_TH", &status);
523    log_verbose("Thai locale testing back and forth with normalization\n");
524    iter=ucol_openElements(coll, source, u_strlen(source), &status);
525    if(U_FAILURE(status)){
526        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
527            myErrorName(status));
528        return;
529    }
530    backAndForth(iter);
531    ucol_closeElements(iter);
532    ucol_close(coll);
533
534    /* prev test */
535    source[0] = 0x0061;
536    source[1] = 0x30CF;
537    source[2] = 0x3099;
538    source[3] = 0x30FC;
539    source[4] = 0;
540
541    coll = ucol_open("ja_JP", &status);
542    log_verbose("Japanese locale testing back and forth with normalization\n");
543    iter=ucol_openElements(coll, source, u_strlen(source), &status);
544    if(U_FAILURE(status)){
545        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
546            myErrorName(status));
547        return;
548    }
549    backAndForth(iter);
550    ucol_closeElements(iter);
551    ucol_close(coll);
552
553    free(source);
554}
555
556/**
557 * Test for getOffset() and setOffset()
558 */
559static void TestOffset()
560{
561    UErrorCode status= U_ZERO_ERROR;
562    UCollator *en_us=NULL;
563    UCollationElements *iter, *pristine;
564    int32_t offset;
565    OrderAndOffset *orders;
566    int32_t orderLength=0;
567    int     count = 0;
568    UChar test1[50];
569    UChar test2[50];
570
571    u_uastrcpy(test1, "What subset of all possible test cases?");
572    u_uastrcpy(test2, "has the highest probability of detecting");
573    en_us = ucol_open("en_US", &status);
574    log_verbose("Testing getOffset and setOffset for collations\n");
575    iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
576    if(U_FAILURE(status)){
577        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
578            myErrorName(status));
579        ucol_close(en_us);
580        return;
581    }
582
583    /* testing boundaries */
584    ucol_setOffset(iter, 0, &status);
585    if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
586        log_err("Error: After setting offset to 0, we should be at the end "
587                "of the backwards iteration");
588    }
589    ucol_setOffset(iter, u_strlen(test1), &status);
590    if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
591        log_err("Error: After setting offset to end of the string, we should "
592                "be at the end of the backwards iteration");
593    }
594
595    /* Run all the way through the iterator, then get the offset */
596
597    orders = getOrders(iter, &orderLength);
598
599    offset = ucol_getOffset(iter);
600
601    if (offset != u_strlen(test1))
602    {
603        log_err("offset at end != length %d vs %d\n", offset,
604            u_strlen(test1) );
605    }
606
607    /* Now set the offset back to the beginning and see if it works */
608    pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
609    if(U_FAILURE(status)){
610        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
611            myErrorName(status));
612    ucol_close(en_us);
613        return;
614    }
615    status = U_ZERO_ERROR;
616
617    ucol_setOffset(iter, 0, &status);
618    if (U_FAILURE(status))
619    {
620        log_err("setOffset failed. %s\n",    myErrorName(status));
621    }
622    else
623    {
624        assertEqual(iter, pristine);
625    }
626
627    ucol_closeElements(pristine);
628    ucol_closeElements(iter);
629    free(orders);
630
631    /* testing offsets in normalization buffer */
632    test1[0] = 0x61;
633    test1[1] = 0x300;
634    test1[2] = 0x316;
635    test1[3] = 0x62;
636    test1[4] = 0;
637    ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
638    iter = ucol_openElements(en_us, test1, 4, &status);
639    if(U_FAILURE(status)){
640        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
641            myErrorName(status));
642        ucol_close(en_us);
643        return;
644    }
645
646    count = 0;
647    while (ucol_next(iter, &status) != UCOL_NULLORDER &&
648        U_SUCCESS(status)) {
649        switch (count) {
650        case 0:
651            if (ucol_getOffset(iter) != 1) {
652                log_err("ERROR: Offset of iteration should be 1\n");
653            }
654            break;
655        case 3:
656            if (ucol_getOffset(iter) != 4) {
657                log_err("ERROR: Offset of iteration should be 4\n");
658            }
659            break;
660        default:
661            if (ucol_getOffset(iter) != 3) {
662                log_err("ERROR: Offset of iteration should be 3\n");
663            }
664        }
665        count ++;
666    }
667
668    ucol_reset(iter);
669    count = 0;
670    while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
671        U_SUCCESS(status)) {
672        switch (count) {
673        case 0:
674        case 1:
675            if (ucol_getOffset(iter) != 3) {
676                log_err("ERROR: Offset of iteration should be 3\n");
677            }
678            break;
679        case 2:
680            if (ucol_getOffset(iter) != 1) {
681                log_err("ERROR: Offset of iteration should be 1\n");
682            }
683            break;
684        default:
685            if (ucol_getOffset(iter) != 0) {
686                log_err("ERROR: Offset of iteration should be 0\n");
687            }
688        }
689        count ++;
690    }
691
692    if(U_FAILURE(status)){
693        log_err("ERROR: in iterating collation elements %s\n",
694            myErrorName(status));
695    }
696
697    ucol_closeElements(iter);
698    ucol_close(en_us);
699}
700
701/**
702 * Test for setText()
703 */
704static void TestSetText()
705{
706    int32_t c,i;
707    UErrorCode status = U_ZERO_ERROR;
708    UCollator *en_us=NULL;
709    UCollationElements *iter1, *iter2;
710    UChar test1[50];
711    UChar test2[50];
712
713    u_uastrcpy(test1, "What subset of all possible test cases?");
714    u_uastrcpy(test2, "has the highest probability of detecting");
715    en_us = ucol_open("en_US", &status);
716    log_verbose("testing setText for Collation elements\n");
717    iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
718    if(U_FAILURE(status)){
719        log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
720            myErrorName(status));
721    ucol_close(en_us);
722        return;
723    }
724    iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
725    if(U_FAILURE(status)){
726        log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
727            myErrorName(status));
728    ucol_close(en_us);
729        return;
730    }
731
732    /* Run through the second iterator just to exercise it */
733    c = ucol_next(iter2, &status);
734    i = 0;
735
736    while ( ++i < 10 && (c != UCOL_NULLORDER))
737    {
738        if (U_FAILURE(status))
739        {
740            log_err("iter2->next() returned an error. %s\n", myErrorName(status));
741            ucol_closeElements(iter2);
742            ucol_closeElements(iter1);
743    ucol_close(en_us);
744            return;
745        }
746
747        c = ucol_next(iter2, &status);
748    }
749
750    /* Now set it to point to the same string as the first iterator */
751    ucol_setText(iter2, test1, u_strlen(test1), &status);
752    if (U_FAILURE(status))
753    {
754        log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
755    }
756    else
757    {
758        assertEqual(iter1, iter2);
759    }
760
761    /* Now set it to point to a null string with fake length*/
762    ucol_setText(iter2, NULL, 2, &status);
763    if (U_FAILURE(status))
764    {
765        log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
766    }
767    else
768    {
769        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
770            log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
771        }
772    }
773
774    ucol_closeElements(iter2);
775    ucol_closeElements(iter1);
776    ucol_close(en_us);
777}
778
779/** @bug 4108762
780 * Test for getMaxExpansion()
781 */
782static void TestMaxExpansion()
783{
784    UErrorCode          status = U_ZERO_ERROR;
785    UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
786    UChar               ch     = 0;
787    UChar32             unassigned = 0xEFFFD;
788    UChar               supplementary[2];
789    uint32_t            index = 0;
790    UBool               isError = FALSE;
791    uint32_t            sorder = 0;
792    UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
793    uint32_t            temporder = 0;
794
795    UChar rule[256];
796    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
797    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
798        UCOL_DEFAULT_STRENGTH,NULL, &status);
799    if(U_SUCCESS(status) && coll) {
800      iter = ucol_openElements(coll, &ch, 1, &status);
801
802      while (ch < 0xFFFF && U_SUCCESS(status)) {
803          int      count = 1;
804          uint32_t order;
805          int32_t  size = 0;
806
807          ch ++;
808
809          ucol_setText(iter, &ch, 1, &status);
810          order = ucol_previous(iter, &status);
811
812          /* thai management */
813          if (order == 0)
814              order = ucol_previous(iter, &status);
815
816          while (U_SUCCESS(status) &&
817              ucol_previous(iter, &status) != UCOL_NULLORDER) {
818              count ++;
819          }
820
821          size = ucol_getMaxExpansion(iter, order);
822          if (U_FAILURE(status) || size < count) {
823              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
824                  ch, count);
825          }
826      }
827
828      /* testing for exact max expansion */
829      ch = 0;
830      while (ch < 0x61) {
831          uint32_t order;
832          int32_t  size;
833          ucol_setText(iter, &ch, 1, &status);
834          order = ucol_previous(iter, &status);
835          size  = ucol_getMaxExpansion(iter, order);
836          if (U_FAILURE(status) || size != 1) {
837              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
838                  ch, 1);
839          }
840          ch ++;
841      }
842
843      ch = 0x63;
844      ucol_setText(iter, &ch, 1, &status);
845      temporder = ucol_previous(iter, &status);
846
847      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
848          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
849                  ch, 3);
850      }
851
852      ch = 0x64;
853      ucol_setText(iter, &ch, 1, &status);
854      temporder = ucol_previous(iter, &status);
855
856      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
857          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
858                  ch, 3);
859      }
860
861      U16_APPEND(supplementary, index, 2, unassigned, isError);
862      ucol_setText(iter, supplementary, 2, &status);
863      sorder = ucol_previous(iter, &status);
864
865      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
866          log_err("Failure at codepoint %d, maximum expansion count < %d\n",
867                  ch, 2);
868      }
869
870      /* testing jamo */
871      ch = 0x1165;
872
873      ucol_setText(iter, &ch, 1, &status);
874      temporder = ucol_previous(iter, &status);
875      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
876          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
877                  ch, 3);
878      }
879
880      ucol_closeElements(iter);
881      ucol_close(coll);
882
883      /* testing special jamo &a<\u1160 */
884      rule[0] = 0x26;
885      rule[1] = 0x71;
886      rule[2] = 0x3c;
887      rule[3] = 0x1165;
888      rule[4] = 0x2f;
889      rule[5] = 0x71;
890      rule[6] = 0x71;
891      rule[7] = 0x71;
892      rule[8] = 0x71;
893      rule[9] = 0;
894
895      coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
896          UCOL_DEFAULT_STRENGTH,NULL, &status);
897      iter = ucol_openElements(coll, &ch, 1, &status);
898
899      temporder = ucol_previous(iter, &status);
900      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
901          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
902                  ch, 5);
903      }
904
905      ucol_closeElements(iter);
906      ucol_close(coll);
907    } else {
908      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
909    }
910
911}
912
913
914static void assertEqual(UCollationElements *i1, UCollationElements *i2)
915{
916    int32_t c1, c2;
917    int32_t count = 0;
918    UErrorCode status = U_ZERO_ERROR;
919
920    do
921    {
922        c1 = ucol_next(i1, &status);
923        c2 = ucol_next(i2, &status);
924
925        if (c1 != c2)
926        {
927            log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
928            break;
929        }
930
931        count += 1;
932    }
933    while (c1 != UCOL_NULLORDER);
934}
935
936/**
937 * Testing iterators with extremely small buffers
938 */
939static void TestSmallBuffer()
940{
941    UErrorCode          status = U_ZERO_ERROR;
942    UCollator          *coll;
943    UCollationElements *testiter,
944                       *iter;
945    int32_t             count = 0;
946    OrderAndOffset     *testorders,
947                       *orders;
948
949    UChar teststr[500];
950    UChar str[] = {0x300, 0x31A, 0};
951    /*
952    creating a long string of decomposable characters,
953    since by default the writable buffer is of size 256
954    */
955    while (count < 500) {
956        if ((count & 1) == 0) {
957            teststr[count ++] = 0x300;
958        }
959        else {
960            teststr[count ++] = 0x31A;
961        }
962    }
963
964    coll = ucol_open("th_TH", &status);
965    if(U_SUCCESS(status) && coll) {
966      testiter = ucol_openElements(coll, teststr, 500, &status);
967      iter = ucol_openElements(coll, str, 2, &status);
968
969      orders     = getOrders(iter, &count);
970      if (count != 2) {
971          log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
972      }
973
974      /*
975      this will rearrange the string data to 250 characters of 0x300 first then
976      250 characters of 0x031A
977      */
978      testorders = getOrders(testiter, &count);
979
980      if (count != 500) {
981          log_err("Error decomposition does not give the right sized collation elements\n");
982      }
983
984      while (count != 0) {
985          /* UCA collation element for 0x0F76 */
986          if ((count > 250 && testorders[-- count].order != orders[1].order) ||
987              (count <= 250 && testorders[-- count].order != orders[0].order)) {
988              log_err("Error decomposition does not give the right collation element at %d count\n", count);
989              break;
990          }
991      }
992
993      free(testorders);
994      free(orders);
995
996      ucol_reset(testiter);
997      /* ensures that the writable buffer was cleared */
998      if (testiter->iteratordata_.writableBuffer !=
999          testiter->iteratordata_.stackWritableBuffer) {
1000          log_err("Error Writable buffer in collation element iterator not reset\n");
1001      }
1002
1003      /* ensures closing of elements done properly to clear writable buffer */
1004      ucol_next(testiter, &status);
1005      ucol_next(testiter, &status);
1006      ucol_closeElements(testiter);
1007      ucol_closeElements(iter);
1008      ucol_close(coll);
1009    } else {
1010      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1011    }
1012}
1013
/**
 * Converts a single hexadecimal digit character to its numeric value.
 * (Snippet of code from genuca.)
 * @param hex character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are recognized
 * @return the value 0..15 of the digit, or 0 for any other character
 */
static int32_t hex2num(char hex) {
    if ('0' <= hex && hex <= '9') {
        return hex - '0';
    }
    if ('a' <= hex && hex <= 'f') {
        return 10 + (hex - 'a');
    }
    if ('A' <= hex && hex <= 'F') {
        return 10 + (hex - 'A');
    }
    /* not a hex digit: callers get 0, indistinguishable from '0' */
    return 0;
}
1028
1029/**
1030* Getting codepoints from a string
1031* @param str character string contain codepoints seperated by space and ended
1032*        by a semicolon
1033* @param codepoints array for storage, assuming size > 5
1034* @return position at the end of the codepoint section
1035*/
1036static char * getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1037    char *pStartCP = str;
1038    char *pEndCP   = str + 4;
1039
1040    *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1041                          (hex2num(*(pStartCP + 1)) << 8) |
1042                          (hex2num(*(pStartCP + 2)) << 4) |
1043                          (hex2num(*(pStartCP + 3))));
1044    if (*pEndCP == '|' || *(pEndCP+1) == '|') {
1045        /* pre-context rule */
1046        pStartCP = pEndCP;
1047        while (*pStartCP==' ' || *pStartCP== '|' ) {
1048            pStartCP++;
1049        }
1050        pEndCP = pStartCP+4;
1051        *contextCPs = *codepoints;
1052        *(++codepoints) = (UChar)((hex2num(*pStartCP) << 12) |
1053                                  (hex2num(*(pStartCP + 1)) << 8) |
1054                                  (hex2num(*(pStartCP + 2)) << 4) |
1055                                  (hex2num(*(pStartCP + 3))));
1056        contextCPs++;
1057    }
1058    *contextCPs = 0;
1059    codepoints ++;
1060    while (*pEndCP != ';') {
1061        pStartCP = pEndCP + 1;
1062        *codepoints = (UChar)((hex2num(*pStartCP) << 12) |
1063                          (hex2num(*(pStartCP + 1)) << 8) |
1064                          (hex2num(*(pStartCP + 2)) << 4) |
1065                          (hex2num(*(pStartCP + 3))));
1066        codepoints ++;
1067        pEndCP = pStartCP + 4;
1068    }
1069    *codepoints = 0;
1070    return pEndCP + 1;
1071}
1072
1073/**
1074* Sniplets of code from genuca
1075*/
1076static int32_t
1077readElement(char **from, char *to, char separator, UErrorCode *status)
1078{
1079    if (U_SUCCESS(*status)) {
1080        char    buffer[1024];
1081        int32_t i = 0;
1082        while (**from != separator) {
1083            if (**from != ' ') {
1084                *(buffer+i++) = **from;
1085            }
1086            (*from)++;
1087        }
1088        (*from)++;
1089        *(buffer + i) = 0;
1090        strcpy(to, buffer);
1091        return i/2;
1092    }
1093
1094    return 0;
1095}
1096
1097/**
1098* Sniplets of code from genuca
1099*/
1100static uint32_t
1101getSingleCEValue(char *primary, char *secondary, char *tertiary,
1102                          UErrorCode *status)
1103{
1104    if (U_SUCCESS(*status)) {
1105        uint32_t  value    = 0;
1106        char      primsave = '\0';
1107        char      secsave  = '\0';
1108        char      tersave  = '\0';
1109        char     *primend  = primary+4;
1110        char     *secend   = secondary+2;
1111        char     *terend   = tertiary+2;
1112        uint32_t  primvalue;
1113        uint32_t  secvalue;
1114        uint32_t  tervalue;
1115
1116        if (uprv_strlen(primary) > 4) {
1117            primsave = *primend;
1118            *primend = '\0';
1119        }
1120
1121        if (uprv_strlen(secondary) > 2) {
1122            secsave = *secend;
1123            *secend = '\0';
1124        }
1125
1126        if (uprv_strlen(tertiary) > 2) {
1127            tersave = *terend;
1128            *terend = '\0';
1129        }
1130
1131        primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1132        secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1133        tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1134        if(primvalue <= 0xFF) {
1135          primvalue <<= 8;
1136        }
1137
1138        value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1139           | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1140           | (tervalue & UCOL_TERTIARYORDERMASK);
1141
1142        if(primsave!='\0') {
1143            *primend = primsave;
1144        }
1145        if(secsave!='\0') {
1146            *secend = secsave;
1147        }
1148        if(tersave!='\0') {
1149            *terend = tersave;
1150        }
1151        return value;
1152    }
1153    return 0;
1154}
1155
1156/**
1157* Getting collation elements generated from a string
1158* @param str character string contain collation elements contained in [] and
1159*        seperated by space
1160* @param ce array for storage, assuming size > 20
1161* @param status error status
1162* @return position at the end of the codepoint section
1163*/
1164static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1165    char       *pStartCP     = uprv_strchr(str, '[');
1166    int         count        = 0;
1167    char       *pEndCP;
1168    char        primary[100];
1169    char        secondary[100];
1170    char        tertiary[100];
1171
1172    while (*pStartCP == '[') {
1173        uint32_t primarycount   = 0;
1174        uint32_t secondarycount = 0;
1175        uint32_t tertiarycount  = 0;
1176        uint32_t CEi = 1;
1177        pEndCP = strchr(pStartCP, ']');
1178        if(pEndCP == NULL) {
1179            break;
1180        }
1181        pStartCP ++;
1182
1183        primarycount   = readElement(&pStartCP, primary, ',', status);
1184        secondarycount = readElement(&pStartCP, secondary, ',', status);
1185        tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1186
1187        /* I want to get the CEs entered right here, including continuation */
1188        ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1189        if (U_FAILURE(*status)) {
1190            break;
1191        }
1192
1193        while (2 * CEi < primarycount || CEi < secondarycount ||
1194               CEi < tertiarycount) {
1195            uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1196            if (2 * CEi < primarycount) {
1197                value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1198                value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1199            }
1200
1201            if (2 * CEi + 1 < primarycount) {
1202                value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1203                value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1204            }
1205
1206            if (CEi < secondarycount) {
1207                value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1208                value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1209            }
1210
1211            if (CEi < tertiarycount) {
1212                value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1213                value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1214            }
1215
1216            CEi ++;
1217            ces[count ++] = value;
1218        }
1219
1220      pStartCP = pEndCP + 1;
1221    }
1222    ces[count] = 0;
1223    return pStartCP;
1224}
1225
1226/**
1227* Getting the FractionalUCA.txt file stream
1228*/
1229static FileStream * getFractionalUCA(void)
1230{
1231    char        newPath[256];
1232    char        backupPath[256];
1233    FileStream *result = NULL;
1234
1235    /* Look inside ICU_DATA first */
1236    uprv_strcpy(newPath, ctest_dataSrcDir());
1237    uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1238    uprv_strcat(newPath, "FractionalUCA.txt");
1239
1240    /* As a fallback, try to guess where the source data was located
1241     *   at the time ICU was built, and look there.
1242     */
1243#if defined (U_TOPSRCDIR)
1244    strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1245#else
1246    {
1247        UErrorCode errorCode = U_ZERO_ERROR;
1248        strcpy(backupPath, loadTestData(&errorCode));
1249        strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1250    }
1251#endif
1252    strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1253
1254    result = T_FileStream_open(newPath, "rb");
1255
1256    if (result == NULL) {
1257        result = T_FileStream_open(backupPath, "rb");
1258        if (result == NULL) {
1259            log_err("Failed to open either %s or %s\n", newPath, backupPath);
1260        }
1261    }
1262    return result;
1263}
1264
1265/**
1266* Testing the CEs returned by the iterator
1267*/
1268static void TestCEs() {
1269    FileStream *file = NULL;
1270    char        line[1024];
1271    char       *str;
1272    UChar       codepoints[10];
1273    uint32_t    ces[20];
1274    UErrorCode  status = U_ZERO_ERROR;
1275    UCollator          *coll = ucol_open("", &status);
1276    uint32_t lineNo = 0;
1277    UChar       contextCPs[5];
1278
1279    if (U_FAILURE(status)) {
1280        log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1281        return;
1282    }
1283
1284    file = getFractionalUCA();
1285
1286    if (file == NULL) {
1287        log_err("*** unable to open input FractionalUCA.txt file ***\n");
1288        return;
1289    }
1290
1291
1292    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1293        int                 count = 0;
1294        UCollationElements *iter;
1295        int32_t            preContextCeLen=0;
1296        lineNo++;
1297        /* skip this line if it is empty or a comment or is a return value
1298        or start of some variable section */
1299        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1300            line[0] == 0x000D || line[0] == '[') {
1301            continue;
1302        }
1303
1304        str = getCodePoints(line, codepoints, contextCPs);
1305
1306        /* these are 'fake' codepoints in the fractional UCA, and are used just
1307         * for positioning of indirect values. They should not go through this
1308         * test.
1309         */
1310        if(*codepoints == 0xFDD0) {
1311          continue;
1312        }
1313        if (*contextCPs != 0) {
1314            iter = ucol_openElements(coll, contextCPs, -1, &status);
1315            if (U_FAILURE(status)) {
1316                log_err("Error in opening collation elements\n");
1317                break;
1318            }
1319            while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1320                preContextCeLen++;
1321            }
1322            ucol_closeElements(iter);
1323        }
1324
1325        getCEs(str, ces+preContextCeLen, &status);
1326        if (U_FAILURE(status)) {
1327            log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1328            break;
1329        }
1330        iter = ucol_openElements(coll, codepoints, -1, &status);
1331        if (U_FAILURE(status)) {
1332            log_err("Error in opening collation elements\n");
1333            break;
1334        }
1335        for (;;) {
1336            uint32_t ce = (uint32_t)ucol_next(iter, &status);
1337            if (ce == 0xFFFFFFFF) {
1338                ce = 0;
1339            }
1340            /* we now unconditionally reorder Thai/Lao prevowels, so this
1341             * test would fail if we don't skip here.
1342             */
1343            if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1344              continue;
1345            }
1346            if (ce != ces[count] || U_FAILURE(status)) {
1347                log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1348                break;
1349            }
1350            if (ces[count] == 0) {
1351                break;
1352            }
1353            count ++;
1354        }
1355        ucol_closeElements(iter);
1356    }
1357
1358    T_FileStream_close(file);
1359    ucol_close(coll);
1360}
1361
1362/**
1363* Testing the discontigous contractions
1364*/
1365static void TestDiscontiguos() {
1366    const char               *rulestr    =
1367                            "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1368          UChar               rule[50];
1369          int                 rulelen = u_unescape(rulestr, rule, 50);
1370    const char               *src[] = {
1371     "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1372    /* base character blocked */
1373     "XD\\u0300", "XD\\u0300\\u0315",
1374    /* non blocking combining character */
1375     "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1376     /* blocking combining character */
1377     "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1378     /* contraction prefix */
1379     "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1380     "X\\u0300\\u031A\\u0315",
1381     /* ends not with a contraction character */
1382     "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1383     "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1384    };
1385    const char               *tgt[] = {
1386     /* non blocking combining character */
1387     "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1388    /* base character blocked */
1389     "X D \\u0300", "X D \\u0300\\u0315",
1390    /* non blocking combining character */
1391     "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1392     /* blocking combining character */
1393     "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1394     /* contraction prefix */
1395     "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1396     "X\\u0300 \\u031A \\u0315",
1397     /* ends not with a contraction character */
1398     "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1399     "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1400    };
1401          int                 size   = 20;
1402          UCollator          *coll;
1403          UErrorCode          status    = U_ZERO_ERROR;
1404          int                 count     = 0;
1405          UCollationElements *iter;
1406          UCollationElements *resultiter;
1407
1408    coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1409    iter       = ucol_openElements(coll, rule, 1, &status);
1410    resultiter = ucol_openElements(coll, rule, 1, &status);
1411
1412    if (U_FAILURE(status)) {
1413        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1414        return;
1415    }
1416
1417    while (count < size) {
1418        UChar  str[20];
1419        UChar  tstr[20];
1420        int    strLen = u_unescape(src[count], str, 20);
1421        UChar *s;
1422
1423        ucol_setText(iter, str, strLen, &status);
1424        if (U_FAILURE(status)) {
1425            log_err("Error opening collation iterator\n");
1426            return;
1427        }
1428
1429        u_unescape(tgt[count], tstr, 20);
1430        s = tstr;
1431
1432        log_verbose("count %d\n", count);
1433
1434        for (;;) {
1435            uint32_t  ce;
1436            UChar    *e = u_strchr(s, 0x20);
1437            if (e == 0) {
1438                e = u_strchr(s, 0);
1439            }
1440            ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1441            ce = ucol_next(resultiter, &status);
1442            if (U_FAILURE(status)) {
1443                log_err("Error manipulating collation iterator\n");
1444                return;
1445            }
1446            while (ce != UCOL_NULLORDER) {
1447                if (ce != (uint32_t)ucol_next(iter, &status) ||
1448                    U_FAILURE(status)) {
1449                    log_err("Discontiguos contraction test mismatch\n");
1450                    return;
1451                }
1452                ce = ucol_next(resultiter, &status);
1453                if (U_FAILURE(status)) {
1454                    log_err("Error getting next collation element\n");
1455                    return;
1456                }
1457            }
1458            s = e + 1;
1459            if (*e == 0) {
1460                break;
1461            }
1462        }
1463        ucol_reset(iter);
1464        backAndForth(iter);
1465        count ++;
1466    }
1467    ucol_closeElements(resultiter);
1468    ucol_closeElements(iter);
1469    ucol_close(coll);
1470}
1471
1472static void TestCEBufferOverflow()
1473{
1474    UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1475    UErrorCode          status = U_ZERO_ERROR;
1476    UChar               rule[10];
1477    UCollator          *coll;
1478    UCollationElements *iter;
1479
1480    u_uastrcpy(rule, "&z < AB");
1481    coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1482    if (U_FAILURE(status)) {
1483        log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1484        return;
1485    }
1486
1487    /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1488    test. this will cause an overflow in getPrev */
1489    str[0] = 0x0041;    /* 'A' */
1490    /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1491    uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1492    str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1493    iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1494                             &status);
1495    if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1496        status == U_BUFFER_OVERFLOW_ERROR) {
1497        log_err("CE buffer should not overflow with long string of trail surrogates\n");
1498    }
1499    ucol_closeElements(iter);
1500    ucol_close(coll);
1501}
1502
1503/**
1504* Byte bounds checks. Checks if each byte in data is between upper and lower
1505* inclusive.
1506*/
1507static UBool checkByteBounds(uint32_t data, char upper, char lower)
1508{
1509    int count = 4;
1510    while (count > 0) {
1511        char b = (char)(data & 0xFF);
1512        if (b > upper || b < lower) {
1513            return FALSE;
1514        }
1515        data = data >> 8;
1516        count --;
1517    }
1518    return TRUE;
1519}
1520
/**
* Determines case of the string of codepoints.
* If it is a multiple codepoints it has to treated as a contraction.
* This function is currently compiled out (#if 0).
* @param s code units to classify
* @param len number of code units in s
* @return UCOL_LOWER_CASE, UCOL_UPPER_CASE or UCOL_MIXED_CASE
*/
#if 0
static uint8_t getCase(const UChar *s, uint32_t len) {
    UBool       lower = FALSE;
    UBool       upper = FALSE;
    UBool       title = FALSE;
    uint32_t    i;

    if (len == 0) {
        return UCOL_LOWER_CASE;
    }

    /* index-based scan: len must stay intact for the length test below */
    for (i = 0; i < len; i ++) {
        UChar c = s[i];

        if (u_islower(c)) {
            lower = TRUE;
        }
        if (u_isupper(c)) {
            upper = TRUE;
        }
        if (u_istitle(c)) {
            title = TRUE;
        }
    }
    if ((lower && !upper && !title) || (!lower && !upper && !title)){
        return UCOL_LOWER_CASE;
    }
    if (upper && !lower && !title) {
        return UCOL_UPPER_CASE;
    }
    /* mix of cases here */
    /* (an NFKD normalization step of the input was disabled at this point) */

    if ((title && len >= 2) || (lower && upper)) {
        return UCOL_MIXED_CASE;
    }
    if (u_isupper(s[0])) {
        return UCOL_UPPER_CASE;
    }
    return UCOL_LOWER_CASE;
}
#endif
1575
1576/**
1577* Checking collation element validity given the boundary arguments.
1578*/
1579static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1580                             int length, uint32_t primarymax,
1581                             uint32_t secondarymax)
1582{
1583    UErrorCode          status = U_ZERO_ERROR;
1584    UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
1585                                                  &status);
1586    uint32_t            ce;
1587    UBool               first  = TRUE;
1588/*
1589    UBool               upper  = FALSE;
1590    UBool               lower  = FALSE;
1591*/
1592
1593    if (U_FAILURE(status)) {
1594        log_err("Error creating iterator for testing validity\n");
1595    }
1596
1597    ce = ucol_next(iter, &status);
1598
1599    while (ce != UCOL_NULLORDER) {
1600       if (ce != 0) {
1601           uint32_t primary   = UCOL_PRIMARYORDER(ce);
1602           uint32_t secondary = UCOL_SECONDARYORDER(ce);
1603           uint32_t tertiary  = UCOL_TERTIARYORDER(ce);
1604/*           uint32_t scasebits = tertiary & 0xC0;*/
1605
1606           if ((tertiary == 0 && secondary != 0) ||
1607               (tertiary < 0xC0 && secondary == 0 && primary != 0)) {
1608               /* n-1th level is not zero when the nth level is
1609                  except for continuations, this is wrong */
1610               log_err("Lower level weight not 0 when high level weight is 0\n");
1611               goto fail;
1612           }
1613           else {
1614               /* checks if any byte is illegal ie = 01 02 03. */
1615               if (checkByteBounds(ce, 0x3, 0x1)) {
1616                   log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
1617                   goto fail;
1618               }
1619           }
1620           if ((primary != 0 && primary < primarymax)
1621               || ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF)
1622               || ((primary & 0xFF) && ((primary & 0xFF) <= 0x03))
1623               || (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 0x03)
1624               || (primary >= 0xFE00 && !isContinuation(ce))) {
1625               log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
1626                   primary, codepoints[0]);
1627               goto fail;
1628           }
1629           /* case matching not done since data generated by ken */
1630           if (first) {
1631               if (secondary >= 6 && secondary <= secondarymax) {
1632                   log_err("Secondary weight out of range\n");
1633                   goto fail;
1634               }
1635               first = FALSE;
1636           }
1637       }
1638       ce   = ucol_next(iter, &status);
1639   }
1640   ucol_closeElements(iter);
1641   return TRUE;
1642fail :
1643   ucol_closeElements(iter);
1644   return FALSE;
1645}
1646
1647static void TestCEValidity()
1648{
1649    /* testing UCA collation elements */
1650    UErrorCode  status      = U_ZERO_ERROR;
1651    /* en_US has no tailorings */
1652    UCollator  *coll        = ucol_open("root", &status);
1653    /* tailored locales */
1654    char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1655    const char *loc;
1656    FileStream *file = NULL;
1657    char        line[1024];
1658    UChar       codepoints[10];
1659    int         count = 0;
1660    int         maxCount = 0;
1661    UChar       contextCPs[3];
1662    UParseError parseError;
1663    if (U_FAILURE(status)) {
1664        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1665        return;
1666    }
1667    log_verbose("Testing UCA elements\n");
1668    file = getFractionalUCA();
1669    if (file == NULL) {
1670        log_err("Fractional UCA data can not be opened\n");
1671        return;
1672    }
1673
1674    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1675        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1676            line[0] == 0x000D || line[0] == '[') {
1677            continue;
1678        }
1679
1680        getCodePoints(line, codepoints, contextCPs);
1681        checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
1682    }
1683
1684    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1685    codepoints[0] = 0;
1686    while (codepoints[0] < 0xFFFF) {
1687        if (u_isdefined((UChar32)codepoints[0])) {
1688            checkCEValidity(coll, codepoints, 1, 5, 86);
1689        }
1690        codepoints[0] ++;
1691    }
1692
1693    ucol_close(coll);
1694
1695    /* testing tailored collation elements */
1696    log_verbose("Testing tailored elements\n");
1697    if(QUICK) {
1698        maxCount = sizeof(locale)/sizeof(locale[0]);
1699    } else {
1700        maxCount = uloc_countAvailable();
1701    }
1702    while (count < maxCount) {
1703        const UChar *rules = NULL,
1704                    *current = NULL;
1705        UChar *rulesCopy = NULL;
1706        int32_t ruleLen = 0;
1707
1708        uint32_t chOffset = 0;
1709        uint32_t chLen = 0;
1710        uint32_t exOffset = 0;
1711        uint32_t exLen = 0;
1712        uint32_t prefixOffset = 0;
1713        uint32_t prefixLen = 0;
1714        UBool    startOfRules = TRUE;
1715        UColOptionSet opts;
1716
1717        UColTokenParser src;
1718        uint32_t strength = 0;
1719        uint16_t specs = 0;
1720        if(QUICK) {
1721            loc = locale[count];
1722        } else {
1723            loc = uloc_getAvailable(count);
1724            if(!hasCollationElements(loc)) {
1725                count++;
1726                continue;
1727            }
1728        }
1729
1730        log_verbose("Testing CEs for %s\n", loc);
1731
1732        coll      = ucol_open(loc, &status);
1733        if (U_FAILURE(status)) {
1734            log_err("%s collator creation failed\n", loc);
1735            return;
1736        }
1737
1738        src.opts = &opts;
1739        rules = ucol_getRules(coll, &ruleLen);
1740
1741        if (ruleLen > 0) {
1742            rulesCopy = (UChar *)malloc((ruleLen +
1743                UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1744            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1745            src.current = src.source = rulesCopy;
1746            src.end = rulesCopy + ruleLen;
1747            src.extraCurrent = src.end;
1748            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1749
1750            while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1751              strength = src.parsedToken.strength;
1752              chOffset = src.parsedToken.charsOffset;
1753              chLen = src.parsedToken.charsLen;
1754              exOffset = src.parsedToken.extensionOffset;
1755              exLen = src.parsedToken.extensionLen;
1756              prefixOffset = src.parsedToken.prefixOffset;
1757              prefixLen = src.parsedToken.prefixLen;
1758              specs = src.parsedToken.flags;
1759
1760                startOfRules = FALSE;
1761                uprv_memcpy(codepoints, src.source + chOffset,
1762                                                       chLen * sizeof(UChar));
1763                codepoints[chLen] = 0;
1764                checkCEValidity(coll, codepoints, chLen, 4, 85);
1765            }
1766            free(rulesCopy);
1767        }
1768
1769        ucol_close(coll);
1770        count ++;
1771    }
1772    T_FileStream_close(file);
1773}
1774
1775static void printSortKeyError(const UChar   *codepoints, int length,
1776                                    uint8_t *sortkey, int sklen)
1777{
1778    int count = 0;
1779    log_err("Sortkey not valid for ");
1780    while (length > 0) {
1781        log_err("0x%04x ", *codepoints);
1782        length --;
1783        codepoints ++;
1784    }
1785    log_err("\nSortkey : ");
1786    while (count < sklen) {
1787        log_err("0x%02x ", sortkey[count]);
1788        count ++;
1789    }
1790    log_err("\n");
1791}
1792
1793/**
1794* Checking sort key validity for all levels
1795*/
1796static UBool checkSortKeyValidity(UCollator *coll,
1797                                  const UChar *codepoints,
1798                                  int length)
1799{
1800    UErrorCode status  = U_ZERO_ERROR;
1801    UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1802                                      UCOL_TERTIARY, UCOL_QUATERNARY,
1803                                      UCOL_IDENTICAL};
1804    int        strengthlen = 5;
1805    int        index       = 0;
1806    int        caselevel   = 0;
1807
1808    while (caselevel < 1) {
1809        if (caselevel == 0) {
1810            ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1811        }
1812        else {
1813            ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1814        }
1815
1816        while (index < strengthlen) {
1817            int        count01 = 0;
1818            uint32_t   count   = 0;
1819            uint8_t    sortkey[128];
1820            uint32_t   sklen;
1821
1822            ucol_setStrength(coll, strength[index]);
1823            sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1824            while (sortkey[count] != 0) {
1825                if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && index != 4)) {
1826                    printSortKeyError(codepoints, length, sortkey, sklen);
1827                    return FALSE;
1828                }
1829                if (sortkey[count] == 1) {
1830                    count01 ++;
1831                }
1832                count ++;
1833            }
1834
1835            if (count + 1 != sklen || (count01 != index + caselevel)) {
1836                printSortKeyError(codepoints, length, sortkey, sklen);
1837                return FALSE;
1838            }
1839            index ++;
1840        }
1841        caselevel ++;
1842    }
1843    return TRUE;
1844}
1845
1846static void TestSortKeyValidity(void)
1847{
1848    /* testing UCA collation elements */
1849    UErrorCode  status      = U_ZERO_ERROR;
1850    /* en_US has no tailorings */
1851    UCollator  *coll        = ucol_open("en_US", &status);
1852    /* tailored locales */
1853    char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1854    FileStream *file = NULL;
1855    char        line[1024];
1856    UChar       codepoints[10];
1857    int         count = 0;
1858    UChar       contextCPs[5];
1859    UParseError parseError;
1860    if (U_FAILURE(status)) {
1861        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1862        return;
1863    }
1864    log_verbose("Testing UCA elements\n");
1865    file = getFractionalUCA();
1866    if (file == NULL) {
1867        log_err("Fractional UCA data can not be opened\n");
1868        return;
1869    }
1870
1871    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1872        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1873            line[0] == 0x000D || line[0] == '[') {
1874            continue;
1875        }
1876
1877        getCodePoints(line, codepoints, contextCPs);
1878        checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1879    }
1880
1881    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1882    codepoints[0] = 0;
1883
1884    while (codepoints[0] < 0xFFFF) {
1885        if (u_isdefined((UChar32)codepoints[0])) {
1886            checkSortKeyValidity(coll, codepoints, 1);
1887        }
1888        codepoints[0] ++;
1889    }
1890
1891    ucol_close(coll);
1892
1893    /* testing tailored collation elements */
1894    log_verbose("Testing tailored elements\n");
1895    while (count < 5) {
1896        const UChar *rules = NULL,
1897                    *current = NULL;
1898        UChar *rulesCopy = NULL;
1899        int32_t ruleLen = 0;
1900
1901        uint32_t chOffset = 0;
1902        uint32_t chLen = 0;
1903        uint32_t exOffset = 0;
1904        uint32_t exLen = 0;
1905        uint32_t prefixOffset = 0;
1906        uint32_t prefixLen = 0;
1907        UBool    startOfRules = TRUE;
1908        UColOptionSet opts;
1909
1910        UColTokenParser src;
1911        uint32_t strength = 0;
1912        uint16_t specs = 0;
1913
1914        coll      = ucol_open(locale[count], &status);
1915        if (U_FAILURE(status)) {
1916            log_err("%s collator creation failed\n", locale[count]);
1917            return;
1918        }
1919
1920        src.opts = &opts;
1921        rules = ucol_getRules(coll, &ruleLen);
1922
1923        if (ruleLen > 0) {
1924            rulesCopy = (UChar *)malloc((ruleLen +
1925                UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1926            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1927            src.current = src.source = rulesCopy;
1928            src.end = rulesCopy + ruleLen;
1929            src.extraCurrent = src.end;
1930            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1931
1932            while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1933                strength = src.parsedToken.strength;
1934                chOffset = src.parsedToken.charsOffset;
1935                chLen = src.parsedToken.charsLen;
1936                exOffset = src.parsedToken.extensionOffset;
1937                exLen = src.parsedToken.extensionLen;
1938                prefixOffset = src.parsedToken.prefixOffset;
1939                prefixLen = src.parsedToken.prefixLen;
1940                specs = src.parsedToken.flags;
1941
1942                startOfRules = FALSE;
1943                uprv_memcpy(codepoints, src.source + chOffset,
1944                                                       chLen * sizeof(UChar));
1945                codepoints[chLen] = 0;
1946                checkSortKeyValidity(coll, codepoints, chLen);
1947            }
1948            free(rulesCopy);
1949        }
1950
1951        ucol_close(coll);
1952        count ++;
1953    }
1954    T_FileStream_close(file);
1955}
1956
1957#endif /* #if !UCONFIG_NO_COLLATION */
1958