1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CITERTST.C
9*
10* Modification History:
11* Date      Name               Description
12*           Madhu Katragadda   Ported for C API
13* 02/19/01  synwee             Modified test case for new collation iterator
14*********************************************************************************/
15/*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "unicode/uchar.h"
28#include "unicode/ustring.h"
29#include "unicode/putil.h"
30#include "callcoll.h"
31#include "cmemory.h"
32#include "cintltst.h"
33#include "citertst.h"
34#include "ccolltst.h"
35#include "filestrm.h"
36#include "cstring.h"
37#include "ucol_imp.h"
38#include "uparse.h"
39#include <stdio.h>
40
41extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
42
43void addCollIterTest(TestNode** root)
44{
45    addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
46    addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
47    addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
48    addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
49    addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
50    addTest(root, &TestNormalizedUnicodeChar,
51                                "tscoll/citertst/TestNormalizedUnicodeChar");
52    addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
53    addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
54    addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
55    addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
56    addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
57    addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
58}
59
/* Locales iterated over by the TestBug672 tests in this file. */

static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
63
64static void TestBug672() {
65    UErrorCode  status = U_ZERO_ERROR;
66    UChar       pattern[20];
67    UChar       text[50];
68    int         i;
69    int         result[3][3];
70
71    u_uastrcpy(pattern, "resume");
72    u_uastrcpy(text, "Time to resume updating my resume.");
73
74    for (i = 0; i < 3; ++ i) {
75        UCollator          *coll = ucol_open(LOCALES[i], &status);
76        UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
77                                                     &status);
78        UCollationElements *titer = ucol_openElements(coll, text, -1,
79                                                     &status);
80        if (U_FAILURE(status)) {
81            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
82                    myErrorName(status));
83            return;
84        }
85
86        log_verbose("locale tested %s\n", LOCALES[i]);
87
88        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
89               U_SUCCESS(status)) {
90        }
91        if (U_FAILURE(status)) {
92            log_err("ERROR: reversing collation iterator :%s\n",
93                    myErrorName(status));
94            return;
95        }
96        ucol_reset(pitr);
97
98        ucol_setOffset(titer, u_strlen(pattern), &status);
99        if (U_FAILURE(status)) {
100            log_err("ERROR: setting offset in collator :%s\n",
101                    myErrorName(status));
102            return;
103        }
104        result[i][0] = ucol_getOffset(titer);
105        log_verbose("Text iterator set to offset %d\n", result[i][0]);
106
107        /* Use previous() */
108        ucol_previous(titer, &status);
109        result[i][1] = ucol_getOffset(titer);
110        log_verbose("Current offset %d after previous\n", result[i][1]);
111
112        /* Add one to index */
113        log_verbose("Adding one to current offset...\n");
114        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
115        if (U_FAILURE(status)) {
116            log_err("ERROR: setting offset in collator :%s\n",
117                    myErrorName(status));
118            return;
119        }
120        result[i][2] = ucol_getOffset(titer);
121        log_verbose("Current offset in text = %d\n", result[i][2]);
122        ucol_closeElements(pitr);
123        ucol_closeElements(titer);
124        ucol_close(coll);
125    }
126
127    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
128        uprv_memcmp(result[1], result[2], 3) != 0) {
129        log_err("ERROR: Different locales have different offsets at the same character\n");
130    }
131}
132
133
134
135/*  Running this test with normalization enabled showed up a bug in the incremental
136    normalization code. */
137static void TestBug672Normalize() {
138    UErrorCode  status = U_ZERO_ERROR;
139    UChar       pattern[20];
140    UChar       text[50];
141    int         i;
142    int         result[3][3];
143
144    u_uastrcpy(pattern, "resume");
145    u_uastrcpy(text, "Time to resume updating my resume.");
146
147    for (i = 0; i < 3; ++ i) {
148        UCollator          *coll = ucol_open(LOCALES[i], &status);
149        UCollationElements *pitr = NULL;
150        UCollationElements *titer = NULL;
151
152        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
153
154        pitr = ucol_openElements(coll, pattern, -1, &status);
155        titer = ucol_openElements(coll, text, -1, &status);
156        if (U_FAILURE(status)) {
157            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
158                    myErrorName(status));
159            return;
160        }
161
162        log_verbose("locale tested %s\n", LOCALES[i]);
163
164        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
165               U_SUCCESS(status)) {
166        }
167        if (U_FAILURE(status)) {
168            log_err("ERROR: reversing collation iterator :%s\n",
169                    myErrorName(status));
170            return;
171        }
172        ucol_reset(pitr);
173
174        ucol_setOffset(titer, u_strlen(pattern), &status);
175        if (U_FAILURE(status)) {
176            log_err("ERROR: setting offset in collator :%s\n",
177                    myErrorName(status));
178            return;
179        }
180        result[i][0] = ucol_getOffset(titer);
181        log_verbose("Text iterator set to offset %d\n", result[i][0]);
182
183        /* Use previous() */
184        ucol_previous(titer, &status);
185        result[i][1] = ucol_getOffset(titer);
186        log_verbose("Current offset %d after previous\n", result[i][1]);
187
188        /* Add one to index */
189        log_verbose("Adding one to current offset...\n");
190        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
191        if (U_FAILURE(status)) {
192            log_err("ERROR: setting offset in collator :%s\n",
193                    myErrorName(status));
194            return;
195        }
196        result[i][2] = ucol_getOffset(titer);
197        log_verbose("Current offset in text = %d\n", result[i][2]);
198        ucol_closeElements(pitr);
199        ucol_closeElements(titer);
200        ucol_close(coll);
201    }
202
203    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
204        uprv_memcmp(result[1], result[2], 3) != 0) {
205        log_err("ERROR: Different locales have different offsets at the same character\n");
206    }
207}
208
209
210
211
212/**
213 * Test for CollationElementIterator previous and next for the whole set of
214 * unicode characters.
215 */
216static void TestUnicodeChar()
217{
218    UChar source[0x100];
219    UCollator *en_us;
220    UCollationElements *iter;
221    UErrorCode status = U_ZERO_ERROR;
222    UChar codepoint;
223
224    UChar *test;
225    en_us = ucol_open("en_US", &status);
226    if (U_FAILURE(status)){
227       log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
228              myErrorName(status));
229       return;
230    }
231
232    for (codepoint = 1; codepoint < 0xFFFE;)
233    {
234      test = source;
235
236      while (codepoint % 0xFF != 0)
237      {
238        if (u_isdefined(codepoint))
239          *(test ++) = codepoint;
240        codepoint ++;
241      }
242
243      if (u_isdefined(codepoint))
244        *(test ++) = codepoint;
245
246      if (codepoint != 0xFFFF)
247        codepoint ++;
248
249      *test = 0;
250      iter=ucol_openElements(en_us, source, u_strlen(source), &status);
251      if(U_FAILURE(status)){
252          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
253              myErrorName(status));
254          ucol_close(en_us);
255          return;
256      }
257      /* A basic test to see if it's working at all */
258      log_verbose("codepoint testing %x\n", codepoint);
259      backAndForth(iter);
260      ucol_closeElements(iter);
261
262      /* null termination test */
263      iter=ucol_openElements(en_us, source, -1, &status);
264      if(U_FAILURE(status)){
265          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
266              myErrorName(status));
267          ucol_close(en_us);
268          return;
269      }
270      /* A basic test to see if it's working at all */
271      backAndForth(iter);
272      ucol_closeElements(iter);
273    }
274
275    ucol_close(en_us);
276}
277
278/**
279 * Test for CollationElementIterator previous and next for the whole set of
280 * unicode characters with normalization on.
281 */
282static void TestNormalizedUnicodeChar()
283{
284    UChar source[0x100];
285    UCollator *th_th;
286    UCollationElements *iter;
287    UErrorCode status = U_ZERO_ERROR;
288    UChar codepoint;
289
290    UChar *test;
291    /* thai should have normalization on */
292    th_th = ucol_open("th_TH", &status);
293    if (U_FAILURE(status)){
294        log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
295              myErrorName(status));
296        return;
297    }
298
299    for (codepoint = 1; codepoint < 0xFFFE;)
300    {
301      test = source;
302
303      while (codepoint % 0xFF != 0)
304      {
305        if (u_isdefined(codepoint))
306          *(test ++) = codepoint;
307        codepoint ++;
308      }
309
310      if (u_isdefined(codepoint))
311        *(test ++) = codepoint;
312
313      if (codepoint != 0xFFFF)
314        codepoint ++;
315
316      *test = 0;
317      iter=ucol_openElements(th_th, source, u_strlen(source), &status);
318      if(U_FAILURE(status)){
319          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
320              myErrorName(status));
321            ucol_close(th_th);
322          return;
323      }
324
325      backAndForth(iter);
326      ucol_closeElements(iter);
327
328      iter=ucol_openElements(th_th, source, -1, &status);
329      if(U_FAILURE(status)){
330          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
331              myErrorName(status));
332            ucol_close(th_th);
333          return;
334      }
335
336      backAndForth(iter);
337      ucol_closeElements(iter);
338    }
339
340    ucol_close(th_th);
341}
342
343/**
344* Test the incremental normalization
345*/
346static void TestNormalization()
347{
348          UErrorCode          status = U_ZERO_ERROR;
349    const char               *str    =
350                            "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
351          UCollator          *coll;
352          UChar               rule[50];
353          int                 rulelen = u_unescape(str, rule, 50);
354          int                 count = 0;
355    const char                *testdata[] =
356                        {"\\u1ED9", "o\\u0323\\u0302",
357                        "\\u0300\\u0315", "\\u0315\\u0300",
358                        "A\\u0300\\u0315B", "A\\u0315\\u0300B",
359                        "A\\u0316\\u0315B", "A\\u0315\\u0316B",
360                        "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
361                        "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
362                        "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
363    int32_t   srclen;
364    UChar source[10];
365    UCollationElements *iter;
366
367    coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
368    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
369    if (U_FAILURE(status)){
370        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
371              myErrorName(status));
372        return;
373    }
374
375    srclen = u_unescape(testdata[0], source, 10);
376    iter = ucol_openElements(coll, source, srclen, &status);
377    backAndForth(iter);
378    ucol_closeElements(iter);
379
380    srclen = u_unescape(testdata[1], source, 10);
381    iter = ucol_openElements(coll, source, srclen, &status);
382    backAndForth(iter);
383    ucol_closeElements(iter);
384
385    while (count < 12) {
386        srclen = u_unescape(testdata[count], source, 10);
387        iter = ucol_openElements(coll, source, srclen, &status);
388
389        if (U_FAILURE(status)){
390            log_err("ERROR: in creation of collator element iterator\n %s\n",
391                  myErrorName(status));
392            return;
393        }
394        backAndForth(iter);
395        ucol_closeElements(iter);
396
397        iter = ucol_openElements(coll, source, -1, &status);
398
399        if (U_FAILURE(status)){
400            log_err("ERROR: in creation of collator element iterator\n %s\n",
401                  myErrorName(status));
402            return;
403        }
404        backAndForth(iter);
405        ucol_closeElements(iter);
406        count ++;
407    }
408    ucol_close(coll);
409}
410
411/**
412 * Test for CollationElementIterator.previous()
413 *
414 * @bug 4108758 - Make sure it works with contracting characters
415 *
416 */
417static void TestPrevious()
418{
419    UCollator *coll=NULL;
420    UChar rule[50];
421    UChar *source;
422    UCollator *c1, *c2, *c3;
423    UCollationElements *iter;
424    UErrorCode status = U_ZERO_ERROR;
425    UChar test1[50];
426    UChar test2[50];
427
428    u_uastrcpy(test1, "What subset of all possible test cases?");
429    u_uastrcpy(test2, "has the highest probability of detecting");
430    coll = ucol_open("en_US", &status);
431
432    iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
433    log_verbose("English locale testing back and forth\n");
434    if(U_FAILURE(status)){
435        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
436            myErrorName(status));
437        ucol_close(coll);
438        return;
439    }
440    /* A basic test to see if it's working at all */
441    backAndForth(iter);
442    ucol_closeElements(iter);
443    ucol_close(coll);
444
445    /* Test with a contracting character sequence */
446    u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
447    c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
448
449    log_verbose("Contraction rule testing back and forth with no normalization\n");
450
451    if (c1 == NULL || U_FAILURE(status))
452    {
453        log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
454            myErrorName(status));
455        return;
456    }
457    source=(UChar*)malloc(sizeof(UChar) * 20);
458    u_uastrcpy(source, "abchdcba");
459    iter=ucol_openElements(c1, source, u_strlen(source), &status);
460    if(U_FAILURE(status)){
461        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
462            myErrorName(status));
463        return;
464    }
465    backAndForth(iter);
466    ucol_closeElements(iter);
467    ucol_close(c1);
468
469    /* Test with an expanding character sequence */
470    u_uastrcpy(rule, "&a < b < c/abd < d");
471    c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
472    log_verbose("Expansion rule testing back and forth with no normalization\n");
473    if (c2 == NULL || U_FAILURE(status))
474    {
475        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
476            myErrorName(status));
477        return;
478    }
479    u_uastrcpy(source, "abcd");
480    iter=ucol_openElements(c2, source, u_strlen(source), &status);
481    if(U_FAILURE(status)){
482        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
483            myErrorName(status));
484        return;
485    }
486    backAndForth(iter);
487    ucol_closeElements(iter);
488    ucol_close(c2);
489    /* Now try both */
490    u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
491    c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
492    log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
493
494    if (c3 == NULL || U_FAILURE(status))
495    {
496        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
497            myErrorName(status));
498        return;
499    }
500    u_uastrcpy(source, "abcdbchdc");
501    iter=ucol_openElements(c3, source, u_strlen(source), &status);
502    if(U_FAILURE(status)){
503        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
504            myErrorName(status));
505        return;
506    }
507    backAndForth(iter);
508    ucol_closeElements(iter);
509    ucol_close(c3);
510    source[0] = 0x0e41;
511    source[1] = 0x0e02;
512    source[2] = 0x0e41;
513    source[3] = 0x0e02;
514    source[4] = 0x0e27;
515    source[5] = 0x61;
516    source[6] = 0x62;
517    source[7] = 0x63;
518    source[8] = 0;
519
520    coll = ucol_open("th_TH", &status);
521    log_verbose("Thai locale testing back and forth with normalization\n");
522    iter=ucol_openElements(coll, source, u_strlen(source), &status);
523    if(U_FAILURE(status)){
524        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
525            myErrorName(status));
526        return;
527    }
528    backAndForth(iter);
529    ucol_closeElements(iter);
530    ucol_close(coll);
531
532    /* prev test */
533    source[0] = 0x0061;
534    source[1] = 0x30CF;
535    source[2] = 0x3099;
536    source[3] = 0x30FC;
537    source[4] = 0;
538
539    coll = ucol_open("ja_JP", &status);
540    log_verbose("Japanese locale testing back and forth with normalization\n");
541    iter=ucol_openElements(coll, source, u_strlen(source), &status);
542    if(U_FAILURE(status)){
543        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
544            myErrorName(status));
545        return;
546    }
547    backAndForth(iter);
548    ucol_closeElements(iter);
549    ucol_close(coll);
550
551    free(source);
552}
553
554/**
555 * Test for getOffset() and setOffset()
556 */
557static void TestOffset()
558{
559    UErrorCode status= U_ZERO_ERROR;
560    UCollator *en_us=NULL;
561    UCollationElements *iter, *pristine;
562    int32_t offset;
563    OrderAndOffset *orders;
564    int32_t orderLength=0;
565    int     count = 0;
566    UChar test1[50];
567    UChar test2[50];
568
569    u_uastrcpy(test1, "What subset of all possible test cases?");
570    u_uastrcpy(test2, "has the highest probability of detecting");
571    en_us = ucol_open("en_US", &status);
572    log_verbose("Testing getOffset and setOffset for collations\n");
573    iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
574    if(U_FAILURE(status)){
575        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
576            myErrorName(status));
577        ucol_close(en_us);
578        return;
579    }
580
581    /* testing boundaries */
582    ucol_setOffset(iter, 0, &status);
583    if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
584        log_err("Error: After setting offset to 0, we should be at the end "
585                "of the backwards iteration");
586    }
587    ucol_setOffset(iter, u_strlen(test1), &status);
588    if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
589        log_err("Error: After setting offset to end of the string, we should "
590                "be at the end of the backwards iteration");
591    }
592
593    /* Run all the way through the iterator, then get the offset */
594
595    orders = getOrders(iter, &orderLength);
596
597    offset = ucol_getOffset(iter);
598
599    if (offset != u_strlen(test1))
600    {
601        log_err("offset at end != length %d vs %d\n", offset,
602            u_strlen(test1) );
603    }
604
605    /* Now set the offset back to the beginning and see if it works */
606    pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
607    if(U_FAILURE(status)){
608        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
609            myErrorName(status));
610    ucol_close(en_us);
611        return;
612    }
613    status = U_ZERO_ERROR;
614
615    ucol_setOffset(iter, 0, &status);
616    if (U_FAILURE(status))
617    {
618        log_err("setOffset failed. %s\n",    myErrorName(status));
619    }
620    else
621    {
622        assertEqual(iter, pristine);
623    }
624
625    ucol_closeElements(pristine);
626    ucol_closeElements(iter);
627    free(orders);
628
629    /* testing offsets in normalization buffer */
630    test1[0] = 0x61;
631    test1[1] = 0x300;
632    test1[2] = 0x316;
633    test1[3] = 0x62;
634    test1[4] = 0;
635    ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
636    iter = ucol_openElements(en_us, test1, 4, &status);
637    if(U_FAILURE(status)){
638        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
639            myErrorName(status));
640        ucol_close(en_us);
641        return;
642    }
643
644    count = 0;
645    while (ucol_next(iter, &status) != UCOL_NULLORDER &&
646        U_SUCCESS(status)) {
647        switch (count) {
648        case 0:
649            if (ucol_getOffset(iter) != 1) {
650                log_err("ERROR: Offset of iteration should be 1\n");
651            }
652            break;
653        case 3:
654            if (ucol_getOffset(iter) != 4) {
655                log_err("ERROR: Offset of iteration should be 4\n");
656            }
657            break;
658        default:
659            if (ucol_getOffset(iter) != 3) {
660                log_err("ERROR: Offset of iteration should be 3\n");
661            }
662        }
663        count ++;
664    }
665
666    ucol_reset(iter);
667    count = 0;
668    while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
669        U_SUCCESS(status)) {
670        switch (count) {
671        case 0:
672        case 1:
673            if (ucol_getOffset(iter) != 3) {
674                log_err("ERROR: Offset of iteration should be 3\n");
675            }
676            break;
677        case 2:
678            if (ucol_getOffset(iter) != 1) {
679                log_err("ERROR: Offset of iteration should be 1\n");
680            }
681            break;
682        default:
683            if (ucol_getOffset(iter) != 0) {
684                log_err("ERROR: Offset of iteration should be 0\n");
685            }
686        }
687        count ++;
688    }
689
690    if(U_FAILURE(status)){
691        log_err("ERROR: in iterating collation elements %s\n",
692            myErrorName(status));
693    }
694
695    ucol_closeElements(iter);
696    ucol_close(en_us);
697}
698
699/**
700 * Test for setText()
701 */
702static void TestSetText()
703{
704    int32_t c,i;
705    UErrorCode status = U_ZERO_ERROR;
706    UCollator *en_us=NULL;
707    UCollationElements *iter1, *iter2;
708    UChar test1[50];
709    UChar test2[50];
710
711    u_uastrcpy(test1, "What subset of all possible test cases?");
712    u_uastrcpy(test2, "has the highest probability of detecting");
713    en_us = ucol_open("en_US", &status);
714    log_verbose("testing setText for Collation elements\n");
715    iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
716    if(U_FAILURE(status)){
717        log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
718            myErrorName(status));
719    ucol_close(en_us);
720        return;
721    }
722    iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
723    if(U_FAILURE(status)){
724        log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
725            myErrorName(status));
726    ucol_close(en_us);
727        return;
728    }
729
730    /* Run through the second iterator just to exercise it */
731    c = ucol_next(iter2, &status);
732    i = 0;
733
734    while ( ++i < 10 && (c != UCOL_NULLORDER))
735    {
736        if (U_FAILURE(status))
737        {
738            log_err("iter2->next() returned an error. %s\n", myErrorName(status));
739            ucol_closeElements(iter2);
740            ucol_closeElements(iter1);
741    ucol_close(en_us);
742            return;
743        }
744
745        c = ucol_next(iter2, &status);
746    }
747
748    /* Now set it to point to the same string as the first iterator */
749    ucol_setText(iter2, test1, u_strlen(test1), &status);
750    if (U_FAILURE(status))
751    {
752        log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
753    }
754    else
755    {
756        assertEqual(iter1, iter2);
757    }
758
759    /* Now set it to point to a null string with fake length*/
760    ucol_setText(iter2, NULL, 2, &status);
761    if (status != U_ILLEGAL_ARGUMENT_ERROR)
762    {
763        log_err("call to iter2->setText(null, 2) should yield an illegal-argument-error - %s\n",
764                myErrorName(status));
765    }
766
767    ucol_closeElements(iter2);
768    ucol_closeElements(iter1);
769    ucol_close(en_us);
770}
771
772/** @bug 4108762
773 * Test for getMaxExpansion()
774 */
775static void TestMaxExpansion()
776{
777    UErrorCode          status = U_ZERO_ERROR;
778    UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
779    UChar               ch     = 0;
780    UChar32             unassigned = 0xEFFFD;
781    UChar               supplementary[2];
782    uint32_t            stringOffset = 0;
783    UBool               isError = FALSE;
784    uint32_t            sorder = 0;
785    UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
786    uint32_t            temporder = 0;
787
788    UChar rule[256];
789    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
790    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
791        UCOL_DEFAULT_STRENGTH,NULL, &status);
792    if(U_SUCCESS(status) && coll) {
793      iter = ucol_openElements(coll, &ch, 1, &status);
794
795      while (ch < 0xFFFF && U_SUCCESS(status)) {
796          int      count = 1;
797          uint32_t order;
798          int32_t  size = 0;
799
800          ch ++;
801
802          ucol_setText(iter, &ch, 1, &status);
803          order = ucol_previous(iter, &status);
804
805          /* thai management */
806          if (order == 0)
807              order = ucol_previous(iter, &status);
808
809          while (U_SUCCESS(status) &&
810              ucol_previous(iter, &status) != UCOL_NULLORDER) {
811              count ++;
812          }
813
814          size = ucol_getMaxExpansion(iter, order);
815          if (U_FAILURE(status) || size < count) {
816              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
817                  ch, count);
818          }
819      }
820
821      /* testing for exact max expansion */
822      ch = 0;
823      while (ch < 0x61) {
824          uint32_t order;
825          int32_t  size;
826          ucol_setText(iter, &ch, 1, &status);
827          order = ucol_previous(iter, &status);
828          size  = ucol_getMaxExpansion(iter, order);
829          if (U_FAILURE(status) || size != 1) {
830              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
831                  ch, 1);
832          }
833          ch ++;
834      }
835
836      ch = 0x63;
837      ucol_setText(iter, &ch, 1, &status);
838      temporder = ucol_previous(iter, &status);
839
840      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
841          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
842                  ch, 3);
843      }
844
845      ch = 0x64;
846      ucol_setText(iter, &ch, 1, &status);
847      temporder = ucol_previous(iter, &status);
848
849      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
850          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
851                  ch, 3);
852      }
853
854      U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
855      (void)isError;    /* Suppress set but not used warning. */
856      ucol_setText(iter, supplementary, 2, &status);
857      sorder = ucol_previous(iter, &status);
858
859      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
860          log_err("Failure at codepoint %d, maximum expansion count < %d\n",
861                  ch, 2);
862      }
863
864      /* testing jamo */
865      ch = 0x1165;
866
867      ucol_setText(iter, &ch, 1, &status);
868      temporder = ucol_previous(iter, &status);
869      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
870          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
871                  ch, 3);
872      }
873
874      ucol_closeElements(iter);
875      ucol_close(coll);
876
877      /* testing special jamo &a<\u1160 */
878      rule[0] = 0x26;
879      rule[1] = 0x71;
880      rule[2] = 0x3c;
881      rule[3] = 0x1165;
882      rule[4] = 0x2f;
883      rule[5] = 0x71;
884      rule[6] = 0x71;
885      rule[7] = 0x71;
886      rule[8] = 0x71;
887      rule[9] = 0;
888
889      coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
890          UCOL_DEFAULT_STRENGTH,NULL, &status);
891      iter = ucol_openElements(coll, &ch, 1, &status);
892
893      temporder = ucol_previous(iter, &status);
894      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
895          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
896                  ch, 5);
897      }
898
899      ucol_closeElements(iter);
900      ucol_close(coll);
901    } else {
902      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
903    }
904
905}
906
907
908static void assertEqual(UCollationElements *i1, UCollationElements *i2)
909{
910    int32_t c1, c2;
911    int32_t count = 0;
912    UErrorCode status = U_ZERO_ERROR;
913
914    do
915    {
916        c1 = ucol_next(i1, &status);
917        c2 = ucol_next(i2, &status);
918
919        if (c1 != c2)
920        {
921            log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
922            break;
923        }
924
925        count += 1;
926    }
927    while (c1 != UCOL_NULLORDER);
928}
929
930/**
931 * Testing iterators with extremely small buffers
932 */
933static void TestSmallBuffer()
934{
935    UErrorCode          status = U_ZERO_ERROR;
936    UCollator          *coll;
937    UCollationElements *testiter,
938                       *iter;
939    int32_t             count = 0;
940    OrderAndOffset     *testorders,
941                       *orders;
942
943    UChar teststr[500];
944    UChar str[] = {0x300, 0x31A, 0};
945    /*
946    creating a long string of decomposable characters,
947    since by default the writable buffer is of size 256
948    */
949    while (count < 500) {
950        if ((count & 1) == 0) {
951            teststr[count ++] = 0x300;
952        }
953        else {
954            teststr[count ++] = 0x31A;
955        }
956    }
957
958    coll = ucol_open("th_TH", &status);
959    if(U_SUCCESS(status) && coll) {
960      testiter = ucol_openElements(coll, teststr, 500, &status);
961      iter = ucol_openElements(coll, str, 2, &status);
962
963      orders     = getOrders(iter, &count);
964      if (count != 2) {
965          log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
966      }
967
968      /*
969      this will rearrange the string data to 250 characters of 0x300 first then
970      250 characters of 0x031A
971      */
972      testorders = getOrders(testiter, &count);
973
974      if (count != 500) {
975          log_err("Error decomposition does not give the right sized collation elements\n");
976      }
977
978      while (count != 0) {
979          /* UCA collation element for 0x0F76 */
980          if ((count > 250 && testorders[-- count].order != orders[1].order) ||
981              (count <= 250 && testorders[-- count].order != orders[0].order)) {
982              log_err("Error decomposition does not give the right collation element at %d count\n", count);
983              break;
984          }
985      }
986
987      free(testorders);
988      free(orders);
989
990      ucol_reset(testiter);
991
992      /* ensures closing of elements done properly to clear writable buffer */
993      ucol_next(testiter, &status);
994      ucol_next(testiter, &status);
995      ucol_closeElements(testiter);
996      ucol_closeElements(iter);
997      ucol_close(coll);
998    } else {
999      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1000    }
1001}
1002
1003/**
1004* Testing the discontigous contractions
1005*/
1006static void TestDiscontiguos() {
1007    const char               *rulestr    =
1008                            "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1009          UChar               rule[50];
1010          int                 rulelen = u_unescape(rulestr, rule, 50);
1011    const char               *src[] = {
1012     "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1013    /* base character blocked */
1014     "XD\\u0300", "XD\\u0300\\u0315",
1015    /* non blocking combining character */
1016     "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1017     /* blocking combining character */
1018     "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1019     /* contraction prefix */
1020     "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1021     "X\\u0300\\u031A\\u0315",
1022     /* ends not with a contraction character */
1023     "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1024     "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1025    };
1026    const char               *tgt[] = {
1027     /* non blocking combining character */
1028     "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1029    /* base character blocked */
1030     "X D \\u0300", "X D \\u0300\\u0315",
1031    /* non blocking combining character */
1032     "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1033     /* blocking combining character */
1034     "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1035     /* contraction prefix */
1036     "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1037     "X\\u0300 \\u031A \\u0315",
1038     /* ends not with a contraction character */
1039     "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1040     "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1041    };
1042          int                 size   = 20;
1043          UCollator          *coll;
1044          UErrorCode          status    = U_ZERO_ERROR;
1045          int                 count     = 0;
1046          UCollationElements *iter;
1047          UCollationElements *resultiter;
1048
1049    coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1050    iter       = ucol_openElements(coll, rule, 1, &status);
1051    resultiter = ucol_openElements(coll, rule, 1, &status);
1052
1053    if (U_FAILURE(status)) {
1054        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1055        return;
1056    }
1057
1058    while (count < size) {
1059        UChar  str[20];
1060        UChar  tstr[20];
1061        int    strLen = u_unescape(src[count], str, 20);
1062        UChar *s;
1063
1064        ucol_setText(iter, str, strLen, &status);
1065        if (U_FAILURE(status)) {
1066            log_err("Error opening collation iterator\n");
1067            return;
1068        }
1069
1070        u_unescape(tgt[count], tstr, 20);
1071        s = tstr;
1072
1073        log_verbose("count %d\n", count);
1074
1075        for (;;) {
1076            uint32_t  ce;
1077            UChar    *e = u_strchr(s, 0x20);
1078            if (e == 0) {
1079                e = u_strchr(s, 0);
1080            }
1081            ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1082            ce = ucol_next(resultiter, &status);
1083            if (U_FAILURE(status)) {
1084                log_err("Error manipulating collation iterator\n");
1085                return;
1086            }
1087            while (ce != UCOL_NULLORDER) {
1088                if (ce != (uint32_t)ucol_next(iter, &status) ||
1089                    U_FAILURE(status)) {
1090                    log_err("Discontiguos contraction test mismatch\n");
1091                    return;
1092                }
1093                ce = ucol_next(resultiter, &status);
1094                if (U_FAILURE(status)) {
1095                    log_err("Error getting next collation element\n");
1096                    return;
1097                }
1098            }
1099            s = e + 1;
1100            if (*e == 0) {
1101                break;
1102            }
1103        }
1104        ucol_reset(iter);
1105        backAndForth(iter);
1106        count ++;
1107    }
1108    ucol_closeElements(resultiter);
1109    ucol_closeElements(iter);
1110    ucol_close(coll);
1111}
1112
1113/**
1114* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
1115* normalization on AND jamo tailoring, among other things.
1116*
1117* Note: This test is sensitive to changes of the root collator,
1118* for example whether the ae-ligature maps to three CEs (as in the DUCET)
1119* or to two CEs (as in the CLDR 24 FractionalUCA.txt).
1120* It is also sensitive to how those CEs map to the iterator's 32-bit CE encoding.
1121* For example, the DUCET's artificial secondary CE in the ae-ligature
1122* may map to two 32-bit iterator CEs (as it did until ICU 52).
1123*/
1124static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
1125    0x0020, 0xAC00,                 /* simple LV Hangul */
1126    0x0020, 0xAC01,                 /* simple LVT Hangul */
1127    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
1128    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
1129    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
1130    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
1131    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
1132    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
1133    0x0020, 0x00E6,                 /* small letter ae, expands */
1134    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
1135    0x0020
1136};
1137enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
1138
/*
 * Offsets expected for the "root" collator over tsceText: one entry per
 * ucol_next() call, giving the ucol_getOffset() value read just before that
 * call. The last entry belongs to the call that returns UCOL_NULLORDER.
 * Each row starts at the index of a space in tsceText.
 */
static const int32_t rootStandardOffsets[] = {
    /* text[ 0.. 1] */   0,   1,  2,
    /* text[ 2.. 3] */   2,   3,  4,  4,
    /* text[ 4.. 5] */   4,   5,  6,  6,
    /* text[ 6.. 7] */   6,   7,  8,  8,
    /* text[ 8..11] */   8,   9, 10, 11,
    /* text[12..15] */  12,  13, 14, 15,
    /* text[16..19] */  16,  17, 18, 19,
    /* text[20..23] */  20,  21, 22, 23,
    /* text[24..25] */  24,  25, 26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    /* text[26..27] */  26,  27, 28, 28,
    /* text[28]     */  28,
    /* end of text  */  29
};
/* Number of entries in rootStandardOffsets. */
enum { kLen_rootStandardOffsets = (sizeof rootStandardOffsets) / (sizeof rootStandardOffsets[0]) };
1154
/*
 * Offsets expected for the "root@collation=search" collator over tsceText:
 * one entry per ucol_next() call, giving the ucol_getOffset() value read just
 * before that call. The last entry belongs to the call that returns
 * UCOL_NULLORDER. Each row starts at the index of a space in tsceText.
 */
static const int32_t rootSearchOffsets[] = {
    /* text[ 0.. 1] */   0,   1,  2,
    /* text[ 2.. 3] */   2,   3,  4,  4,
    /* text[ 4.. 5] */   4,   5,  6,  6,  6,
    /* text[ 6.. 7] */   6,   7,  8,  8,  8,  8,  8,  8,
    /* text[ 8..11] */   8,   9, 10, 11,
    /* text[12..15] */  12,  13, 14, 15,
    /* text[16..19] */  16,  17, 18, 19, 20,
    /* text[20..23] */  20,  21, 22, 22, 23, 23, 23, 24,
    /* text[24..25] */  24,  25, 26,  /* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
    /* text[26..27] */  26,  27, 28, 28,
    /* text[28]     */  28,
    /* end of text  */  29
};
/* Number of entries in rootSearchOffsets. */
enum { kLen_rootSearchOffsets = (sizeof rootSearchOffsets) / (sizeof rootSearchOffsets[0]) };
1170
/*
 * One case for TestSearchCollatorElements: a collator locale and the offset
 * sequence expected when iterating tsceText with that collator.
 */
typedef struct {
    const char    *locale;      /* locale ID passed to ucol_open(); NULL ends the table */
    const int32_t *offsets;     /* expected ucol_getOffset() values, in forward order */
    int32_t        offsetsLen;  /* number of entries in offsets */
} TSCEItem;
1176
1177static const TSCEItem tsceItems[] = {
1178    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
1179    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
1180    { NULL,                    NULL,                0                        }
1181};
1182
1183static void TestSearchCollatorElements(void)
1184{
1185    const TSCEItem * tsceItemPtr;
1186    for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
1187        UErrorCode status = U_ZERO_ERROR;
1188        UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
1189        if ( U_SUCCESS(status) ) {
1190            UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
1191            if ( U_SUCCESS(status) ) {
1192                int32_t offset, element;
1193                const int32_t * nextOffsetPtr;
1194                const int32_t * limitOffsetPtr;
1195
1196                nextOffsetPtr = tsceItemPtr->offsets;
1197                limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1198                do {
1199                    offset = ucol_getOffset(uce);
1200                    element = ucol_next(uce, &status);
1201                    log_verbose("(%s) offset=%2d  ce=%08x\n", tsceItemPtr->locale, offset, element);
1202                    if ( element == 0 ) {
1203                        log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
1204                    }
1205                    if ( nextOffsetPtr < limitOffsetPtr ) {
1206                        if (offset != *nextOffsetPtr) {
1207                            log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
1208                                                            tsceItemPtr->locale, *nextOffsetPtr, offset );
1209                            nextOffsetPtr = limitOffsetPtr;
1210                            break;
1211                        }
1212                        nextOffsetPtr++;
1213                    } else {
1214                        log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
1215                    }
1216                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1217                if ( nextOffsetPtr < limitOffsetPtr ) {
1218                    log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
1219                }
1220
1221                ucol_setOffset(uce, kLen_tsceText, &status);
1222                status = U_ZERO_ERROR;
1223                nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
1224                limitOffsetPtr = tsceItemPtr->offsets;
1225                do {
1226                    offset = ucol_getOffset(uce);
1227                    element = ucol_previous(uce, &status);
1228                    if ( element == 0 ) {
1229                        log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
1230                    }
1231                    if ( nextOffsetPtr > limitOffsetPtr ) {
1232                        nextOffsetPtr--;
1233                        if (offset != *nextOffsetPtr) {
1234                            log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
1235                                                                tsceItemPtr->locale, *nextOffsetPtr, offset );
1236                            nextOffsetPtr = limitOffsetPtr;
1237                            break;
1238                        }
1239                   } else {
1240                        log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
1241                    }
1242                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
1243                if ( nextOffsetPtr > limitOffsetPtr ) {
1244                    log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
1245                }
1246
1247                ucol_closeElements(uce);
1248            } else {
1249                log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1250            }
1251            ucol_close(ucol);
1252        } else {
1253            log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
1254        }
1255    }
1256}
1257
1258#endif /* #if !UCONFIG_NO_COLLATION */
1259