1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CITERTST.C
9*
10* Modification History:
11* Date      Name               Description
12*           Madhu Katragadda   Ported for C API
13* 02/19/01  synwee             Modified test case for new collation iterator
14*********************************************************************************/
15/*
16 * Collation Iterator tests.
17 * (Let me reiterate my position...)
18 */
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/ucol.h"
25#include "unicode/ucoleitr.h"
26#include "unicode/uloc.h"
27#include "unicode/uchar.h"
28#include "unicode/ustring.h"
29#include "unicode/putil.h"
30#include "callcoll.h"
31#include "cmemory.h"
32#include "cintltst.h"
33#include "citertst.h"
34#include "ccolltst.h"
35#include "filestrm.h"
36#include "cstring.h"
37#include "ucol_imp.h"
38#include "ucol_tok.h"
39#include "uparse.h"
40#include <stdio.h>
41
/* Declaration of a function that is not defined in the visible part of this
   file; presumably it is provided by the collation implementation.
   NOTE(review): no call to it is visible in this part of the file -- confirm
   it is still used before removing the declaration. */
extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *);
43
44void addCollIterTest(TestNode** root)
45{
46    addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
47    addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
48    addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
49    addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
50    addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
51    addTest(root, &TestNormalizedUnicodeChar,
52                                "tscoll/citertst/TestNormalizedUnicodeChar");
53    addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
54    addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
55    addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
56    addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
57    addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
58    addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
59    addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
60    addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
61    addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
62    addTest(root, &TestSearchCollatorElements, "tscoll/citertst/TestSearchCollatorElements");
63}
64
/* Locale IDs opened, one collator each, by TestBug672 and TestBug672Normalize.
   The table is read-only, so both the strings and the pointers are const. */

static const char * const LOCALES[] = {"en_AU", "en_BE", "en_CA"};
68
69static void TestBug672() {
70    UErrorCode  status = U_ZERO_ERROR;
71    UChar       pattern[20];
72    UChar       text[50];
73    int         i;
74    int         result[3][3];
75
76    u_uastrcpy(pattern, "resume");
77    u_uastrcpy(text, "Time to resume updating my resume.");
78
79    for (i = 0; i < 3; ++ i) {
80        UCollator          *coll = ucol_open(LOCALES[i], &status);
81        UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
82                                                     &status);
83        UCollationElements *titer = ucol_openElements(coll, text, -1,
84                                                     &status);
85        if (U_FAILURE(status)) {
86            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
87                    myErrorName(status));
88            return;
89        }
90
91        log_verbose("locale tested %s\n", LOCALES[i]);
92
93        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
94               U_SUCCESS(status)) {
95        }
96        if (U_FAILURE(status)) {
97            log_err("ERROR: reversing collation iterator :%s\n",
98                    myErrorName(status));
99            return;
100        }
101        ucol_reset(pitr);
102
103        ucol_setOffset(titer, u_strlen(pattern), &status);
104        if (U_FAILURE(status)) {
105            log_err("ERROR: setting offset in collator :%s\n",
106                    myErrorName(status));
107            return;
108        }
109        result[i][0] = ucol_getOffset(titer);
110        log_verbose("Text iterator set to offset %d\n", result[i][0]);
111
112        /* Use previous() */
113        ucol_previous(titer, &status);
114        result[i][1] = ucol_getOffset(titer);
115        log_verbose("Current offset %d after previous\n", result[i][1]);
116
117        /* Add one to index */
118        log_verbose("Adding one to current offset...\n");
119        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
120        if (U_FAILURE(status)) {
121            log_err("ERROR: setting offset in collator :%s\n",
122                    myErrorName(status));
123            return;
124        }
125        result[i][2] = ucol_getOffset(titer);
126        log_verbose("Current offset in text = %d\n", result[i][2]);
127        ucol_closeElements(pitr);
128        ucol_closeElements(titer);
129        ucol_close(coll);
130    }
131
132    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
133        uprv_memcmp(result[1], result[2], 3) != 0) {
134        log_err("ERROR: Different locales have different offsets at the same character\n");
135    }
136}
137
138
139
140/*  Running this test with normalization enabled showed up a bug in the incremental
141    normalization code. */
142static void TestBug672Normalize() {
143    UErrorCode  status = U_ZERO_ERROR;
144    UChar       pattern[20];
145    UChar       text[50];
146    int         i;
147    int         result[3][3];
148
149    u_uastrcpy(pattern, "resume");
150    u_uastrcpy(text, "Time to resume updating my resume.");
151
152    for (i = 0; i < 3; ++ i) {
153        UCollator          *coll = ucol_open(LOCALES[i], &status);
154        UCollationElements *pitr = NULL;
155        UCollationElements *titer = NULL;
156
157        ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
158
159        pitr = ucol_openElements(coll, pattern, -1, &status);
160        titer = ucol_openElements(coll, text, -1, &status);
161        if (U_FAILURE(status)) {
162            log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",
163                    myErrorName(status));
164            return;
165        }
166
167        log_verbose("locale tested %s\n", LOCALES[i]);
168
169        while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
170               U_SUCCESS(status)) {
171        }
172        if (U_FAILURE(status)) {
173            log_err("ERROR: reversing collation iterator :%s\n",
174                    myErrorName(status));
175            return;
176        }
177        ucol_reset(pitr);
178
179        ucol_setOffset(titer, u_strlen(pattern), &status);
180        if (U_FAILURE(status)) {
181            log_err("ERROR: setting offset in collator :%s\n",
182                    myErrorName(status));
183            return;
184        }
185        result[i][0] = ucol_getOffset(titer);
186        log_verbose("Text iterator set to offset %d\n", result[i][0]);
187
188        /* Use previous() */
189        ucol_previous(titer, &status);
190        result[i][1] = ucol_getOffset(titer);
191        log_verbose("Current offset %d after previous\n", result[i][1]);
192
193        /* Add one to index */
194        log_verbose("Adding one to current offset...\n");
195        ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
196        if (U_FAILURE(status)) {
197            log_err("ERROR: setting offset in collator :%s\n",
198                    myErrorName(status));
199            return;
200        }
201        result[i][2] = ucol_getOffset(titer);
202        log_verbose("Current offset in text = %d\n", result[i][2]);
203        ucol_closeElements(pitr);
204        ucol_closeElements(titer);
205        ucol_close(coll);
206    }
207
208    if (uprv_memcmp(result[0], result[1], 3) != 0 ||
209        uprv_memcmp(result[1], result[2], 3) != 0) {
210        log_err("ERROR: Different locales have different offsets at the same character\n");
211    }
212}
213
214
215
216
217/**
218 * Test for CollationElementIterator previous and next for the whole set of
219 * unicode characters.
220 */
221static void TestUnicodeChar()
222{
223    UChar source[0x100];
224    UCollator *en_us;
225    UCollationElements *iter;
226    UErrorCode status = U_ZERO_ERROR;
227    UChar codepoint;
228
229    UChar *test;
230    en_us = ucol_open("en_US", &status);
231    if (U_FAILURE(status)){
232       log_err_status(status, "ERROR: in creation of collation data using ucol_open()\n %s\n",
233              myErrorName(status));
234       return;
235    }
236
237    for (codepoint = 1; codepoint < 0xFFFE;)
238    {
239      test = source;
240
241      while (codepoint % 0xFF != 0)
242      {
243        if (u_isdefined(codepoint))
244          *(test ++) = codepoint;
245        codepoint ++;
246      }
247
248      if (u_isdefined(codepoint))
249        *(test ++) = codepoint;
250
251      if (codepoint != 0xFFFF)
252        codepoint ++;
253
254      *test = 0;
255      iter=ucol_openElements(en_us, source, u_strlen(source), &status);
256      if(U_FAILURE(status)){
257          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
258              myErrorName(status));
259          ucol_close(en_us);
260          return;
261      }
262      /* A basic test to see if it's working at all */
263      log_verbose("codepoint testing %x\n", codepoint);
264      backAndForth(iter);
265      ucol_closeElements(iter);
266
267      /* null termination test */
268      iter=ucol_openElements(en_us, source, -1, &status);
269      if(U_FAILURE(status)){
270          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
271              myErrorName(status));
272          ucol_close(en_us);
273          return;
274      }
275      /* A basic test to see if it's working at all */
276      backAndForth(iter);
277      ucol_closeElements(iter);
278    }
279
280    ucol_close(en_us);
281}
282
283/**
284 * Test for CollationElementIterator previous and next for the whole set of
285 * unicode characters with normalization on.
286 */
287static void TestNormalizedUnicodeChar()
288{
289    UChar source[0x100];
290    UCollator *th_th;
291    UCollationElements *iter;
292    UErrorCode status = U_ZERO_ERROR;
293    UChar codepoint;
294
295    UChar *test;
296    /* thai should have normalization on */
297    th_th = ucol_open("th_TH", &status);
298    if (U_FAILURE(status)){
299        log_err_status(status, "ERROR: in creation of thai collation using ucol_open()\n %s\n",
300              myErrorName(status));
301        return;
302    }
303
304    for (codepoint = 1; codepoint < 0xFFFE;)
305    {
306      test = source;
307
308      while (codepoint % 0xFF != 0)
309      {
310        if (u_isdefined(codepoint))
311          *(test ++) = codepoint;
312        codepoint ++;
313      }
314
315      if (u_isdefined(codepoint))
316        *(test ++) = codepoint;
317
318      if (codepoint != 0xFFFF)
319        codepoint ++;
320
321      *test = 0;
322      iter=ucol_openElements(th_th, source, u_strlen(source), &status);
323      if(U_FAILURE(status)){
324          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
325              myErrorName(status));
326            ucol_close(th_th);
327          return;
328      }
329
330      backAndForth(iter);
331      ucol_closeElements(iter);
332
333      iter=ucol_openElements(th_th, source, -1, &status);
334      if(U_FAILURE(status)){
335          log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
336              myErrorName(status));
337            ucol_close(th_th);
338          return;
339      }
340
341      backAndForth(iter);
342      ucol_closeElements(iter);
343    }
344
345    ucol_close(th_th);
346}
347
348/**
349* Test the incremental normalization
350*/
351static void TestNormalization()
352{
353          UErrorCode          status = U_ZERO_ERROR;
354    const char               *str    =
355                            "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
356          UCollator          *coll;
357          UChar               rule[50];
358          int                 rulelen = u_unescape(str, rule, 50);
359          int                 count = 0;
360    const char                *testdata[] =
361                        {"\\u1ED9", "o\\u0323\\u0302",
362                        "\\u0300\\u0315", "\\u0315\\u0300",
363                        "A\\u0300\\u0315B", "A\\u0315\\u0300B",
364                        "A\\u0316\\u0315B", "A\\u0315\\u0316B",
365                        "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
366                        "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
367                        "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
368    int32_t   srclen;
369    UChar source[10];
370    UCollationElements *iter;
371
372    coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
373    ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
374    if (U_FAILURE(status)){
375        log_err_status(status, "ERROR: in creation of collator using ucol_openRules()\n %s\n",
376              myErrorName(status));
377        return;
378    }
379
380    srclen = u_unescape(testdata[0], source, 10);
381    iter = ucol_openElements(coll, source, srclen, &status);
382    backAndForth(iter);
383    ucol_closeElements(iter);
384
385    srclen = u_unescape(testdata[1], source, 10);
386    iter = ucol_openElements(coll, source, srclen, &status);
387    backAndForth(iter);
388    ucol_closeElements(iter);
389
390    while (count < 12) {
391        srclen = u_unescape(testdata[count], source, 10);
392        iter = ucol_openElements(coll, source, srclen, &status);
393
394        if (U_FAILURE(status)){
395            log_err("ERROR: in creation of collator element iterator\n %s\n",
396                  myErrorName(status));
397            return;
398        }
399        backAndForth(iter);
400        ucol_closeElements(iter);
401
402        iter = ucol_openElements(coll, source, -1, &status);
403
404        if (U_FAILURE(status)){
405            log_err("ERROR: in creation of collator element iterator\n %s\n",
406                  myErrorName(status));
407            return;
408        }
409        backAndForth(iter);
410        ucol_closeElements(iter);
411        count ++;
412    }
413    ucol_close(coll);
414}
415
416/**
417 * Test for CollationElementIterator.previous()
418 *
419 * @bug 4108758 - Make sure it works with contracting characters
420 *
421 */
422static void TestPrevious()
423{
424    UCollator *coll=NULL;
425    UChar rule[50];
426    UChar *source;
427    UCollator *c1, *c2, *c3;
428    UCollationElements *iter;
429    UErrorCode status = U_ZERO_ERROR;
430    UChar test1[50];
431    UChar test2[50];
432
433    u_uastrcpy(test1, "What subset of all possible test cases?");
434    u_uastrcpy(test2, "has the highest probability of detecting");
435    coll = ucol_open("en_US", &status);
436
437    iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
438    log_verbose("English locale testing back and forth\n");
439    if(U_FAILURE(status)){
440        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
441            myErrorName(status));
442        ucol_close(coll);
443        return;
444    }
445    /* A basic test to see if it's working at all */
446    backAndForth(iter);
447    ucol_closeElements(iter);
448    ucol_close(coll);
449
450    /* Test with a contracting character sequence */
451    u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
452    c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
453
454    log_verbose("Contraction rule testing back and forth with no normalization\n");
455
456    if (c1 == NULL || U_FAILURE(status))
457    {
458        log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
459            myErrorName(status));
460        return;
461    }
462    source=(UChar*)malloc(sizeof(UChar) * 20);
463    u_uastrcpy(source, "abchdcba");
464    iter=ucol_openElements(c1, source, u_strlen(source), &status);
465    if(U_FAILURE(status)){
466        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
467            myErrorName(status));
468        return;
469    }
470    backAndForth(iter);
471    ucol_closeElements(iter);
472    ucol_close(c1);
473
474    /* Test with an expanding character sequence */
475    u_uastrcpy(rule, "&a < b < c/abd < d");
476    c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
477    log_verbose("Expansion rule testing back and forth with no normalization\n");
478    if (c2 == NULL || U_FAILURE(status))
479    {
480        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
481            myErrorName(status));
482        return;
483    }
484    u_uastrcpy(source, "abcd");
485    iter=ucol_openElements(c2, source, u_strlen(source), &status);
486    if(U_FAILURE(status)){
487        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
488            myErrorName(status));
489        return;
490    }
491    backAndForth(iter);
492    ucol_closeElements(iter);
493    ucol_close(c2);
494    /* Now try both */
495    u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
496    c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,  UCOL_DEFAULT_STRENGTH,NULL, &status);
497    log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
498
499    if (c3 == NULL || U_FAILURE(status))
500    {
501        log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
502            myErrorName(status));
503        return;
504    }
505    u_uastrcpy(source, "abcdbchdc");
506    iter=ucol_openElements(c3, source, u_strlen(source), &status);
507    if(U_FAILURE(status)){
508        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
509            myErrorName(status));
510        return;
511    }
512    backAndForth(iter);
513    ucol_closeElements(iter);
514    ucol_close(c3);
515    source[0] = 0x0e41;
516    source[1] = 0x0e02;
517    source[2] = 0x0e41;
518    source[3] = 0x0e02;
519    source[4] = 0x0e27;
520    source[5] = 0x61;
521    source[6] = 0x62;
522    source[7] = 0x63;
523    source[8] = 0;
524
525    coll = ucol_open("th_TH", &status);
526    log_verbose("Thai locale testing back and forth with normalization\n");
527    iter=ucol_openElements(coll, source, u_strlen(source), &status);
528    if(U_FAILURE(status)){
529        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
530            myErrorName(status));
531        return;
532    }
533    backAndForth(iter);
534    ucol_closeElements(iter);
535    ucol_close(coll);
536
537    /* prev test */
538    source[0] = 0x0061;
539    source[1] = 0x30CF;
540    source[2] = 0x3099;
541    source[3] = 0x30FC;
542    source[4] = 0;
543
544    coll = ucol_open("ja_JP", &status);
545    log_verbose("Japanese locale testing back and forth with normalization\n");
546    iter=ucol_openElements(coll, source, u_strlen(source), &status);
547    if(U_FAILURE(status)){
548        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
549            myErrorName(status));
550        return;
551    }
552    backAndForth(iter);
553    ucol_closeElements(iter);
554    ucol_close(coll);
555
556    free(source);
557}
558
559/**
560 * Test for getOffset() and setOffset()
561 */
562static void TestOffset()
563{
564    UErrorCode status= U_ZERO_ERROR;
565    UCollator *en_us=NULL;
566    UCollationElements *iter, *pristine;
567    int32_t offset;
568    OrderAndOffset *orders;
569    int32_t orderLength=0;
570    int     count = 0;
571    UChar test1[50];
572    UChar test2[50];
573
574    u_uastrcpy(test1, "What subset of all possible test cases?");
575    u_uastrcpy(test2, "has the highest probability of detecting");
576    en_us = ucol_open("en_US", &status);
577    log_verbose("Testing getOffset and setOffset for collations\n");
578    iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
579    if(U_FAILURE(status)){
580        log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
581            myErrorName(status));
582        ucol_close(en_us);
583        return;
584    }
585
586    /* testing boundaries */
587    ucol_setOffset(iter, 0, &status);
588    if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) {
589        log_err("Error: After setting offset to 0, we should be at the end "
590                "of the backwards iteration");
591    }
592    ucol_setOffset(iter, u_strlen(test1), &status);
593    if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) {
594        log_err("Error: After setting offset to end of the string, we should "
595                "be at the end of the backwards iteration");
596    }
597
598    /* Run all the way through the iterator, then get the offset */
599
600    orders = getOrders(iter, &orderLength);
601
602    offset = ucol_getOffset(iter);
603
604    if (offset != u_strlen(test1))
605    {
606        log_err("offset at end != length %d vs %d\n", offset,
607            u_strlen(test1) );
608    }
609
610    /* Now set the offset back to the beginning and see if it works */
611    pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
612    if(U_FAILURE(status)){
613        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
614            myErrorName(status));
615    ucol_close(en_us);
616        return;
617    }
618    status = U_ZERO_ERROR;
619
620    ucol_setOffset(iter, 0, &status);
621    if (U_FAILURE(status))
622    {
623        log_err("setOffset failed. %s\n",    myErrorName(status));
624    }
625    else
626    {
627        assertEqual(iter, pristine);
628    }
629
630    ucol_closeElements(pristine);
631    ucol_closeElements(iter);
632    free(orders);
633
634    /* testing offsets in normalization buffer */
635    test1[0] = 0x61;
636    test1[1] = 0x300;
637    test1[2] = 0x316;
638    test1[3] = 0x62;
639    test1[4] = 0;
640    ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
641    iter = ucol_openElements(en_us, test1, 4, &status);
642    if(U_FAILURE(status)){
643        log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
644            myErrorName(status));
645        ucol_close(en_us);
646        return;
647    }
648
649    count = 0;
650    while (ucol_next(iter, &status) != UCOL_NULLORDER &&
651        U_SUCCESS(status)) {
652        switch (count) {
653        case 0:
654            if (ucol_getOffset(iter) != 1) {
655                log_err("ERROR: Offset of iteration should be 1\n");
656            }
657            break;
658        case 3:
659            if (ucol_getOffset(iter) != 4) {
660                log_err("ERROR: Offset of iteration should be 4\n");
661            }
662            break;
663        default:
664            if (ucol_getOffset(iter) != 3) {
665                log_err("ERROR: Offset of iteration should be 3\n");
666            }
667        }
668        count ++;
669    }
670
671    ucol_reset(iter);
672    count = 0;
673    while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
674        U_SUCCESS(status)) {
675        switch (count) {
676        case 0:
677        case 1:
678            if (ucol_getOffset(iter) != 3) {
679                log_err("ERROR: Offset of iteration should be 3\n");
680            }
681            break;
682        case 2:
683            if (ucol_getOffset(iter) != 1) {
684                log_err("ERROR: Offset of iteration should be 1\n");
685            }
686            break;
687        default:
688            if (ucol_getOffset(iter) != 0) {
689                log_err("ERROR: Offset of iteration should be 0\n");
690            }
691        }
692        count ++;
693    }
694
695    if(U_FAILURE(status)){
696        log_err("ERROR: in iterating collation elements %s\n",
697            myErrorName(status));
698    }
699
700    ucol_closeElements(iter);
701    ucol_close(en_us);
702}
703
704/**
705 * Test for setText()
706 */
707static void TestSetText()
708{
709    int32_t c,i;
710    UErrorCode status = U_ZERO_ERROR;
711    UCollator *en_us=NULL;
712    UCollationElements *iter1, *iter2;
713    UChar test1[50];
714    UChar test2[50];
715
716    u_uastrcpy(test1, "What subset of all possible test cases?");
717    u_uastrcpy(test2, "has the highest probability of detecting");
718    en_us = ucol_open("en_US", &status);
719    log_verbose("testing setText for Collation elements\n");
720    iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
721    if(U_FAILURE(status)){
722        log_err_status(status, "ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
723            myErrorName(status));
724    ucol_close(en_us);
725        return;
726    }
727    iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
728    if(U_FAILURE(status)){
729        log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
730            myErrorName(status));
731    ucol_close(en_us);
732        return;
733    }
734
735    /* Run through the second iterator just to exercise it */
736    c = ucol_next(iter2, &status);
737    i = 0;
738
739    while ( ++i < 10 && (c != UCOL_NULLORDER))
740    {
741        if (U_FAILURE(status))
742        {
743            log_err("iter2->next() returned an error. %s\n", myErrorName(status));
744            ucol_closeElements(iter2);
745            ucol_closeElements(iter1);
746    ucol_close(en_us);
747            return;
748        }
749
750        c = ucol_next(iter2, &status);
751    }
752
753    /* Now set it to point to the same string as the first iterator */
754    ucol_setText(iter2, test1, u_strlen(test1), &status);
755    if (U_FAILURE(status))
756    {
757        log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
758    }
759    else
760    {
761        assertEqual(iter1, iter2);
762    }
763
764    /* Now set it to point to a null string with fake length*/
765    ucol_setText(iter2, NULL, 2, &status);
766    if (U_FAILURE(status))
767    {
768        log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
769    }
770    else
771    {
772        if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
773            log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
774        }
775    }
776
777    ucol_closeElements(iter2);
778    ucol_closeElements(iter1);
779    ucol_close(en_us);
780}
781
782/** @bug 4108762
783 * Test for getMaxExpansion()
784 */
785static void TestMaxExpansion()
786{
787    UErrorCode          status = U_ZERO_ERROR;
788    UCollator          *coll   ;/*= ucol_open("en_US", &status);*/
789    UChar               ch     = 0;
790    UChar32             unassigned = 0xEFFFD;
791    UChar               supplementary[2];
792    uint32_t            stringOffset = 0;
793    UBool               isError = FALSE;
794    uint32_t            sorder = 0;
795    UCollationElements *iter   ;/*= ucol_openElements(coll, &ch, 1, &status);*/
796    uint32_t            temporder = 0;
797
798    UChar rule[256];
799    u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
800    coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
801        UCOL_DEFAULT_STRENGTH,NULL, &status);
802    if(U_SUCCESS(status) && coll) {
803      iter = ucol_openElements(coll, &ch, 1, &status);
804
805      while (ch < 0xFFFF && U_SUCCESS(status)) {
806          int      count = 1;
807          uint32_t order;
808          int32_t  size = 0;
809
810          ch ++;
811
812          ucol_setText(iter, &ch, 1, &status);
813          order = ucol_previous(iter, &status);
814
815          /* thai management */
816          if (order == 0)
817              order = ucol_previous(iter, &status);
818
819          while (U_SUCCESS(status) &&
820              ucol_previous(iter, &status) != UCOL_NULLORDER) {
821              count ++;
822          }
823
824          size = ucol_getMaxExpansion(iter, order);
825          if (U_FAILURE(status) || size < count) {
826              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
827                  ch, count);
828          }
829      }
830
831      /* testing for exact max expansion */
832      ch = 0;
833      while (ch < 0x61) {
834          uint32_t order;
835          int32_t  size;
836          ucol_setText(iter, &ch, 1, &status);
837          order = ucol_previous(iter, &status);
838          size  = ucol_getMaxExpansion(iter, order);
839          if (U_FAILURE(status) || size != 1) {
840              log_err("Failure at codepoint %d, maximum expansion count < %d\n",
841                  ch, 1);
842          }
843          ch ++;
844      }
845
846      ch = 0x63;
847      ucol_setText(iter, &ch, 1, &status);
848      temporder = ucol_previous(iter, &status);
849
850      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) {
851          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
852                  ch, 3);
853      }
854
855      ch = 0x64;
856      ucol_setText(iter, &ch, 1, &status);
857      temporder = ucol_previous(iter, &status);
858
859      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) {
860          log_err("Failure at codepoint %d, maximum expansion count != %d\n",
861                  ch, 3);
862      }
863
864      U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);
865      ucol_setText(iter, supplementary, 2, &status);
866      sorder = ucol_previous(iter, &status);
867
868      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) {
869          log_err("Failure at codepoint %d, maximum expansion count < %d\n",
870                  ch, 2);
871      }
872
873      /* testing jamo */
874      ch = 0x1165;
875
876      ucol_setText(iter, &ch, 1, &status);
877      temporder = ucol_previous(iter, &status);
878      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) {
879          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
880                  ch, 3);
881      }
882
883      ucol_closeElements(iter);
884      ucol_close(coll);
885
886      /* testing special jamo &a<\u1160 */
887      rule[0] = 0x26;
888      rule[1] = 0x71;
889      rule[2] = 0x3c;
890      rule[3] = 0x1165;
891      rule[4] = 0x2f;
892      rule[5] = 0x71;
893      rule[6] = 0x71;
894      rule[7] = 0x71;
895      rule[8] = 0x71;
896      rule[9] = 0;
897
898      coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
899          UCOL_DEFAULT_STRENGTH,NULL, &status);
900      iter = ucol_openElements(coll, &ch, 1, &status);
901
902      temporder = ucol_previous(iter, &status);
903      if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) {
904          log_err("Failure at codepoint %d, maximum expansion count > %d\n",
905                  ch, 5);
906      }
907
908      ucol_closeElements(iter);
909      ucol_close(coll);
910    } else {
911      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
912    }
913
914}
915
916
917static void assertEqual(UCollationElements *i1, UCollationElements *i2)
918{
919    int32_t c1, c2;
920    int32_t count = 0;
921    UErrorCode status = U_ZERO_ERROR;
922
923    do
924    {
925        c1 = ucol_next(i1, &status);
926        c2 = ucol_next(i2, &status);
927
928        if (c1 != c2)
929        {
930            log_err("Error in iteration %d assetEqual between\n  %d  and   %d, they are not equal\n", count, c1, c2);
931            break;
932        }
933
934        count += 1;
935    }
936    while (c1 != UCOL_NULLORDER);
937}
938
939/**
940 * Testing iterators with extremely small buffers
941 */
942static void TestSmallBuffer()
943{
944    UErrorCode          status = U_ZERO_ERROR;
945    UCollator          *coll;
946    UCollationElements *testiter,
947                       *iter;
948    int32_t             count = 0;
949    OrderAndOffset     *testorders,
950                       *orders;
951
952    UChar teststr[500];
953    UChar str[] = {0x300, 0x31A, 0};
954    /*
955    creating a long string of decomposable characters,
956    since by default the writable buffer is of size 256
957    */
958    while (count < 500) {
959        if ((count & 1) == 0) {
960            teststr[count ++] = 0x300;
961        }
962        else {
963            teststr[count ++] = 0x31A;
964        }
965    }
966
967    coll = ucol_open("th_TH", &status);
968    if(U_SUCCESS(status) && coll) {
969      testiter = ucol_openElements(coll, teststr, 500, &status);
970      iter = ucol_openElements(coll, str, 2, &status);
971
972      orders     = getOrders(iter, &count);
973      if (count != 2) {
974          log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
975      }
976
977      /*
978      this will rearrange the string data to 250 characters of 0x300 first then
979      250 characters of 0x031A
980      */
981      testorders = getOrders(testiter, &count);
982
983      if (count != 500) {
984          log_err("Error decomposition does not give the right sized collation elements\n");
985      }
986
987      while (count != 0) {
988          /* UCA collation element for 0x0F76 */
989          if ((count > 250 && testorders[-- count].order != orders[1].order) ||
990              (count <= 250 && testorders[-- count].order != orders[0].order)) {
991              log_err("Error decomposition does not give the right collation element at %d count\n", count);
992              break;
993          }
994      }
995
996      free(testorders);
997      free(orders);
998
999      ucol_reset(testiter);
1000
1001      /* ensures closing of elements done properly to clear writable buffer */
1002      ucol_next(testiter, &status);
1003      ucol_next(testiter, &status);
1004      ucol_closeElements(testiter);
1005      ucol_closeElements(iter);
1006      ucol_close(coll);
1007    } else {
1008      log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(status));
1009    }
1010}
1011
/**
* Converts one hexadecimal digit character to its numeric value
* (snippet taken from genuca).
* @param hex character to convert; upper and lower case digits are accepted
* @return value 0..15 of the digit, or 0 when hex is not a hexadecimal digit
*/
static int32_t hex2num(char hex) {
    int32_t digit = 0;

    if ('0' <= hex && hex <= '9') {
        digit = hex - '0';
    } else if ('a' <= hex && hex <= 'f') {
        digit = 10 + (hex - 'a');
    } else if ('A' <= hex && hex <= 'F') {
        digit = 10 + (hex - 'A');
    }
    return digit;
}
1026
1027/**
1028* Getting codepoints from a string
1029* @param str character string contain codepoints seperated by space and ended
1030*        by a semicolon
1031* @param codepoints array for storage, assuming size > 5
1032* @return position at the end of the codepoint section
1033*/
1034static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) {
1035    UErrorCode errorCode = U_ZERO_ERROR;
1036    char *semi = uprv_strchr(str, ';');
1037    char *pipe = uprv_strchr(str, '|');
1038    char *s;
1039    *codepoints = 0;
1040    *contextCPs = 0;
1041    if(semi == NULL) {
1042        log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);
1043        return str;
1044    }
1045    if(pipe != NULL) {
1046        int32_t contextLength;
1047        *pipe = 0;
1048        contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);
1049        *pipe = '|';
1050        if(U_FAILURE(errorCode)) {
1051            log_err("error parsing precontext string from FractionalUCA.txt %s\n", str);
1052            return str;
1053        }
1054        /* prepend the precontext string to the codepoints */
1055        u_memcpy(codepoints, contextCPs, contextLength);
1056        codepoints += contextLength;
1057        /* start of the code point string */
1058        s = pipe + 1;
1059    } else {
1060        s = str;
1061    }
1062    u_parseString(s, codepoints, 99, NULL, &errorCode);
1063    if(U_FAILURE(errorCode)) {
1064        log_err("error parsing code point string from FractionalUCA.txt %s\n", str);
1065        return str;
1066    }
1067    return semi + 1;
1068}
1069
1070/**
1071* Sniplets of code from genuca
1072*/
1073static int32_t
1074readElement(char **from, char *to, char separator, UErrorCode *status)
1075{
1076    if (U_SUCCESS(*status)) {
1077        char    buffer[1024];
1078        int32_t i = 0;
1079        while (**from != separator) {
1080            if (**from != ' ') {
1081                *(buffer+i++) = **from;
1082            }
1083            (*from)++;
1084        }
1085        (*from)++;
1086        *(buffer + i) = 0;
1087        strcpy(to, buffer);
1088        return i/2;
1089    }
1090
1091    return 0;
1092}
1093
1094/**
1095* Sniplets of code from genuca
1096*/
1097static uint32_t
1098getSingleCEValue(char *primary, char *secondary, char *tertiary,
1099                          UErrorCode *status)
1100{
1101    if (U_SUCCESS(*status)) {
1102        uint32_t  value    = 0;
1103        char      primsave = '\0';
1104        char      secsave  = '\0';
1105        char      tersave  = '\0';
1106        char     *primend  = primary+4;
1107        char     *secend   = secondary+2;
1108        char     *terend   = tertiary+2;
1109        uint32_t  primvalue;
1110        uint32_t  secvalue;
1111        uint32_t  tervalue;
1112
1113        if (uprv_strlen(primary) > 4) {
1114            primsave = *primend;
1115            *primend = '\0';
1116        }
1117
1118        if (uprv_strlen(secondary) > 2) {
1119            secsave = *secend;
1120            *secend = '\0';
1121        }
1122
1123        if (uprv_strlen(tertiary) > 2) {
1124            tersave = *terend;
1125            *terend = '\0';
1126        }
1127
1128        primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
1129        secvalue  = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
1130        tervalue  = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
1131        if(primvalue <= 0xFF) {
1132          primvalue <<= 8;
1133        }
1134
1135        value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
1136           | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
1137           | (tervalue & UCOL_TERTIARYORDERMASK);
1138
1139        if(primsave!='\0') {
1140            *primend = primsave;
1141        }
1142        if(secsave!='\0') {
1143            *secend = secsave;
1144        }
1145        if(tersave!='\0') {
1146            *terend = tersave;
1147        }
1148        return value;
1149    }
1150    return 0;
1151}
1152
1153/**
1154* Getting collation elements generated from a string
1155* @param str character string contain collation elements contained in [] and
1156*        seperated by space
1157* @param ce array for storage, assuming size > 20
1158* @param status error status
1159* @return position at the end of the codepoint section
1160*/
1161static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) {
1162    char       *pStartCP     = uprv_strchr(str, '[');
1163    int         count        = 0;
1164    char       *pEndCP;
1165    char        primary[100];
1166    char        secondary[100];
1167    char        tertiary[100];
1168
1169    while (*pStartCP == '[') {
1170        uint32_t primarycount   = 0;
1171        uint32_t secondarycount = 0;
1172        uint32_t tertiarycount  = 0;
1173        uint32_t CEi = 1;
1174        pEndCP = strchr(pStartCP, ']');
1175        if(pEndCP == NULL) {
1176            break;
1177        }
1178        pStartCP ++;
1179
1180        primarycount   = readElement(&pStartCP, primary, ',', status);
1181        secondarycount = readElement(&pStartCP, secondary, ',', status);
1182        tertiarycount  = readElement(&pStartCP, tertiary, ']', status);
1183
1184        /* I want to get the CEs entered right here, including continuation */
1185        ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
1186        if (U_FAILURE(*status)) {
1187            break;
1188        }
1189
1190        while (2 * CEi < primarycount || CEi < secondarycount ||
1191               CEi < tertiarycount) {
1192            uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
1193            if (2 * CEi < primarycount) {
1194                value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28);
1195                value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24);
1196            }
1197
1198            if (2 * CEi + 1 < primarycount) {
1199                value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20);
1200                value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16);
1201            }
1202
1203            if (CEi < secondarycount) {
1204                value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12);
1205                value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8);
1206            }
1207
1208            if (CEi < tertiarycount) {
1209                value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4);
1210                value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF);
1211            }
1212
1213            CEi ++;
1214            ces[count ++] = value;
1215        }
1216
1217      pStartCP = pEndCP + 1;
1218    }
1219    ces[count] = 0;
1220    return pStartCP;
1221}
1222
1223/**
1224* Getting the FractionalUCA.txt file stream
1225*/
1226static FileStream * getFractionalUCA(void)
1227{
1228    char        newPath[256];
1229    char        backupPath[256];
1230    FileStream *result = NULL;
1231
1232    /* Look inside ICU_DATA first */
1233    uprv_strcpy(newPath, ctest_dataSrcDir());
1234    uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
1235    uprv_strcat(newPath, "FractionalUCA.txt");
1236
1237    /* As a fallback, try to guess where the source data was located
1238     *   at the time ICU was built, and look there.
1239     */
1240#if defined (U_TOPSRCDIR)
1241    strcpy(backupPath, U_TOPSRCDIR  U_FILE_SEP_STRING "data");
1242#else
1243    {
1244        UErrorCode errorCode = U_ZERO_ERROR;
1245        strcpy(backupPath, loadTestData(&errorCode));
1246        strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
1247    }
1248#endif
1249    strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
1250
1251    result = T_FileStream_open(newPath, "rb");
1252
1253    if (result == NULL) {
1254        result = T_FileStream_open(backupPath, "rb");
1255        if (result == NULL) {
1256            log_err("Failed to open either %s or %s\n", newPath, backupPath);
1257        }
1258    }
1259    return result;
1260}
1261
1262/**
1263* Testing the CEs returned by the iterator
1264*/
1265static void TestCEs() {
1266    FileStream *file = NULL;
1267    char        line[2048];
1268    char       *str;
1269    UChar       codepoints[10];
1270    uint32_t    ces[20];
1271    UErrorCode  status = U_ZERO_ERROR;
1272    UCollator          *coll = ucol_open("", &status);
1273    uint32_t lineNo = 0;
1274    UChar       contextCPs[5];
1275
1276    if (U_FAILURE(status)) {
1277        log_err_status(status, "Error in opening root collator -> %s\n", u_errorName(status));
1278        return;
1279    }
1280
1281    file = getFractionalUCA();
1282
1283    if (file == NULL) {
1284        log_err("*** unable to open input FractionalUCA.txt file ***\n");
1285        return;
1286    }
1287
1288
1289    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1290        int                 count = 0;
1291        UCollationElements *iter;
1292        int32_t            preContextCeLen=0;
1293        lineNo++;
1294        /* skip this line if it is empty or a comment or is a return value
1295        or start of some variable section */
1296        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1297            line[0] == 0x000D || line[0] == '[') {
1298            continue;
1299        }
1300
1301        str = getCodePoints(line, codepoints, contextCPs);
1302
1303        /* these are 'fake' codepoints in the fractional UCA, and are used just
1304         * for positioning of indirect values. They should not go through this
1305         * test.
1306         */
1307        if(*codepoints == 0xFDD0) {
1308          continue;
1309        }
1310        if (*contextCPs != 0) {
1311            iter = ucol_openElements(coll, contextCPs, -1, &status);
1312            if (U_FAILURE(status)) {
1313                log_err("Error in opening collation elements\n");
1314                break;
1315            }
1316            while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t)UCOL_NULLORDER) {
1317                preContextCeLen++;
1318            }
1319            ucol_closeElements(iter);
1320        }
1321
1322        getCEs(str, ces+preContextCeLen, &status);
1323        if (U_FAILURE(status)) {
1324            log_err("Error in parsing collation elements in FractionalUCA.txt\n");
1325            break;
1326        }
1327        iter = ucol_openElements(coll, codepoints, -1, &status);
1328        if (U_FAILURE(status)) {
1329            log_err("Error in opening collation elements\n");
1330            break;
1331        }
1332        for (;;) {
1333            uint32_t ce = (uint32_t)ucol_next(iter, &status);
1334            if (ce == 0xFFFFFFFF) {
1335                ce = 0;
1336            }
1337            /* we now unconditionally reorder Thai/Lao prevowels, so this
1338             * test would fail if we don't skip here.
1339             */
1340            if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
1341              continue;
1342            }
1343            if (ce != ces[count] || U_FAILURE(status)) {
1344                log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
1345                break;
1346            }
1347            if (ces[count] == 0) {
1348                break;
1349            }
1350            count ++;
1351        }
1352        ucol_closeElements(iter);
1353    }
1354
1355    T_FileStream_close(file);
1356    ucol_close(coll);
1357}
1358
1359/**
1360* Testing the discontigous contractions
1361*/
1362static void TestDiscontiguos() {
1363    const char               *rulestr    =
1364                            "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
1365          UChar               rule[50];
1366          int                 rulelen = u_unescape(rulestr, rule, 50);
1367    const char               *src[] = {
1368     "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
1369    /* base character blocked */
1370     "XD\\u0300", "XD\\u0300\\u0315",
1371    /* non blocking combining character */
1372     "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
1373     /* blocking combining character */
1374     "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
1375     /* contraction prefix */
1376     "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
1377     "X\\u0300\\u031A\\u0315",
1378     /* ends not with a contraction character */
1379     "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
1380     "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
1381    };
1382    const char               *tgt[] = {
1383     /* non blocking combining character */
1384     "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
1385    /* base character blocked */
1386     "X D \\u0300", "X D \\u0300\\u0315",
1387    /* non blocking combining character */
1388     "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
1389     /* blocking combining character */
1390     "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
1391     /* contraction prefix */
1392     "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
1393     "X\\u0300 \\u031A \\u0315",
1394     /* ends not with a contraction character */
1395     "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
1396     "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
1397    };
1398          int                 size   = 20;
1399          UCollator          *coll;
1400          UErrorCode          status    = U_ZERO_ERROR;
1401          int                 count     = 0;
1402          UCollationElements *iter;
1403          UCollationElements *resultiter;
1404
1405    coll       = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
1406    iter       = ucol_openElements(coll, rule, 1, &status);
1407    resultiter = ucol_openElements(coll, rule, 1, &status);
1408
1409    if (U_FAILURE(status)) {
1410        log_err_status(status, "Error opening collation rules -> %s\n", u_errorName(status));
1411        return;
1412    }
1413
1414    while (count < size) {
1415        UChar  str[20];
1416        UChar  tstr[20];
1417        int    strLen = u_unescape(src[count], str, 20);
1418        UChar *s;
1419
1420        ucol_setText(iter, str, strLen, &status);
1421        if (U_FAILURE(status)) {
1422            log_err("Error opening collation iterator\n");
1423            return;
1424        }
1425
1426        u_unescape(tgt[count], tstr, 20);
1427        s = tstr;
1428
1429        log_verbose("count %d\n", count);
1430
1431        for (;;) {
1432            uint32_t  ce;
1433            UChar    *e = u_strchr(s, 0x20);
1434            if (e == 0) {
1435                e = u_strchr(s, 0);
1436            }
1437            ucol_setText(resultiter, s, (int32_t)(e - s), &status);
1438            ce = ucol_next(resultiter, &status);
1439            if (U_FAILURE(status)) {
1440                log_err("Error manipulating collation iterator\n");
1441                return;
1442            }
1443            while (ce != UCOL_NULLORDER) {
1444                if (ce != (uint32_t)ucol_next(iter, &status) ||
1445                    U_FAILURE(status)) {
1446                    log_err("Discontiguos contraction test mismatch\n");
1447                    return;
1448                }
1449                ce = ucol_next(resultiter, &status);
1450                if (U_FAILURE(status)) {
1451                    log_err("Error getting next collation element\n");
1452                    return;
1453                }
1454            }
1455            s = e + 1;
1456            if (*e == 0) {
1457                break;
1458            }
1459        }
1460        ucol_reset(iter);
1461        backAndForth(iter);
1462        count ++;
1463    }
1464    ucol_closeElements(resultiter);
1465    ucol_closeElements(iter);
1466    ucol_close(coll);
1467}
1468
1469static void TestCEBufferOverflow()
1470{
1471    UChar               str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
1472    UErrorCode          status = U_ZERO_ERROR;
1473    UChar               rule[10];
1474    UCollator          *coll;
1475    UCollationElements *iter;
1476
1477    u_uastrcpy(rule, "&z < AB");
1478    coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
1479    if (U_FAILURE(status)) {
1480        log_err_status(status, "Rule based collator not created for testing ce buffer overflow -> %s\n", u_errorName(status));
1481        return;
1482    }
1483
1484    /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
1485    test. this will cause an overflow in getPrev */
1486    str[0] = 0x0041;    /* 'A' */
1487    /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/
1488    uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
1489    str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042;   /* 'B' */
1490    iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
1491                             &status);
1492    if (ucol_previous(iter, &status) == UCOL_NULLORDER ||
1493        status == U_BUFFER_OVERFLOW_ERROR) {
1494        log_err("CE buffer should not overflow with long string of trail surrogates\n");
1495    }
1496    ucol_closeElements(iter);
1497    ucol_close(coll);
1498}
1499
1500/**
1501* Checking collation element validity.
1502*/
1503#define MAX_CODEPOINTS_TO_SHOW 10
1504static void showCodepoints(const UChar *codepoints, int length, char * codepointText) {
1505    int i, lengthToUse = length;
1506    if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {
1507        lengthToUse = MAX_CODEPOINTS_TO_SHOW;
1508    }
1509    for (i = 0; i < lengthToUse; ++i) {
1510        int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);
1511        if (bytesWritten <= 0) {
1512            break;
1513        }
1514        codepointText += bytesWritten;
1515    }
1516    if (i < length) {
1517        sprintf(codepointText, " ...");
1518    }
1519}
1520
1521static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
1522                             int length)
1523{
1524    UErrorCode          status = U_ZERO_ERROR;
1525    UCollationElements *iter   = ucol_openElements(coll, codepoints, length,
1526                                                  &status);
1527    UBool result = FALSE;
1528    UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
1529    const char * collLocale;
1530
1531    if (U_FAILURE(status)) {
1532        log_err("Error creating iterator for testing validity\n");
1533        return FALSE;
1534    }
1535    collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);
1536    if (U_FAILURE(status) || collLocale==NULL) {
1537        status = U_ZERO_ERROR;
1538        collLocale = "?";
1539    }
1540
1541    for (;;) {
1542        uint32_t ce = ucol_next(iter, &status);
1543        uint32_t primary, p1, p2, secondary, tertiary;
1544        if (ce == UCOL_NULLORDER) {
1545            result = TRUE;
1546            break;
1547        }
1548        if (ce == 0) {
1549            continue;
1550        }
1551        if (ce == 0x02000202) {
1552            /* special CE for merge-sort character */
1553            if (*codepoints == 0xFFFE /* && length == 1 */) {
1554                /*
1555                 * Note: We should check for length==1 but the token parser appears
1556                 * to give us trailing NUL characters.
1557                 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTailoredSet()
1558                 *                     rather than the internal collation rule parser
1559                 */
1560                continue;
1561            } else {
1562                log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",
1563                        (int)*codepoints, (int)length);
1564                break;
1565            }
1566        }
1567        primary   = UCOL_PRIMARYORDER(ce);
1568        p1 = primary >> 8;
1569        p2 = primary & 0xFF;
1570        secondary = UCOL_SECONDARYORDER(ce);
1571        tertiary  = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
1572
1573        if (!isContinuation(ce)) {
1574            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1575                log_err("Empty CE %08lX except for case bits\n", (long)ce);
1576                break;
1577            }
1578            if (p1 == 0) {
1579                if (p2 != 0) {
1580                    log_err("Primary 00 xx in %08lX\n", (long)ce);
1581                    break;
1582                }
1583                primaryDone = TRUE;
1584            } else {
1585                if (p1 <= 2 || p1 >= 0xF0) {
1586                    /* Primary first bytes F0..FF are specials. */
1587                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
1588                    break;
1589                }
1590                if (p2 == 0) {
1591                    primaryDone = TRUE;
1592                } else {
1593                    if (p2 <= 3 || p2 >= 0xFF) {
1594                        /* Primary second bytes 03 and FF are sort key compression terminators. */
1595                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
1596                        break;
1597                    }
1598                    primaryDone = FALSE;
1599                }
1600            }
1601            if (secondary == 0) {
1602                if (primary != 0) {
1603                    log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
1604                    break;
1605                }
1606                secondaryDone = TRUE;
1607            } else {
1608                if (secondary <= 2 ||
1609                    (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
1610                ) {
1611                    /* Secondary first bytes common+1..+0x80 are used for sort key compression. */
1612                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
1613                    break;
1614                }
1615                secondaryDone = FALSE;
1616            }
1617            if (tertiary == 0) {
1618                /* We know that ce != 0. */
1619                log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
1620                break;
1621            }
1622            if (tertiary <= 2) {
1623                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1624                break;
1625            }
1626            tertiaryDone = FALSE;
1627        } else {
1628            if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
1629                log_err("Empty continuation %08lX\n", (long)ce);
1630                break;
1631            }
1632            if (primaryDone && primary != 0) {
1633                log_err("Primary was done but continues in %08lX\n", (long)ce);
1634                break;
1635            }
1636            if (p1 == 0) {
1637                if (p2 != 0) {
1638                    log_err("Primary 00 xx in %08lX\n", (long)ce);
1639                    break;
1640                }
1641                primaryDone = TRUE;
1642            } else {
1643                if (p1 <= 2) {
1644                    log_err("Primary first byte of %08lX out of range\n", (long)ce);
1645                    break;
1646                }
1647                if (p2 == 0) {
1648                    primaryDone = TRUE;
1649                } else {
1650                    if (p2 <= 3) {
1651                        log_err("Primary second byte of %08lX out of range\n", (long)ce);
1652                        break;
1653                    }
1654                }
1655            }
1656            if (secondaryDone && secondary != 0) {
1657                log_err("Secondary was done but continues in %08lX\n", (long)ce);
1658                break;
1659            }
1660            if (secondary == 0) {
1661                secondaryDone = TRUE;
1662            } else {
1663                if (secondary <= 2) {
1664                    log_err("Secondary byte of %08lX out of range\n", (long)ce);
1665                    break;
1666                }
1667            }
1668            if (tertiaryDone && tertiary != 0) {
1669                log_err("Tertiary was done but continues in %08lX\n", (long)ce);
1670                break;
1671            }
1672            if (tertiary == 0) {
1673                tertiaryDone = TRUE;
1674            } else if (tertiary <= 2) {
1675                log_err("Tertiary byte of %08lX out of range\n", (long)ce);
1676                break;
1677            }
1678        }
1679    }
1680    if (!result) {
1681        char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];
1682        showCodepoints(codepoints, length, codepointText);
1683        log_err("Locale: %s  Code point string: %s\n", collLocale, codepointText);
1684    }
1685    ucol_closeElements(iter);
1686    return result;
1687}
1688
1689static void TestCEValidity()
1690{
1691    /* testing UCA collation elements */
1692    UErrorCode  status      = U_ZERO_ERROR;
1693    /* en_US has no tailorings */
1694    UCollator  *coll        = ucol_open("root", &status);
1695    /* tailored locales */
1696    char        locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
1697    const char *loc;
1698    FileStream *file = NULL;
1699    char        line[2048];
1700    UChar       codepoints[11];
1701    int         count = 0;
1702    int         maxCount = 0;
1703    UChar       contextCPs[3];
1704    UChar32     c;
1705    UParseError parseError;
1706    if (U_FAILURE(status)) {
1707        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1708        return;
1709    }
1710    log_verbose("Testing UCA elements\n");
1711    file = getFractionalUCA();
1712    if (file == NULL) {
1713        log_err("Fractional UCA data can not be opened\n");
1714        return;
1715    }
1716
1717    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1718        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1719            line[0] == 0x000D || line[0] == '[') {
1720            continue;
1721        }
1722
1723        getCodePoints(line, codepoints, contextCPs);
1724        checkCEValidity(coll, codepoints, u_strlen(codepoints));
1725    }
1726
1727    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1728    for (c = 0; c <= 0xffff; ++c) {
1729        if (u_isdefined(c)) {
1730            codepoints[0] = (UChar)c;
1731            checkCEValidity(coll, codepoints, 1);
1732        }
1733    }
1734    for (; c <= 0x10ffff; ++c) {
1735        if (u_isdefined(c)) {
1736            int32_t i = 0;
1737            U16_APPEND_UNSAFE(codepoints, i, c);
1738            checkCEValidity(coll, codepoints, i);
1739        }
1740    }
1741
1742    ucol_close(coll);
1743
1744    /* testing tailored collation elements */
1745    log_verbose("Testing tailored elements\n");
1746    if(getTestOption(QUICK_OPTION)) {
1747        maxCount = sizeof(locale)/sizeof(locale[0]);
1748    } else {
1749        maxCount = uloc_countAvailable();
1750    }
1751    while (count < maxCount) {
1752        const UChar *rules = NULL,
1753                    *current = NULL;
1754        UChar *rulesCopy = NULL;
1755        int32_t ruleLen = 0;
1756
1757        uint32_t chOffset = 0;
1758        uint32_t chLen = 0;
1759        uint32_t exOffset = 0;
1760        uint32_t exLen = 0;
1761        uint32_t prefixOffset = 0;
1762        uint32_t prefixLen = 0;
1763        UBool    startOfRules = TRUE;
1764        UColOptionSet opts;
1765
1766        UColTokenParser src;
1767        uint32_t strength = 0;
1768        uint16_t specs = 0;
1769        if(getTestOption(QUICK_OPTION)) {
1770            loc = locale[count];
1771        } else {
1772            loc = uloc_getAvailable(count);
1773            if(!hasCollationElements(loc)) {
1774                count++;
1775                continue;
1776            }
1777        }
1778
1779        uprv_memset(&src, 0, sizeof(UColTokenParser));
1780
1781        log_verbose("Testing CEs for %s\n", loc);
1782
1783        coll      = ucol_open(loc, &status);
1784        if (U_FAILURE(status)) {
1785            log_err("%s collator creation failed\n", loc);
1786            return;
1787        }
1788
1789        src.opts = &opts;
1790        rules = ucol_getRules(coll, &ruleLen);
1791
1792        if (ruleLen > 0) {
1793            rulesCopy = (UChar *)uprv_malloc((ruleLen +
1794                UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1795            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1796            src.current = src.source = rulesCopy;
1797            src.end = rulesCopy + ruleLen;
1798            src.extraCurrent = src.end;
1799            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1800
1801	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1802	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1803            while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
1804              strength = src.parsedToken.strength;
1805              chOffset = src.parsedToken.charsOffset;
1806              chLen = src.parsedToken.charsLen;
1807              exOffset = src.parsedToken.extensionOffset;
1808              exLen = src.parsedToken.extensionLen;
1809              prefixOffset = src.parsedToken.prefixOffset;
1810              prefixLen = src.parsedToken.prefixLen;
1811              specs = src.parsedToken.flags;
1812
1813                startOfRules = FALSE;
1814                uprv_memcpy(codepoints, src.source + chOffset,
1815                                                       chLen * sizeof(UChar));
1816                codepoints[chLen] = 0;
1817                checkCEValidity(coll, codepoints, chLen);
1818            }
1819            uprv_free(src.source);
1820        }
1821
1822        ucol_close(coll);
1823        count ++;
1824    }
1825    T_FileStream_close(file);
1826}
1827
1828static void printSortKeyError(const UChar   *codepoints, int length,
1829                                    uint8_t *sortkey, int sklen)
1830{
1831    int count = 0;
1832    log_err("Sortkey not valid for ");
1833    while (length > 0) {
1834        log_err("0x%04x ", *codepoints);
1835        length --;
1836        codepoints ++;
1837    }
1838    log_err("\nSortkey : ");
1839    while (count < sklen) {
1840        log_err("0x%02x ", sortkey[count]);
1841        count ++;
1842    }
1843    log_err("\n");
1844}
1845
1846/**
1847* Checking sort key validity for all levels
1848*/
1849static UBool checkSortKeyValidity(UCollator *coll,
1850                                  const UChar *codepoints,
1851                                  int length)
1852{
1853    UErrorCode status  = U_ZERO_ERROR;
1854    UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
1855                                      UCOL_TERTIARY, UCOL_QUATERNARY,
1856                                      UCOL_IDENTICAL};
1857    int        strengthlen = 5;
1858    int        strengthIndex = 0;
1859    int        caselevel   = 0;
1860
1861    while (caselevel < 1) {
1862        if (caselevel == 0) {
1863            ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
1864        }
1865        else {
1866            ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
1867        }
1868
1869        while (strengthIndex < strengthlen) {
1870            int        count01 = 0;
1871            uint32_t   count   = 0;
1872            uint8_t    sortkey[128];
1873            uint32_t   sklen;
1874
1875            ucol_setStrength(coll, strength[strengthIndex]);
1876            sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
1877            while (sortkey[count] != 0) {
1878                if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 && strengthIndex != 4)) {
1879                    printSortKeyError(codepoints, length, sortkey, sklen);
1880                    return FALSE;
1881                }
1882                if (sortkey[count] == 1) {
1883                    count01 ++;
1884                }
1885                count ++;
1886            }
1887
1888            if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) {
1889                printSortKeyError(codepoints, length, sortkey, sklen);
1890                return FALSE;
1891            }
1892            strengthIndex ++;
1893        }
1894        caselevel ++;
1895    }
1896    return TRUE;
1897}
1898
1899static void TestSortKeyValidity(void)
1900{
1901    /* testing UCA collation elements */
1902    UErrorCode  status      = U_ZERO_ERROR;
1903    /* en_US has no tailorings */
1904    UCollator  *coll        = ucol_open("en_US", &status);
1905    /* tailored locales */
1906    char        locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
1907    FileStream *file = NULL;
1908    char        line[2048];
1909    UChar       codepoints[10];
1910    int         count = 0;
1911    UChar       contextCPs[5];
1912    UParseError parseError;
1913    if (U_FAILURE(status)) {
1914        log_err_status(status, "en_US collator creation failed -> %s\n", u_errorName(status));
1915        return;
1916    }
1917    log_verbose("Testing UCA elements\n");
1918    file = getFractionalUCA();
1919    if (file == NULL) {
1920        log_err("Fractional UCA data can not be opened\n");
1921        return;
1922    }
1923
1924    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
1925        if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
1926            line[0] == 0x000D || line[0] == '[') {
1927            continue;
1928        }
1929
1930        getCodePoints(line, codepoints, contextCPs);
1931        if(codepoints[0] == 0xFFFE) {
1932            /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
1933            continue;
1934        }
1935        checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
1936    }
1937
1938    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
1939    codepoints[0] = 0;
1940
1941    while (codepoints[0] < 0xFFFF) {
1942        if (u_isdefined((UChar32)codepoints[0])) {
1943            checkSortKeyValidity(coll, codepoints, 1);
1944        }
1945        codepoints[0] ++;
1946    }
1947
1948    ucol_close(coll);
1949
1950    /* testing tailored collation elements */
1951    log_verbose("Testing tailored elements\n");
1952    while (count < 5) {
1953        const UChar *rules = NULL,
1954                    *current = NULL;
1955        UChar *rulesCopy = NULL;
1956        int32_t ruleLen = 0;
1957
1958        uint32_t chOffset = 0;
1959        uint32_t chLen = 0;
1960        uint32_t exOffset = 0;
1961        uint32_t exLen = 0;
1962        uint32_t prefixOffset = 0;
1963        uint32_t prefixLen = 0;
1964        UBool    startOfRules = TRUE;
1965        UColOptionSet opts;
1966
1967        UColTokenParser src;
1968        uint32_t strength = 0;
1969        uint16_t specs = 0;
1970
1971        uprv_memset(&src, 0, sizeof(UColTokenParser));
1972
1973        coll      = ucol_open(locale[count], &status);
1974        if (U_FAILURE(status)) {
1975            log_err("%s collator creation failed\n", locale[count]);
1976            return;
1977        }
1978
1979        src.opts = &opts;
1980        rules = ucol_getRules(coll, &ruleLen);
1981
1982        if (ruleLen > 0) {
1983            rulesCopy = (UChar *)uprv_malloc((ruleLen +
1984                UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
1985            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
1986            src.current = src.source = rulesCopy;
1987            src.end = rulesCopy + ruleLen;
1988            src.extraCurrent = src.end;
1989            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
1990
1991	        /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken can cause the pointer to
1992	           the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */
1993            while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
1994                strength = src.parsedToken.strength;
1995                chOffset = src.parsedToken.charsOffset;
1996                chLen = src.parsedToken.charsLen;
1997                exOffset = src.parsedToken.extensionOffset;
1998                exLen = src.parsedToken.extensionLen;
1999                prefixOffset = src.parsedToken.prefixOffset;
2000                prefixLen = src.parsedToken.prefixLen;
2001                specs = src.parsedToken.flags;
2002
2003                startOfRules = FALSE;
2004                uprv_memcpy(codepoints, src.source + chOffset,
2005                                                       chLen * sizeof(UChar));
2006                codepoints[chLen] = 0;
2007                if(codepoints[0] == 0xFFFE) {
2008                    /* Skip special merge-sort character U+FFFE which has otherwise illegal 02 weight bytes. */
2009                    continue;
2010                }
2011                checkSortKeyValidity(coll, codepoints, chLen);
2012            }
2013            uprv_free(src.source);
2014        }
2015
2016        ucol_close(coll);
2017        count ++;
2018    }
2019    T_FileStream_close(file);
2020}
2021
2022/**
2023* TestSearchCollatorElements tests iterator behavior (forwards and backwards) with
2024* normalization on AND jamo tailoring, among other things.
2025*/
2026static const UChar tsceText[] = {   /* Nothing in here should be ignorable */
2027    0x0020, 0xAC00,                 /* simple LV Hangul */
2028    0x0020, 0xAC01,                 /* simple LVT Hangul */
2029    0x0020, 0xAC0F,                 /* LVTT, last jamo expands for search */
2030    0x0020, 0xAFFF,                 /* LLVVVTT, every jamo expands for search */
2031    0x0020, 0x1100, 0x1161, 0x11A8, /* 0xAC01 as conjoining jamo */
2032    0x0020, 0x3131, 0x314F, 0x3131, /* 0xAC01 as compatibility jamo */
2033    0x0020, 0x1100, 0x1161, 0x11B6, /* 0xAC0F as conjoining jamo; last expands for search */
2034    0x0020, 0x1101, 0x1170, 0x11B6, /* 0xAFFF as conjoining jamo; all expand for search */
2035    0x0020, 0x00E6,                 /* small letter ae, expands */
2036    0x0020, 0x1E4D,                 /* small letter o with tilde and acute, decomposes */
2037    0x0020
2038};
2039enum { kLen_tsceText = sizeof(tsceText)/sizeof(tsceText[0]) };
2040
/**
* Offsets expected from ucol_getOffset for the "root" entry of tsceItems.
* TestSearchCollatorElements reads the table front to back, one entry per
* ucol_next call, and back to front, one entry per ucol_previous call.
* NOTE(review): the rows appear to be laid out to parallel the rows of
* tsceText - confirm before relying on it.
* kLen_rootStandardOffsets is the number of entries in the table.
*/
static const int32_t rootStandardOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,
    6,  7,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,
    20, 21,22,23,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
enum { kLen_rootStandardOffsets = sizeof(rootStandardOffsets)/sizeof(rootStandardOffsets[0]) };
2056
/**
* Offsets expected from ucol_getOffset for the "root@collation=search" entry
* of tsceItems. TestSearchCollatorElements reads the table front to back, one
* entry per ucol_next call, and back to front, one entry per ucol_previous
* call.
* NOTE(review): the rows appear to be laid out to parallel the rows of
* tsceText - confirm before relying on it.
* kLen_rootSearchOffsets is the number of entries in the table.
*/
static const int32_t rootSearchOffsets[] = {
    0,  1,2,
    2,  3,4,4,
    4,  5,6,6,6,
    6,  7,8,8,8,8,8,8,
    8,  9,10,11,
    12, 13,14,15,
    16, 17,18,19,20,
    20, 21,22,22,23,23,23,24,
    24, 25,26,26,26,
    26, 27,28,28,
    28,
    29
};
enum { kLen_rootSearchOffsets = sizeof(rootSearchOffsets)/sizeof(rootSearchOffsets[0]) };
2072
/**
* One test case of TestSearchCollatorElements: a collator locale together with
* the offsets expected while iterating over tsceText with that collator.
*/
typedef struct {
    const char *    locale;     /* locale ID handed to ucol_open; NULL marks the end of a table */
    const int32_t * offsets;    /* expected ucol_getOffset values, in forward iteration order */
    int32_t         offsetsLen; /* number of entries in offsets */
} TSCEItem;
2078
/**
* Test cases run by TestSearchCollatorElements. The table is not counted; it
* ends at the entry whose locale is NULL.
*/
static const TSCEItem tsceItems[] = {
    { "root",                  rootStandardOffsets, kLen_rootStandardOffsets },
    { "root@collation=search", rootSearchOffsets,   kLen_rootSearchOffsets   },
    { NULL,                    NULL,                0                        }
};
2084
2085static void TestSearchCollatorElements(void)
2086{
2087    const TSCEItem * tsceItemPtr;
2088    for (tsceItemPtr = tsceItems; tsceItemPtr->locale != NULL; tsceItemPtr++) {
2089        UErrorCode status = U_ZERO_ERROR;
2090        UCollator* ucol = ucol_open(tsceItemPtr->locale, &status);
2091        if ( U_SUCCESS(status) ) {
2092            UCollationElements * uce = ucol_openElements(ucol, tsceText, kLen_tsceText, &status);
2093            if ( U_SUCCESS(status) ) {
2094                int32_t offset, element;
2095                const int32_t * nextOffsetPtr;
2096                const int32_t * limitOffsetPtr;
2097
2098                nextOffsetPtr = tsceItemPtr->offsets;
2099                limitOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2100                do {
2101                    offset = ucol_getOffset(uce);
2102                    element = ucol_next(uce, &status);
2103                    if ( element == 0 ) {
2104                        log_err("error, locale %s, ucol_next returned element 0\n", tsceItemPtr->locale );
2105                    }
2106                    if ( nextOffsetPtr < limitOffsetPtr ) {
2107                        if (offset != *nextOffsetPtr) {
2108                            log_err("error, locale %s, expected ucol_next -> ucol_getOffset %d, got %d\n",
2109                                                            tsceItemPtr->locale, *nextOffsetPtr, offset );
2110                            nextOffsetPtr = limitOffsetPtr;
2111                            break;
2112                        }
2113                        nextOffsetPtr++;
2114                    } else {
2115                        log_err("error, locale %s, ucol_next returned more elements than expected\n", tsceItemPtr->locale );
2116                    }
2117                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2118                if ( nextOffsetPtr < limitOffsetPtr ) {
2119                    log_err("error, locale %s, ucol_next returned fewer elements than expected\n", tsceItemPtr->locale );
2120                }
2121
2122                ucol_setOffset(uce, kLen_tsceText, &status);
2123                status = U_ZERO_ERROR;
2124                nextOffsetPtr = tsceItemPtr->offsets + tsceItemPtr->offsetsLen;
2125                limitOffsetPtr = tsceItemPtr->offsets;
2126                do {
2127                    offset = ucol_getOffset(uce);
2128                    element = ucol_previous(uce, &status);
2129                    if ( element == 0 ) {
2130                        log_err("error, locale %s, ucol_previous returned element 0\n", tsceItemPtr->locale );
2131                    }
2132                    if ( nextOffsetPtr > limitOffsetPtr ) {
2133                        nextOffsetPtr--;
2134                        if (offset != *nextOffsetPtr) {
2135                            log_err("error, locale %s, expected ucol_previous -> ucol_getOffset %d, got %d\n",
2136                                                                tsceItemPtr->locale, *nextOffsetPtr, offset );
2137                            nextOffsetPtr = limitOffsetPtr;
2138                            break;
2139                        }
2140                   } else {
2141                        log_err("error, locale %s, ucol_previous returned more elements than expected\n", tsceItemPtr->locale );
2142                    }
2143                } while ( U_SUCCESS(status) && element != UCOL_NULLORDER );
2144                if ( nextOffsetPtr > limitOffsetPtr ) {
2145                    log_err("error, locale %s, ucol_previous returned fewer elements than expected\n", tsceItemPtr->locale );
2146                }
2147
2148                ucol_closeElements(uce);
2149            } else {
2150                log_err("error, locale %s, ucol_openElements failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2151            }
2152            ucol_close(ucol);
2153        } else {
2154            log_data_err("error, locale %s, ucol_open failed: %s\n", tsceItemPtr->locale, u_errorName(status) );
2155        }
2156    }
2157}
2158
2159#endif /* #if !UCONFIG_NO_COLLATION */
2160