1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_COLLATION
10
11#include "unicode/coll.h"
12#include "unicode/tblcoll.h"
13#include "unicode/unistr.h"
14#include "unicode/sortkey.h"
15#include "itercoll.h"
16#include "unicode/schriter.h"
17#include "unicode/chariter.h"
18#include "unicode/uchar.h"
19#include "cmemory.h"
20
21#define ARRAY_LENGTH(array) (sizeof array / sizeof array[0])
22
23static UErrorCode status = U_ZERO_ERROR;
24
25CollationIteratorTest::CollationIteratorTest()
26 : test1("What subset of all possible test cases?", ""),
27   test2("has the highest probability of detecting", "")
28{
29    en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
30    if(U_FAILURE(status)) {
31      delete en_us;
32      en_us = 0;
33      errcheckln(status, "Collator creation failed with %s", u_errorName(status));
34      return;
35    }
36
37}
38
39CollationIteratorTest::~CollationIteratorTest()
40{
41    delete en_us;
42}
43
44/**
45 * Test for CollationElementIterator previous and next for the whole set of
46 * unicode characters.
47 */
48void CollationIteratorTest::TestUnicodeChar()
49{
50    CollationElementIterator *iter;
51    UChar codepoint;
52    UnicodeString source;
53
54    for (codepoint = 1; codepoint < 0xFFFE;)
55    {
56      source.remove();
57
58      while (codepoint % 0xFF != 0)
59      {
60        if (u_isdefined(codepoint))
61          source += codepoint;
62        codepoint ++;
63      }
64
65      if (u_isdefined(codepoint))
66        source += codepoint;
67
68      if (codepoint != 0xFFFF)
69        codepoint ++;
70
71      iter = en_us->createCollationElementIterator(source);
72      /* A basic test to see if it's working at all */
73      backAndForth(*iter);
74      delete iter;
75    }
76}
77
78/**
79 * Test for CollationElementIterator.previous()
80 *
81 * @bug 4108758 - Make sure it works with contracting characters
82 *
83 */
84void CollationIteratorTest::TestPrevious(/* char* par */)
85{
86    UErrorCode status = U_ZERO_ERROR;
87    CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
88
89    // A basic test to see if it's working at all
90    backAndForth(*iter);
91    delete iter;
92
93    // Test with a contracting character sequence
94    UnicodeString source;
95    RuleBasedCollator *c1 = NULL;
96    c1 = new RuleBasedCollator(
97        (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status);
98
99    if (c1 == NULL || U_FAILURE(status))
100    {
101        errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
102        delete c1;
103        return;
104    }
105
106    source = "abchdcba";
107    iter = c1->createCollationElementIterator(source);
108    backAndForth(*iter);
109    delete iter;
110    delete c1;
111
112    // Test with an expanding character sequence
113    RuleBasedCollator *c2 = NULL;
114    c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status);
115
116    if (c2 == NULL || U_FAILURE(status))
117    {
118        errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
119        delete c2;
120        return;
121    }
122
123    source = "abcd";
124    iter = c2->createCollationElementIterator(source);
125    backAndForth(*iter);
126    delete iter;
127    delete c2;
128
129    // Now try both
130    RuleBasedCollator *c3 = NULL;
131    c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status);
132
133    if (c3 == NULL || U_FAILURE(status))
134    {
135        errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
136        delete c3;
137        return;
138    }
139
140    source = "abcdbchdc";
141    iter = c3->createCollationElementIterator(source);
142    backAndForth(*iter);
143    delete iter;
144    delete c3;
145
146    status=U_ZERO_ERROR;
147    source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
148
149    Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
150    if(U_FAILURE(status)){
151        errln("Couldn't create a collator");
152    }
153    iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source);
154    backAndForth(*iter);
155    delete iter;
156    delete c4;
157
158    source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
159    Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
160
161    iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source);
162    if(U_FAILURE(status)){
163        errln("Couldn't create Japanese collator\n");
164    }
165    backAndForth(*iter);
166    delete iter;
167    delete c5;
168}
169
170/**
171 * Test for getOffset() and setOffset()
172 */
173void CollationIteratorTest::TestOffset(/* char* par */)
174{
175    CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
176    UErrorCode status = U_ZERO_ERROR;
177    // testing boundaries
178    iter->setOffset(0, status);
179    if (U_FAILURE(status) || iter->previous(status) != CollationElementIterator::NULLORDER) {
180        errln("Error: After setting offset to 0, we should be at the end "
181                "of the backwards iteration");
182    }
183    iter->setOffset(test1.length(), status);
184    if (U_FAILURE(status) || iter->next(status) != CollationElementIterator::NULLORDER) {
185        errln("Error: After setting offset to end of the string, we should "
186                "be at the end of the backwards iteration");
187    }
188
189    // Run all the way through the iterator, then get the offset
190    int32_t orderLength = 0;
191    Order *orders = getOrders(*iter, orderLength);
192
193    int32_t offset = iter->getOffset();
194
195    if (offset != test1.length())
196    {
197        UnicodeString msg1("offset at end != length: ");
198        UnicodeString msg2(" vs ");
199
200        errln(msg1 + offset + msg2 + test1.length());
201    }
202
203    // Now set the offset back to the beginning and see if it works
204    CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
205
206    iter->setOffset(0, status);
207
208    if (U_FAILURE(status))
209    {
210        errln("setOffset failed.");
211    }
212    else
213    {
214        assertEqual(*iter, *pristine);
215    }
216
217    delete pristine;
218    delete[] orders;
219    delete iter;
220
221    // setting offset in the middle of a contraction
222    UnicodeString contraction = "change";
223    status = U_ZERO_ERROR;
224    RuleBasedCollator tailored("& a < ch", status);
225    if (U_FAILURE(status)) {
226        errln("Error: in creation of Spanish collator - %s", u_errorName(status));
227        return;
228    }
229    iter = tailored.createCollationElementIterator(contraction);
230    Order *order = getOrders(*iter, orderLength);
231    iter->setOffset(1, status); // sets offset in the middle of ch
232    int32_t order2Length = 0;
233    Order *order2 = getOrders(*iter, order2Length);
234    if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
235        errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
236    }
237    delete[] order;
238    delete[] order2;
239    delete iter;
240    contraction = "peache";
241    iter = tailored.createCollationElementIterator(contraction);
242    iter->setOffset(3, status);
243    order = getOrders(*iter, orderLength);
244    iter->setOffset(4, status); // sets offset in the middle of ch
245    order2 = getOrders(*iter, order2Length);
246    if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
247        errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction");
248    }
249    delete[] order;
250    delete[] order2;
251    delete iter;
252    // setting offset in the middle of a surrogate pair
253    UnicodeString surrogate = UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape();
254    iter = tailored.createCollationElementIterator(surrogate);
255    order = getOrders(*iter, orderLength);
256    iter->setOffset(1, status); // sets offset in the middle of surrogate
257    order2 = getOrders(*iter, order2Length);
258    if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
259        errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
260    }
261    delete[] order;
262    delete[] order2;
263    delete iter;
264    surrogate = UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape();
265    iter = tailored.createCollationElementIterator(surrogate);
266    iter->setOffset(6, status);
267    order = getOrders(*iter, orderLength);
268    iter->setOffset(7, status); // sets offset in the middle of surrogate
269    order2 = getOrders(*iter, order2Length);
270    if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) {
271        errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair");
272    }
273    delete[] order;
274    delete[] order2;
275    delete iter;
276    // TODO: try iterating halfway through a messy string.
277}
278
279/**
280 * Test for setText()
281 */
282void CollationIteratorTest::TestSetText(/* char* par */)
283{
284    CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
285    CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
286    UErrorCode status = U_ZERO_ERROR;
287
288    // Run through the second iterator just to exercise it
289    int32_t c = iter2->next(status);
290    int32_t i = 0;
291
292    while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
293    {
294        if (U_FAILURE(status))
295        {
296            errln("iter2->next() returned an error.");
297            delete iter2;
298            delete iter1;
299        }
300
301        c = iter2->next(status);
302    }
303
304    // Now set it to point to the same string as the first iterator
305    iter2->setText(test1, status);
306
307    if (U_FAILURE(status))
308    {
309        errln("call to iter2->setText(test1) failed.");
310    }
311    else
312    {
313        assertEqual(*iter1, *iter2);
314    }
315    iter1->reset();
316    //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
317    CharacterIterator* chariter = new StringCharacterIterator(test1);
318    iter2->setText(*chariter, status);
319    if (U_FAILURE(status))
320    {
321        errln("call to iter2->setText(chariter(test1)) failed.");
322    }
323    else
324    {
325        assertEqual(*iter1, *iter2);
326    }
327
328    // test for an empty string
329    UnicodeString empty("");
330    iter1->setText(empty, status);
331    if (U_FAILURE(status)
332        || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
333        errln("Empty string should have no CEs.");
334    }
335    ((StringCharacterIterator *)chariter)->setText(empty);
336    iter1->setText(*chariter, status);
337    if (U_FAILURE(status)
338        || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
339        errln("Empty string should have no CEs.");
340    }
341    delete chariter;
342    delete iter2;
343    delete iter1;
344}
345
346/** @bug 4108762
347 * Test for getMaxExpansion()
348 */
349void CollationIteratorTest::TestMaxExpansion(/* char* par */)
350{
351    UErrorCode          status = U_ZERO_ERROR;
352    UnicodeString rule("&a < ab < c/aba < d < z < ch");
353    RuleBasedCollator  *coll   = new RuleBasedCollator(rule, status);
354    UChar               ch     = 0;
355    UnicodeString       str(ch);
356
357    CollationElementIterator *iter   = coll->createCollationElementIterator(str);
358
359    while (ch < 0xFFFF && U_SUCCESS(status)) {
360        int      count = 1;
361        uint32_t order;
362        ch ++;
363        UnicodeString str(ch);
364        iter->setText(str, status);
365        order = iter->previous(status);
366
367        /* thai management */
368        if (CollationElementIterator::isIgnorable(order))
369            order = iter->previous(status);
370
371        while (U_SUCCESS(status)
372            && iter->previous(status) != (int32_t)CollationElementIterator::NULLORDER)
373        {
374            count ++;
375        }
376
377        if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
378            errln("Failure at codepoint %d, maximum expansion count < %d\n",
379                ch, count);
380        }
381    }
382
383    delete iter;
384    delete coll;
385}
386
387/*
388 * @bug 4157299
389 */
390void CollationIteratorTest::TestClearBuffers(/* char* par */)
391{
392    UErrorCode status = U_ZERO_ERROR;
393    RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);
394
395    if (c == NULL || U_FAILURE(status))
396    {
397        errln("Couldn't create a RuleBasedCollator.");
398        delete c;
399        return;
400    }
401
402    UnicodeString source("abcd");
403    CollationElementIterator *i = c->createCollationElementIterator(source);
404    int32_t e0 = i->next(status);    // save the first collation element
405
406    if (U_FAILURE(status))
407    {
408        errln("call to i->next() failed. err=%s", u_errorName(status));
409    }
410    else
411    {
412        i->setOffset(3, status);        // go to the expanding character
413
414        if (U_FAILURE(status))
415        {
416            errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
417        }
418        else
419        {
420            i->next(status);                // but only use up half of it
421
422            if (U_FAILURE(status))
423            {
424                errln("call to i->next() failed. err=%s", u_errorName(status));
425            }
426            else
427            {
428                i->setOffset(0, status);        // go back to the beginning
429
430                if (U_FAILURE(status))
431                {
432                    errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
433                }
434                else
435                {
436                    int32_t e = i->next(status);    // and get this one again
437
438                    if (U_FAILURE(status))
439                    {
440                        errln("call to i->next() failed. err=%s", u_errorName(status));
441                    }
442                    else if (e != e0)
443                    {
444                        errln("got 0x%X, expected 0x%X", e, e0);
445                    }
446                }
447            }
448        }
449    }
450
451    delete i;
452    delete c;
453}
454
455/**
456 * Testing the assignment operator
457 */
458void CollationIteratorTest::TestAssignment()
459{
460    UErrorCode status = U_ZERO_ERROR;
461    RuleBasedCollator *coll =
462        (RuleBasedCollator *)Collator::createInstance(status);
463
464    if (coll == NULL || U_FAILURE(status))
465    {
466        errln("Couldn't create a default collator.");
467        return;
468    }
469
470    UnicodeString source("abcd");
471    CollationElementIterator *iter1 =
472        coll->createCollationElementIterator(source);
473
474    CollationElementIterator iter2 = *iter1;
475
476    if (*iter1 != iter2) {
477        errln("Fail collation iterator assignment does not produce the same elements");
478    }
479
480    CollationElementIterator iter3(*iter1);
481
482    if (*iter1 != iter3) {
483        errln("Fail collation iterator copy constructor does not produce the same elements");
484    }
485
486    source = CharsToUnicodeString("a\\u0300\\u0325");
487    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
488    CollationElementIterator *iter4
489                        = coll->createCollationElementIterator(source);
490    CollationElementIterator iter5(*iter4);
491    if (*iter4 != iter5) {
492        errln("collation iterator assignment does not produce the same elements");
493    }
494    iter4->next(status);
495    if (U_FAILURE(status) || *iter4 == iter5) {
496        errln("collation iterator not equal");
497    }
498    iter5.next(status);
499    if (U_FAILURE(status) || *iter4 != iter5) {
500        errln("collation iterator equal");
501    }
502    iter4->next(status);
503    if (U_FAILURE(status) || *iter4 == iter5) {
504        errln("collation iterator not equal");
505    }
506    iter5.next(status);
507    if (U_FAILURE(status) || *iter4 != iter5) {
508        errln("collation iterator equal");
509    }
510    CollationElementIterator iter6(*iter4);
511    if (*iter4 != iter6) {
512        errln("collation iterator equal");
513    }
514    iter4->next(status);
515    if (U_FAILURE(status) || *iter4 == iter5) {
516        errln("collation iterator not equal");
517    }
518    iter5.next(status);
519    if (U_FAILURE(status) || *iter4 != iter5) {
520        errln("collation iterator equal");
521    }
522    iter4->next(status);
523    if (U_FAILURE(status) || *iter4 == iter5) {
524        errln("collation iterator not equal");
525    }
526    iter5.next(status);
527    if (U_FAILURE(status) || *iter4 != iter5) {
528        errln("collation iterator equal");
529    }
530    delete iter1;
531    delete iter4;
532    delete coll;
533}
534
535/**
536 * Testing the constructors
537 */
538void CollationIteratorTest::TestConstructors()
539{
540    UErrorCode status = U_ZERO_ERROR;
541    RuleBasedCollator *coll =
542        (RuleBasedCollator *)Collator::createInstance(status);
543    if (coll == NULL || U_FAILURE(status))
544    {
545        errln("Couldn't create a default collator.");
546        return;
547    }
548
549    // testing protected constructor with character iterator as argument
550    StringCharacterIterator chariter(test1);
551    CollationElementIterator *iter1 =
552        coll->createCollationElementIterator(chariter);
553    if (U_FAILURE(status)) {
554        errln("Couldn't create collation element iterator with character iterator.");
555        return;
556    }
557    CollationElementIterator *iter2 =
558        coll->createCollationElementIterator(test1);
559
560    // initially the 2 collation element iterators should be the same
561    if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
562        || *iter2 != *iter1) {
563        errln("CollationElementIterators constructed with the same string data should be the same at the start");
564    }
565    assertEqual(*iter1, *iter2);
566
567    delete iter1;
568    delete iter2;
569
570    // tests empty strings
571    UnicodeString empty("");
572    iter1 = coll->createCollationElementIterator(empty);
573    chariter.setText(empty);
574    iter2 = coll->createCollationElementIterator(chariter);
575    if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
576        || *iter2 != *iter1) {
577        errln("CollationElementIterators constructed with the same string data should be the same at the start");
578    }
579    if (iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
580        errln("Empty string should have no CEs.");
581    }
582    if (iter2->next(status) != (int32_t)CollationElementIterator::NULLORDER) {
583        errln("Empty string should have no CEs.");
584    }
585    delete iter1;
586    delete iter2;
587    delete coll;
588}
589
590/**
591 * Testing the strength order
592 */
593void CollationIteratorTest::TestStrengthOrder()
594{
595    int order = 0x0123ABCD;
596
597    UErrorCode status = U_ZERO_ERROR;
598    RuleBasedCollator *coll =
599        (RuleBasedCollator *)Collator::createInstance(status);
600    if (coll == NULL || U_FAILURE(status))
601    {
602        errln("Couldn't create a default collator.");
603        return;
604    }
605
606    coll->setStrength(Collator::PRIMARY);
607    CollationElementIterator *iter =
608        coll->createCollationElementIterator(test1);
609
610    if (iter == NULL) {
611        errln("Couldn't create a collation element iterator from default collator");
612        return;
613    }
614
615    if (iter->strengthOrder(order) != 0x01230000) {
616        errln("Strength order for a primary strength collator should be the first 2 bytes");
617        return;
618    }
619
620    coll->setStrength(Collator::SECONDARY);
621    if (iter->strengthOrder(order) != 0x0123AB00) {
622        errln("Strength order for a secondary strength collator should be the third byte");
623        return;
624    }
625
626    coll->setStrength(Collator::TERTIARY);
627    if (iter->strengthOrder(order) != order) {
628        errln("Strength order for a tertiary strength collator should be the third byte");
629        return;
630    }
631    delete iter;
632    delete coll;
633}
634
635/**
636 * Return a string containing all of the collation orders
637 * returned by calls to next on the specified iterator
638 */
639UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
640{
641    int32_t order;
642    UErrorCode status = U_ZERO_ERROR;
643
644    while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
645    {
646        target += "0x";
647        appendHex(order, 8, target);
648        target += " ";
649    }
650
651    return target;
652}
653
654void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
655{
656    int32_t c1, c2, count = 0;
657    UErrorCode status = U_ZERO_ERROR;
658
659    do
660    {
661        c1 = i1.next(status);
662        c2 = i2.next(status);
663
664        if (c1 != c2)
665        {
666            errln("    %d: strength(0x%X) != strength(0x%X)", count, c1, c2);
667            break;
668        }
669
670        count += 1;
671    }
672    while (c1 != CollationElementIterator::NULLORDER);
673}
674
675void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
676{
677    if (exec)
678    {
679        logln("Collation Iteration Tests: ");
680    }
681
682    if(en_us) {
683      switch (index)
684      {
685          case  0: name = "TestPrevious";      if (exec) TestPrevious(/* par */);     break;
686          case  1: name = "TestOffset";        if (exec) TestOffset(/* par */);       break;
687          case  2: name = "TestSetText";       if (exec) TestSetText(/* par */);      break;
688          case  3: name = "TestMaxExpansion";  if (exec) TestMaxExpansion(/* par */); break;
689          case  4: name = "TestClearBuffers";  if (exec) TestClearBuffers(/* par */); break;
690          case  5: name = "TestUnicodeChar";   if (exec) TestUnicodeChar(/* par */);  break;
691          case  6: name = "TestAssignment";    if (exec) TestAssignment(/* par */);    break;
692          case  7: name = "TestConstructors";  if (exec) TestConstructors(/* par */); break;
693          case  8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break;
694          default: name = ""; break;
695      }
696    } else {
697      dataerrln("Class iterator not instantiated");
698      name = "";
699    }
700}
701
702#endif /* #if !UCONFIG_NO_COLLATION */
703