1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_COLLATION
10
11#include "unicode/coll.h"
12#include "unicode/tblcoll.h"
13#include "unicode/unistr.h"
14#include "unicode/sortkey.h"
15#include "itercoll.h"
16#include "unicode/schriter.h"
17#include "unicode/chariter.h"
18#include "unicode/uchar.h"
19#include "cmemory.h"
20
/* Number of elements in a true array (not a pointer). The argument is fully
 * parenthesized so that expressions containing lower-precedence operators
 * are measured as a whole instead of being split by `sizeof`. */
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
22
23static UErrorCode status = U_ZERO_ERROR;
24
25CollationIteratorTest::CollationIteratorTest()
26 : test1("What subset of all possible test cases?", ""),
27   test2("has the highest probability of detecting", "")
28{
29    en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
30    if(U_FAILURE(status)) {
31      delete en_us;
32      en_us = 0;
33      errcheckln(status, "Collator creation failed with %s", u_errorName(status));
34      return;
35    }
36
37}
38
39CollationIteratorTest::~CollationIteratorTest()
40{
41    delete en_us;
42}
43
44/**
45 * Test for CollationElementIterator previous and next for the whole set of
46 * unicode characters.
47 */
48void CollationIteratorTest::TestUnicodeChar()
49{
50    CollationElementIterator *iter;
51    UChar codepoint;
52    UnicodeString source;
53
54    for (codepoint = 1; codepoint < 0xFFFE;)
55    {
56      source.remove();
57
58      while (codepoint % 0xFF != 0)
59      {
60        if (u_isdefined(codepoint))
61          source += codepoint;
62        codepoint ++;
63      }
64
65      if (u_isdefined(codepoint))
66        source += codepoint;
67
68      if (codepoint != 0xFFFF)
69        codepoint ++;
70
71      iter = en_us->createCollationElementIterator(source);
72      /* A basic test to see if it's working at all */
73      backAndForth(*iter);
74      delete iter;
75    }
76}
77
78/**
79 * Test for CollationElementIterator.previous()
80 *
81 * @bug 4108758 - Make sure it works with contracting characters
82 *
83 */
84void CollationIteratorTest::TestPrevious(/* char* par */)
85{
86    UErrorCode status = U_ZERO_ERROR;
87    CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
88
89    // A basic test to see if it's working at all
90    backAndForth(*iter);
91    delete iter;
92
93    // Test with a contracting character sequence
94    UnicodeString source;
95    RuleBasedCollator *c1 = NULL;
96    c1 = new RuleBasedCollator(
97        (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status);
98
99    if (c1 == NULL || U_FAILURE(status))
100    {
101        errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
102        delete c1;
103        return;
104    }
105
106    source = "abchdcba";
107    iter = c1->createCollationElementIterator(source);
108    backAndForth(*iter);
109    delete iter;
110    delete c1;
111
112    // Test with an expanding character sequence
113    RuleBasedCollator *c2 = NULL;
114    c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status);
115
116    if (c2 == NULL || U_FAILURE(status))
117    {
118        errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
119        delete c2;
120        return;
121    }
122
123    source = "abcd";
124    iter = c2->createCollationElementIterator(source);
125    backAndForth(*iter);
126    delete iter;
127    delete c2;
128
129    // Now try both
130    RuleBasedCollator *c3 = NULL;
131    c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status);
132
133    if (c3 == NULL || U_FAILURE(status))
134    {
135        errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
136        delete c3;
137        return;
138    }
139
140    source = "abcdbchdc";
141    iter = c3->createCollationElementIterator(source);
142    backAndForth(*iter);
143    delete iter;
144    delete c3;
145
146    status=U_ZERO_ERROR;
147    source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc");
148
149    Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status);
150    if(U_FAILURE(status)){
151        errln("Couldn't create a collator");
152    }
153    iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source);
154    backAndForth(*iter);
155    delete iter;
156    delete c4;
157
158    source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC");
159    Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status);
160
161    iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source);
162    if(U_FAILURE(status)){
163        errln("Couldn't create Japanese collator\n");
164    }
165    backAndForth(*iter);
166    delete iter;
167    delete c5;
168}
169
170/**
171 * Test for getOffset() and setOffset()
172 */
173void CollationIteratorTest::TestOffset(/* char* par */)
174{
175    CollationElementIterator *iter = en_us->createCollationElementIterator(test1);
176    UErrorCode status = U_ZERO_ERROR;
177    // testing boundaries
178    iter->setOffset(0, status);
179    if (U_FAILURE(status) || iter->previous(status) != UCOL_NULLORDER) {
180        errln("Error: After setting offset to 0, we should be at the end "
181                "of the backwards iteration");
182    }
183    iter->setOffset(test1.length(), status);
184    if (U_FAILURE(status) || iter->next(status) != UCOL_NULLORDER) {
185        errln("Error: After setting offset to end of the string, we should "
186                "be at the end of the backwards iteration");
187    }
188
189    // Run all the way through the iterator, then get the offset
190    int32_t orderLength = 0;
191    Order *orders = getOrders(*iter, orderLength);
192
193    int32_t offset = iter->getOffset();
194
195    if (offset != test1.length())
196    {
197        UnicodeString msg1("offset at end != length: ");
198        UnicodeString msg2(" vs ");
199
200        errln(msg1 + offset + msg2 + test1.length());
201    }
202
203    // Now set the offset back to the beginning and see if it works
204    CollationElementIterator *pristine = en_us->createCollationElementIterator(test1);
205
206    iter->setOffset(0, status);
207
208    if (U_FAILURE(status))
209    {
210        errln("setOffset failed.");
211    }
212    else
213    {
214        assertEqual(*iter, *pristine);
215    }
216
217    // TODO: try iterating halfway through a messy string.
218
219    delete pristine;
220    delete[] orders;
221    delete iter;
222}
223
224/**
225 * Test for setText()
226 */
227void CollationIteratorTest::TestSetText(/* char* par */)
228{
229    CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1);
230    CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2);
231    UErrorCode status = U_ZERO_ERROR;
232
233    // Run through the second iterator just to exercise it
234    int32_t c = iter2->next(status);
235    int32_t i = 0;
236
237    while ( ++i < 10 && c != CollationElementIterator::NULLORDER)
238    {
239        if (U_FAILURE(status))
240        {
241            errln("iter2->next() returned an error.");
242            delete iter2;
243            delete iter1;
244        }
245
246        c = iter2->next(status);
247    }
248
249    // Now set it to point to the same string as the first iterator
250    iter2->setText(test1, status);
251
252    if (U_FAILURE(status))
253    {
254        errln("call to iter2->setText(test1) failed.");
255    }
256    else
257    {
258        assertEqual(*iter1, *iter2);
259    }
260    iter1->reset();
261    //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text
262    CharacterIterator* chariter = new StringCharacterIterator(test1);
263    iter2->setText(*chariter, status);
264    if (U_FAILURE(status))
265    {
266        errln("call to iter2->setText(chariter(test1)) failed.");
267    }
268    else
269    {
270        assertEqual(*iter1, *iter2);
271    }
272
273    // test for an empty string
274    UnicodeString empty("");
275    iter1->setText(empty, status);
276    if (U_FAILURE(status)
277        || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
278        errln("Empty string should have no CEs.");
279    }
280    ((StringCharacterIterator *)chariter)->setText(empty);
281    iter1->setText(*chariter, status);
282    if (U_FAILURE(status)
283        || iter1->next(status) != (int32_t)UCOL_NULLORDER) {
284        errln("Empty string should have no CEs.");
285    }
286    delete chariter;
287    delete iter2;
288    delete iter1;
289}
290
291/** @bug 4108762
292 * Test for getMaxExpansion()
293 */
294void CollationIteratorTest::TestMaxExpansion(/* char* par */)
295{
296    UErrorCode          status = U_ZERO_ERROR;
297    UnicodeString rule("&a < ab < c/aba < d < z < ch");
298    RuleBasedCollator  *coll   = new RuleBasedCollator(rule, status);
299    UChar               ch     = 0;
300    UnicodeString       str(ch);
301
302    CollationElementIterator *iter   = coll->createCollationElementIterator(str);
303
304    while (ch < 0xFFFF && U_SUCCESS(status)) {
305        int      count = 1;
306        uint32_t order;
307        ch ++;
308        UnicodeString str(ch);
309        iter->setText(str, status);
310        order = iter->previous(status);
311
312        /* thai management */
313        if (CollationElementIterator::isIgnorable(order))
314            order = iter->previous(status);
315
316        while (U_SUCCESS(status)
317            && iter->previous(status) != (int32_t)UCOL_NULLORDER)
318        {
319            count ++;
320        }
321
322        if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) {
323            errln("Failure at codepoint %d, maximum expansion count < %d\n",
324                ch, count);
325        }
326    }
327
328    delete iter;
329    delete coll;
330}
331
332/*
333 * @bug 4157299
334 */
335void CollationIteratorTest::TestClearBuffers(/* char* par */)
336{
337    UErrorCode status = U_ZERO_ERROR;
338    RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status);
339
340    if (c == NULL || U_FAILURE(status))
341    {
342        errln("Couldn't create a RuleBasedCollator.");
343        delete c;
344        return;
345    }
346
347    UnicodeString source("abcd");
348    CollationElementIterator *i = c->createCollationElementIterator(source);
349    int32_t e0 = i->next(status);    // save the first collation element
350
351    if (U_FAILURE(status))
352    {
353        errln("call to i->next() failed. err=%s", u_errorName(status));
354    }
355    else
356    {
357        i->setOffset(3, status);        // go to the expanding character
358
359        if (U_FAILURE(status))
360        {
361            errln("call to i->setOffset(3) failed. err=%s", u_errorName(status));
362        }
363        else
364        {
365            i->next(status);                // but only use up half of it
366
367            if (U_FAILURE(status))
368            {
369                errln("call to i->next() failed. err=%s", u_errorName(status));
370            }
371            else
372            {
373                i->setOffset(0, status);        // go back to the beginning
374
375                if (U_FAILURE(status))
376                {
377                    errln("call to i->setOffset(0) failed. err=%s", u_errorName(status));
378                }
379                else
380                {
381                    int32_t e = i->next(status);    // and get this one again
382
383                    if (U_FAILURE(status))
384                    {
385                        errln("call to i->next() failed. err=%s", u_errorName(status));
386                    }
387                    else if (e != e0)
388                    {
389                        errln("got 0x%X, expected 0x%X", e, e0);
390                    }
391                }
392            }
393        }
394    }
395
396    delete i;
397    delete c;
398}
399
400/**
401 * Testing the assignment operator
402 */
403void CollationIteratorTest::TestAssignment()
404{
405    UErrorCode status = U_ZERO_ERROR;
406    RuleBasedCollator *coll =
407        (RuleBasedCollator *)Collator::createInstance(status);
408
409    if (coll == NULL || U_FAILURE(status))
410    {
411        errln("Couldn't create a default collator.");
412        return;
413    }
414
415    UnicodeString source("abcd");
416    CollationElementIterator *iter1 =
417        coll->createCollationElementIterator(source);
418
419    CollationElementIterator iter2 = *iter1;
420
421    if (*iter1 != iter2) {
422        errln("Fail collation iterator assignment does not produce the same elements");
423    }
424
425    CollationElementIterator iter3(*iter1);
426
427    if (*iter1 != iter3) {
428        errln("Fail collation iterator copy constructor does not produce the same elements");
429    }
430
431    source = CharsToUnicodeString("a\\u0300\\u0325");
432    coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
433    CollationElementIterator *iter4
434                        = coll->createCollationElementIterator(source);
435    CollationElementIterator iter5(*iter4);
436    if (*iter4 != iter5) {
437        errln("collation iterator assignment does not produce the same elements");
438    }
439    iter4->next(status);
440    if (U_FAILURE(status) || *iter4 == iter5) {
441        errln("collation iterator not equal");
442    }
443    iter5.next(status);
444    if (U_FAILURE(status) || *iter4 != iter5) {
445        errln("collation iterator equal");
446    }
447    iter4->next(status);
448    if (U_FAILURE(status) || *iter4 == iter5) {
449        errln("collation iterator not equal");
450    }
451    iter5.next(status);
452    if (U_FAILURE(status) || *iter4 != iter5) {
453        errln("collation iterator equal");
454    }
455    CollationElementIterator iter6(*iter4);
456    if (*iter4 != iter6) {
457        errln("collation iterator equal");
458    }
459    iter4->next(status);
460    if (U_FAILURE(status) || *iter4 == iter5) {
461        errln("collation iterator not equal");
462    }
463    iter5.next(status);
464    if (U_FAILURE(status) || *iter4 != iter5) {
465        errln("collation iterator equal");
466    }
467    iter4->next(status);
468    if (U_FAILURE(status) || *iter4 == iter5) {
469        errln("collation iterator not equal");
470    }
471    iter5.next(status);
472    if (U_FAILURE(status) || *iter4 != iter5) {
473        errln("collation iterator equal");
474    }
475    delete iter1;
476    delete iter4;
477    delete coll;
478}
479
480/**
481 * Testing the constructors
482 */
483void CollationIteratorTest::TestConstructors()
484{
485    UErrorCode status = U_ZERO_ERROR;
486    RuleBasedCollator *coll =
487        (RuleBasedCollator *)Collator::createInstance(status);
488    if (coll == NULL || U_FAILURE(status))
489    {
490        errln("Couldn't create a default collator.");
491        return;
492    }
493
494    // testing protected constructor with character iterator as argument
495    StringCharacterIterator chariter(test1);
496    CollationElementIterator *iter1 =
497        coll->createCollationElementIterator(chariter);
498    if (U_FAILURE(status)) {
499        errln("Couldn't create collation element iterator with character iterator.");
500        return;
501    }
502    CollationElementIterator *iter2 =
503        coll->createCollationElementIterator(test1);
504
505    // initially the 2 collation element iterators should be the same
506    if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
507        || *iter2 != *iter1) {
508        errln("CollationElementIterators constructed with the same string data should be the same at the start");
509    }
510    assertEqual(*iter1, *iter2);
511
512    delete iter1;
513    delete iter2;
514
515    // tests empty strings
516    UnicodeString empty("");
517    iter1 = coll->createCollationElementIterator(empty);
518    chariter.setText(empty);
519    iter2 = coll->createCollationElementIterator(chariter);
520    if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2
521        || *iter2 != *iter1) {
522        errln("CollationElementIterators constructed with the same string data should be the same at the start");
523    }
524    if (iter1->next(status) != (int32_t)UCOL_NULLORDER) {
525        errln("Empty string should have no CEs.");
526    }
527    if (iter2->next(status) != (int32_t)UCOL_NULLORDER) {
528        errln("Empty string should have no CEs.");
529    }
530    delete iter1;
531    delete iter2;
532    delete coll;
533}
534
535/**
536 * Testing the strength order
537 */
538void CollationIteratorTest::TestStrengthOrder()
539{
540    int order = 0x0123ABCD;
541
542    UErrorCode status = U_ZERO_ERROR;
543    RuleBasedCollator *coll =
544        (RuleBasedCollator *)Collator::createInstance(status);
545    if (coll == NULL || U_FAILURE(status))
546    {
547        errln("Couldn't create a default collator.");
548        return;
549    }
550
551    coll->setStrength(Collator::PRIMARY);
552    CollationElementIterator *iter =
553        coll->createCollationElementIterator(test1);
554
555    if (iter == NULL) {
556        errln("Couldn't create a collation element iterator from default collator");
557        return;
558    }
559
560    if (iter->strengthOrder(order) != 0x01230000) {
561        errln("Strength order for a primary strength collator should be the first 2 bytes");
562        return;
563    }
564
565    coll->setStrength(Collator::SECONDARY);
566    if (iter->strengthOrder(order) != 0x0123AB00) {
567        errln("Strength order for a secondary strength collator should be the third byte");
568        return;
569    }
570
571    coll->setStrength(Collator::TERTIARY);
572    if (iter->strengthOrder(order) != order) {
573        errln("Strength order for a tertiary strength collator should be the third byte");
574        return;
575    }
576    delete iter;
577    delete coll;
578}
579
580/**
581 * Return a string containing all of the collation orders
582 * returned by calls to next on the specified iterator
583 */
584UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target)
585{
586    int32_t order;
587    UErrorCode status = U_ZERO_ERROR;
588
589    while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
590    {
591        target += "0x";
592        appendHex(order, 8, target);
593        target += " ";
594    }
595
596    return target;
597}
598
599void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
600{
601    int32_t c1, c2, count = 0;
602    UErrorCode status = U_ZERO_ERROR;
603
604    do
605    {
606        c1 = i1.next(status);
607        c2 = i2.next(status);
608
609        if (c1 != c2)
610        {
611            errln("    %d: strength(0x%X) != strength(0x%X)", count, c1, c2);
612            break;
613        }
614
615        count += 1;
616    }
617    while (c1 != CollationElementIterator::NULLORDER);
618}
619
620void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/)
621{
622    if (exec)
623    {
624        logln("Collation Iteration Tests: ");
625    }
626
627    if(en_us) {
628      switch (index)
629      {
630          case  0: name = "TestPrevious";      if (exec) TestPrevious(/* par */);     break;
631          case  1: name = "TestOffset";        if (exec) TestOffset(/* par */);       break;
632          case  2: name = "TestSetText";       if (exec) TestSetText(/* par */);      break;
633          case  3: name = "TestMaxExpansion";  if (exec) TestMaxExpansion(/* par */); break;
634          case  4: name = "TestClearBuffers";  if (exec) TestClearBuffers(/* par */); break;
635          case  5: name = "TestUnicodeChar";   if (exec) TestUnicodeChar(/* par */);  break;
636          case  6: name = "TestAssignment";    if (exec) TestAssignment(/* par */);    break;
637          case  7: name = "TestConstructors";  if (exec) TestConstructors(/* par */); break;
638          case  8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break;
639          default: name = ""; break;
640      }
641    } else {
642      dataerrln("Class iterator not instantiated");
643      name = "";
644    }
645}
646
647#endif /* #if !UCONFIG_NO_COLLATION */
648