1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_COLLATION
10
11#include "unicode/coll.h"
12#include "unicode/tblcoll.h"
13#include "unicode/unistr.h"
14#include "unicode/sortkey.h"
15#include "regcoll.h"
16#include "sfwdchit.h"
17#include "testutil.h"
18#include "cmemory.h"
19
20#define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
21
22CollationRegressionTest::CollationRegressionTest()
23{
24    UErrorCode status = U_ZERO_ERROR;
25
26    en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
27    if(U_FAILURE(status)) {
28      delete en_us;
29      en_us = 0;
30      errcheckln(status, "Collator creation failed with %s", u_errorName(status));
31      return;
32    }
33}
34
35CollationRegressionTest::~CollationRegressionTest()
36{
37    delete en_us;
38}
39
40
41    // @bug 4048446
42//
43// CollationElementIterator.reset() doesn't work
44//
45void CollationRegressionTest::Test4048446(/* char* par */)
46{
47    const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
48    const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
49    CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
50    CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
51    UErrorCode status = U_ZERO_ERROR;
52
53    if (i1 == NULL|| i2 == NULL)
54    {
55        errln("Could not create CollationElementIterator's");
56        delete i1;
57        delete i2;
58        return;
59    }
60
61    while (i1->next(status) != CollationElementIterator::NULLORDER)
62    {
63        if (U_FAILURE(status))
64        {
65            errln("error calling next()");
66
67            delete i1;
68            delete i2;
69            return;
70        }
71    }
72
73    i1->reset();
74
75    assertEqual(*i1, *i2);
76
77    delete i1;
78    delete i2;
79}
80
81// @bug 4051866
82//
83// Collator -> rules -> Collator round-trip broken for expanding characters
84//
85void CollationRegressionTest::Test4051866(/* char* par */)
86{
87/*
88    RuleBasedCollator c1 = new RuleBasedCollator("< o "
89                                                +"& oe ,o\u3080"
90                                                +"& oe ,\u1530 ,O"
91                                                +"& OE ,O\u3080"
92                                                +"& OE ,\u1520"
93                                                +"< p ,P");
94*/
95
96    UnicodeString rules;
97    UErrorCode status = U_ZERO_ERROR;
98
99    rules += "< o ";
100    rules += "& oe ,o";
101    rules += (UChar)0x3080;
102    rules += "& oe ,";
103    rules += (UChar)0x1530;
104    rules += " ,O";
105    rules += "& OE ,O";
106    rules += (UChar)0x3080;
107    rules += "& OE ,";
108    rules += (UChar)0x1520;
109    rules += "< p ,P";
110
111    // Build a collator containing expanding characters
112    RuleBasedCollator *c1 = new RuleBasedCollator(rules, status);
113
114    // Build another using the rules from  the first
115    RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status);
116
117    // Make sure they're the same
118    if (!(c1->getRules() == c2->getRules()))
119    {
120        errln("Rules are not equal");
121    }
122
123    delete c2;
124    delete c1;
125}
126
127// @bug 4053636
128//
129// Collator thinks "black-bird" == "black"
130//
131void CollationRegressionTest::Test4053636(/* char* par */)
132{
133    if (en_us->equals("black_bird", "black"))
134    {
135        errln("black-bird == black");
136    }
137}
138
139// @bug 4054238
140//
141// CollationElementIterator will not work correctly if the associated
142// Collator object's mode is changed
143//
144void CollationRegressionTest::Test4054238(/* char* par */)
145{
146    const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
147    const UnicodeString test3(chars3);
148    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
149
150    // NOTE: The Java code uses en_us to create the CollationElementIterators
151    // but I'm pretty sure that's wrong, so I've changed this to use c.
152    UErrorCode status = U_ZERO_ERROR;
153    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
154    CollationElementIterator *i1 = c->createCollationElementIterator(test3);
155    delete i1;
156    delete c;
157}
158
159// @bug 4054734
160//
161// Collator::IDENTICAL documented but not implemented
162//
163void CollationRegressionTest::Test4054734(/* char* par */)
164{
165    /*
166        Here's the original Java:
167
168        String[] decomp = {
169            "\u0001",   "<",    "\u0002",
170            "\u0001",   "=",    "\u0001",
171            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
172            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
173        };
174
175        String[] nodecomp = {
176            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
177        };
178    */
179
180    static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
181    {
182        {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
183        {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
184        {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
185        {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
186    };
187
188
189    UErrorCode status = U_ZERO_ERROR;
190    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
191
192    c->setStrength(Collator::IDENTICAL);
193
194    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
195    compareArray(*c, decomp, ARRAY_LENGTH(decomp));
196
197    delete c;
198}
199
200// @bug 4054736
201//
202// Full Decomposition mode not implemented
203//
204void CollationRegressionTest::Test4054736(/* char* par */)
205{
206    UErrorCode status = U_ZERO_ERROR;
207    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
208
209    c->setStrength(Collator::SECONDARY);
210    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
211
212    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
213    {
214        {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
215    };
216
217    compareArray(*c, tests, ARRAY_LENGTH(tests));
218
219    delete c;
220}
221
222// @bug 4058613
223//
224// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
225//
226void CollationRegressionTest::Test4058613(/* char* par */)
227{
228    // Creating a default collator doesn't work when Korean is the default
229    // locale
230
231    Locale oldDefault = Locale::getDefault();
232    UErrorCode status = U_ZERO_ERROR;
233
234    Locale::setDefault(Locale::getKorean(), status);
235
236    if (U_FAILURE(status))
237    {
238        errln("Could not set default locale to Locale::KOREAN");
239        return;
240    }
241
242    Collator *c = NULL;
243
244    c = Collator::createInstance("en_US", status);
245
246    if (c == NULL || U_FAILURE(status))
247    {
248        errln("Could not create a Korean collator");
249        Locale::setDefault(oldDefault, status);
250        delete c;
251        return;
252    }
253
254    // Since the fix to this bug was to turn off decomposition for Korean collators,
255    // ensure that's what we got
256    if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
257    {
258      errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
259    }
260
261    delete c;
262
263    Locale::setDefault(oldDefault, status);
264}
265
266// @bug 4059820
267//
268// RuleBasedCollator.getRules does not return the exact pattern as input
269// for expanding character sequences
270//
271void CollationRegressionTest::Test4059820(/* char* par */)
272{
273    UErrorCode status = U_ZERO_ERROR;
274
275    RuleBasedCollator *c = NULL;
276    UnicodeString rules = "< a < b , c/a < d < z";
277
278    c = new RuleBasedCollator(rules, status);
279
280    if (c == NULL || U_FAILURE(status))
281    {
282        errln("Failure building a collator.");
283        delete c;
284        return;
285    }
286
287    if ( c->getRules().indexOf("c/a") == -1)
288    {
289        errln("returned rules do not contain 'c/a'");
290    }
291
292    delete c;
293}
294
295// @bug 4060154
296//
297// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
298//
299void CollationRegressionTest::Test4060154(/* char* par */)
300{
301    UErrorCode status = U_ZERO_ERROR;
302    UnicodeString rules;
303
304    rules += "< g, G < h, H < i, I < j, J";
305    rules +=  " & H < ";
306    rules += (UChar)0x0131;
307    rules += ", ";
308    rules += (UChar)0x0130;
309    rules += ", i, I";
310
311    RuleBasedCollator *c = NULL;
312
313    c = new RuleBasedCollator(rules, status);
314
315    if (c == NULL || U_FAILURE(status))
316    {
317        errln("failure building collator.");
318        delete c;
319        return;
320    }
321
322    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
323
324 /*
325    String[] tertiary = {
326        "A",        "<",    "B",
327        "H",        "<",    "\u0131",
328        "H",        "<",    "I",
329        "\u0131",   "<",    "\u0130",
330        "\u0130",   "<",    "i",
331        "\u0130",   ">",    "H",
332    };
333*/
334
335    static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
336    {
337        {0x41, 0},    {0x3c, 0}, {0x42, 0},
338        {0x48, 0},    {0x3c, 0}, {0x0131, 0},
339        {0x48, 0},    {0x3c, 0}, {0x49, 0},
340        {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
341        {0x0130, 0}, {0x3c, 0}, {0x69, 0},
342        {0x0130, 0}, {0x3e, 0}, {0x48, 0}
343    };
344
345    c->setStrength(Collator::TERTIARY);
346    compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
347
348    /*
349    String[] secondary = {
350        "H",        "<",    "I",
351        "\u0131",   "=",    "\u0130",
352    };
353*/
354    static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
355    {
356        {0x48, 0},    {0x3c, 0}, {0x49, 0},
357        {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
358    };
359
360    c->setStrength(Collator::PRIMARY);
361    compareArray(*c, secondary, ARRAY_LENGTH(secondary));
362
363    delete c;
364}
365
366// @bug 4062418
367//
368// Secondary/Tertiary comparison incorrect in French Secondary
369//
370void CollationRegressionTest::Test4062418(/* char* par */)
371{
372    UErrorCode status = U_ZERO_ERROR;
373
374    RuleBasedCollator *c = NULL;
375
376    c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
377
378    if (c == NULL || U_FAILURE(status))
379    {
380        errln("Failed to create collator for Locale::getCanadaFrench()");
381        delete c;
382        return;
383    }
384
385    c->setStrength(Collator::SECONDARY);
386
387/*
388    String[] tests = {
389            "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
390    };
391*/
392    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
393    {
394        {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
395    };
396
397    compareArray(*c, tests, ARRAY_LENGTH(tests));
398
399    delete c;
400}
401
402// @bug 4065540
403//
404// Collator::compare() method broken if either string contains spaces
405//
406void CollationRegressionTest::Test4065540(/* char* par */)
407{
408    if (en_us->compare("abcd e", "abcd f") == 0)
409    {
410        errln("'abcd e' == 'abcd f'");
411    }
412}
413
414// @bug 4066189
415//
416// Unicode characters need to be recursively decomposed to get the
417// correct result. For example,
418// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
419//
420void CollationRegressionTest::Test4066189(/* char* par */)
421{
422    static const UChar chars1[] = {0x1EB1, 0};
423    static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
424    const UnicodeString test1(chars1);
425    const UnicodeString test2(chars2);
426    UErrorCode status = U_ZERO_ERROR;
427
428    // NOTE: The java code used en_us to create the
429    // CollationElementIterator's. I'm pretty sure that
430    // was wrong, so I've change the code to use c1 and c2
431    RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
432    c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
433    CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
434
435    RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
436    c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
437    CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
438
439    assertEqual(*i1, *i2);
440
441    delete i2;
442    delete c2;
443    delete i1;
444    delete c1;
445}
446
447// @bug 4066696
448//
449// French secondary collation checking at the end of compare iteration fails
450//
451void CollationRegressionTest::Test4066696(/* char* par */)
452{
453    UErrorCode status = U_ZERO_ERROR;
454    RuleBasedCollator *c = NULL;
455
456    c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
457
458    if (c == NULL || U_FAILURE(status))
459    {
460        errln("Failure creating collator for Locale::getCanadaFrench()");
461        delete c;
462        return;
463    }
464
465    c->setStrength(Collator::SECONDARY);
466
467/*
468    String[] tests = {
469        "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
470    };
471
472  should be:
473
474    String[] tests = {
475        "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
476    };
477
478*/
479
480    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
481    {
482        {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
483    };
484
485    compareArray(*c, tests, ARRAY_LENGTH(tests));
486
487    delete c;
488}
489
490// @bug 4076676
491//
492// Bad canonicalization of same-class combining characters
493//
494void CollationRegressionTest::Test4076676(/* char* par */)
495{
496    // These combining characters are all in the same class, so they should not
497    // be reordered, and they should compare as unequal.
498    static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
499    static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
500
501    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
502    c->setStrength(Collator::TERTIARY);
503
504    if (c->compare(s1,s2) == 0)
505    {
506        errln("Same-class combining chars were reordered");
507    }
508
509    delete c;
510}
511
512// @bug 4079231
513//
514// RuleBasedCollator::operator==(NULL) throws NullPointerException
515//
516void CollationRegressionTest::Test4079231(/* char* par */)
517{
518    // I don't think there's any way to write this test
519    // in C++. The following is equivalent to the Java,
520    // but doesn't compile 'cause NULL can't be converted
521    // to Collator&
522    //
523    // if (en_us->operator==(NULL))
524    // {
525    //     errln("en_us->operator==(NULL) returned TRUE");
526    // }
527
528 /*
529   try {
530        if (en_us->equals(null)) {
531            errln("en_us->equals(null) returned true");
532        }
533    }
534    catch (Exception e) {
535        errln("en_us->equals(null) threw " + e.toString());
536    }
537*/
538}
539
540// @bug 4078588
541//
542// RuleBasedCollator breaks on "< a < bb" rule
543//
544void CollationRegressionTest::Test4078588(/* char *par */)
545{
546    UErrorCode status = U_ZERO_ERROR;
547    RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", status);
548
549    if (rbc == NULL || U_FAILURE(status))
550    {
551        errln("Failed to create RuleBasedCollator.");
552        delete rbc;
553        return;
554    }
555
556    Collator::EComparisonResult result = rbc->compare("a","bb");
557
558    if (result != Collator::LESS)
559    {
560        errln((UnicodeString)"Compare(a,bb) returned " + (int)result
561            + (UnicodeString)"; expected -1");
562    }
563
564    delete rbc;
565}
566
567// @bug 4081866
568//
569// Combining characters in different classes not reordered properly.
570//
571void CollationRegressionTest::Test4081866(/* char* par */)
572{
573    // These combining characters are all in different classes,
574    // so they should be reordered and the strings should compare as equal.
575    static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
576    static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
577
578    UErrorCode status = U_ZERO_ERROR;
579    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
580    c->setStrength(Collator::TERTIARY);
581
582    // Now that the default collators are set to NO_DECOMPOSITION
583    // (as a result of fixing bug 4114077), we must set it explicitly
584    // when we're testing reordering behavior.  -- lwerner, 5/5/98
585    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
586
587    if (c->compare(s1,s2) != 0)
588    {
589        errln("Combining chars were not reordered");
590    }
591
592    delete c;
593}
594
595// @bug 4087241
596//
597// string comparison errors in Scandinavian collators
598//
599void CollationRegressionTest::Test4087241(/* char* par */)
600{
601    UErrorCode status = U_ZERO_ERROR;
602    Locale da_DK("da", "DK");
603    RuleBasedCollator *c = NULL;
604
605    c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
606
607    if (c == NULL || U_FAILURE(status))
608    {
609        errln("Failed to create collator for da_DK locale");
610        delete c;
611        return;
612    }
613
614    c->setStrength(Collator::SECONDARY);
615
616    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
617    {
618        {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
619        {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0},      // a-unlaut < a-ring
620        {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
621    };
622
623    compareArray(*c, tests, ARRAY_LENGTH(tests));
624
625    delete c;
626}
627
628// @bug 4087243
629//
630// CollationKey takes ignorable strings into account when it shouldn't
631//
632void CollationRegressionTest::Test4087243(/* char* par */)
633{
634    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
635    c->setStrength(Collator::TERTIARY);
636
637    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
638    {
639        {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
640    };
641
642    compareArray(*c, tests, ARRAY_LENGTH(tests));
643
644    delete c;
645}
646
647// @bug 4092260
648//
649// Mu/micro conflict
650// Micro symbol and greek lowercase letter Mu should sort identically
651//
652void CollationRegressionTest::Test4092260(/* char* par */)
653{
654    UErrorCode status = U_ZERO_ERROR;
655    Locale el("el", "");
656    Collator *c = NULL;
657
658    c = Collator::createInstance(el, status);
659
660    if (c == NULL || U_FAILURE(status))
661    {
662        errln("Failed to create collator for el locale.");
663        delete c;
664        return;
665    }
666
667    // These now have tertiary differences in UCA
668    c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
669
670    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
671    {
672        {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
673    };
674
675    compareArray(*c, tests, ARRAY_LENGTH(tests));
676
677    delete c;
678}
679
680// @bug 4095316
681//
682void CollationRegressionTest::Test4095316(/* char* par */)
683{
684    UErrorCode status = U_ZERO_ERROR;
685    Locale el_GR("el", "GR");
686    Collator *c = Collator::createInstance(el_GR, status);
687
688    if (c == NULL || U_FAILURE(status))
689    {
690        errln("Failed to create collator for el_GR locale");
691        delete c;
692        return;
693    }
694    // These now have tertiary differences in UCA
695    //c->setStrength(Collator::TERTIARY);
696    c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
697
698    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
699    {
700        {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
701    };
702
703    compareArray(*c, tests, ARRAY_LENGTH(tests));
704
705    delete c;
706}
707
708// @bug 4101940
709//
710void CollationRegressionTest::Test4101940(/* char* par */)
711{
712    UErrorCode status = U_ZERO_ERROR;
713    RuleBasedCollator *c = NULL;
714    UnicodeString rules = "< a < b";
715    UnicodeString nothing = "";
716
717    c = new RuleBasedCollator(rules, status);
718
719    if (c == NULL || U_FAILURE(status))
720    {
721        errln("Failed to create RuleBasedCollator");
722        delete c;
723        return;
724    }
725
726    CollationElementIterator *i = c->createCollationElementIterator(nothing);
727    i->reset();
728
729    if (i->next(status) != CollationElementIterator::NULLORDER)
730    {
731        errln("next did not return NULLORDER");
732    }
733
734    delete i;
735    delete c;
736}
737
738// @bug 4103436
739//
740// Collator::compare not handling spaces properly
741//
742void CollationRegressionTest::Test4103436(/* char* par */)
743{
744    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
745    c->setStrength(Collator::TERTIARY);
746
747    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
748    {
749        {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
750        {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
751    };
752
753    compareArray(*c, tests, ARRAY_LENGTH(tests));
754
755    delete c;
756}
757
758// @bug 4114076
759//
760// Collation not Unicode conformant with Hangul syllables
761//
762void CollationRegressionTest::Test4114076(/* char* par */)
763{
764    UErrorCode status = U_ZERO_ERROR;
765    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
766    c->setStrength(Collator::TERTIARY);
767
768    //
769    // With Canonical decomposition, Hangul syllables should get decomposed
770    // into Jamo, but Jamo characters should not be decomposed into
771    // conjoining Jamo
772    //
773    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
774    {
775        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
776    };
777
778    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
779    compareArray(*c, test1, ARRAY_LENGTH(test1));
780
781    // From UTR #15:
782    // *In earlier versions of Unicode, jamo characters like ksf
783    //  had compatibility mappings to kf + sf. These mappings were
784    //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
785    // That is, the following test is obsolete as of 2.1.9
786
787//obsolete-    // With Full decomposition, it should go all the way down to
788//obsolete-    // conjoining Jamo characters.
789//obsolete-    //
790//obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
791//obsolete-    {
792//obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
793//obsolete-    };
794//obsolete-
795//obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
796//obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
797
798    delete c;
799}
800
801
802// @bug 4124632
803//
804// Collator::getCollationKey was hanging on certain character sequences
805//
806void CollationRegressionTest::Test4124632(/* char* par */)
807{
808    UErrorCode status = U_ZERO_ERROR;
809    Collator *coll = NULL;
810
811    coll = Collator::createInstance(Locale::getJapan(), status);
812
813    if (coll == NULL || U_FAILURE(status))
814    {
815        errln("Failed to create collator for Locale::JAPAN");
816        delete coll;
817        return;
818    }
819
820    static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
821    CollationKey key;
822
823    coll->getCollationKey(test, key, status);
824
825    if (key.isBogus() || U_FAILURE(status))
826    {
827        errln("CollationKey creation failed.");
828    }
829
830    delete coll;
831}
832
833// @bug 4132736
834//
835// sort order of french words with multiple accents has errors
836//
837void CollationRegressionTest::Test4132736(/* char* par */)
838{
839    UErrorCode status = U_ZERO_ERROR;
840
841    Collator *c = NULL;
842
843    c = Collator::createInstance(Locale::getCanadaFrench(), status);
844    c->setStrength(Collator::TERTIARY);
845
846    if (c == NULL || U_FAILURE(status))
847    {
848        errln("Failed to create a collator for Locale::getCanadaFrench()");
849        delete c;
850        return;
851    }
852
853    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
854    {
855        {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
856        {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
857    };
858
859    compareArray(*c, test1, ARRAY_LENGTH(test1));
860
861    delete c;
862}
863
864// @bug 4133509
865//
866// The sorting using java.text.CollationKey is not in the exact order
867//
868void CollationRegressionTest::Test4133509(/* char* par */)
869{
870    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
871    {
872        {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
873        {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
874        {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
875    };
876
877    compareArray(*en_us, test1, ARRAY_LENGTH(test1));
878}
879
880// @bug 4114077
881//
882// Collation with decomposition off doesn't work for Europe
883//
884void CollationRegressionTest::Test4114077(/* char* par */)
885{
886    // Ensure that we get the same results with decomposition off
887    // as we do with it on....
888
889    UErrorCode status = U_ZERO_ERROR;
890    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
891    c->setStrength(Collator::TERTIARY);
892
893    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
894    {
895        {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
896        {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
897        {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
898        {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
899                                                //   -> a, ring, acute
900        {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
901    };
902
903    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
904    compareArray(*c, test1, ARRAY_LENGTH(test1));
905
906    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
907    {
908        {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
909    };
910
911    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
912    compareArray(*c, test2, ARRAY_LENGTH(test2));
913
914    delete c;
915}
916
917// @bug 4141640
918//
919// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
920//
921void CollationRegressionTest::Test4141640(/* char* par */)
922{
923    //
924    // Rather than just creating a Swedish collator, we might as well
925    // try to instantiate one for every locale available on the system
926    // in order to prevent this sort of bug from cropping up in the future
927    //
928    UErrorCode status = U_ZERO_ERROR;
929    int32_t i, localeCount;
930    const Locale *locales = Locale::getAvailableLocales(localeCount);
931
932    for (i = 0; i < localeCount; i += 1)
933    {
934        Collator *c = NULL;
935
936        status = U_ZERO_ERROR;
937        c = Collator::createInstance(locales[i], status);
938
939        if (c == NULL || U_FAILURE(status))
940        {
941            UnicodeString msg, localeName;
942
943            msg += "Could not create collator for locale ";
944            msg += locales[i].getName();
945
946            errln(msg);
947        }
948
949        delete c;
950    }
951}
952
953// @bug 4139572
954//
955// getCollationKey throws exception for spanish text
956// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
957//
958void CollationRegressionTest::Test4139572(/* char* par */)
959{
960    //
961    // Code pasted straight from the bug report
962    // (and then translated to C++ ;-)
963    //
964    // create spanish locale and collator
965    UErrorCode status = U_ZERO_ERROR;
966    Locale l("es", "es");
967    Collator *col = NULL;
968
969    col = Collator::createInstance(l, status);
970
971    if (col == NULL || U_FAILURE(status))
972    {
973        errln("Failed to create a collator for es_es locale.");
974        delete col;
975        return;
976    }
977
978    CollationKey key;
979
980    // this spanish phrase kills it!
981    col->getCollationKey("Nombre De Objeto", key, status);
982
983    if (key.isBogus() || U_FAILURE(status))
984    {
985        errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
986    }
987
988    delete col;
989}
990/* HSYS : RuleBasedCollator::compare() performance enhancements
991          compare() does not create CollationElementIterator() anymore.*/
992
993class My4146160Collator : public RuleBasedCollator
994{
995public:
996    My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
997    ~My4146160Collator();
998
999    CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
1000
1001    CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
1002
1003    static int32_t count;
1004};
1005
1006int32_t My4146160Collator::count = 0;
1007
1008My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
1009  : RuleBasedCollator(rbc.getRules(), status)
1010{
1011}
1012
1013My4146160Collator::~My4146160Collator()
1014{
1015}
1016
1017CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1018{
1019    count += 1;
1020    return RuleBasedCollator::createCollationElementIterator(text);
1021}
1022
1023CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1024{
1025    count += 1;
1026    return RuleBasedCollator::createCollationElementIterator(text);
1027}
1028
1029// @bug 4146160
1030//
1031// RuleBasedCollator doesn't use createCollationElementIterator internally
1032//
1033void CollationRegressionTest::Test4146160(/* char* par */)
1034{
1035#if 0
1036    //
1037    // Use a custom collator class whose createCollationElementIterator
1038    // methods increment a count....
1039    //
1040    UErrorCode status = U_ZERO_ERROR;
1041    CollationKey key;
1042
1043    My4146160Collator::count = 0;
1044    My4146160Collator *mc = NULL;
1045
1046    mc = new My4146160Collator(*en_us, status);
1047
1048    if (mc == NULL || U_FAILURE(status))
1049    {
1050        errln("Failed to create a My4146160Collator.");
1051        delete mc;
1052        return;
1053    }
1054
1055    mc->getCollationKey("1", key, status);
1056
1057    if (key.isBogus() || U_FAILURE(status))
1058    {
1059        errln("Failure to get a CollationKey from a My4146160Collator.");
1060        delete mc;
1061        return;
1062    }
1063
1064    if (My4146160Collator::count < 1)
1065    {
1066        errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
1067    }
1068
1069    My4146160Collator::count = 0;
1070    mc->compare("1", "2");
1071
1072    if (My4146160Collator::count < 1)
1073    {
1074        errln("My4146160Collator::createtCollationElementIterator not called for compare");
1075    }
1076
1077    delete mc;
1078#endif
1079}
1080
1081// Ticket 7189
1082//
1083// nextSortKeyPart incorrect for EO_S1 collation
1084static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1085    UCharIterator uiter;
1086    uint32_t state[2] = { 0, 0 };
1087    int32_t keyLen;
1088    int32_t count = 8;
1089
1090    uiter_setString(&uiter, text, len);
1091    keyLen = 0;
1092    while (TRUE) {
1093        int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1094        if (U_FAILURE(status)) {
1095            return -1;
1096        }
1097        if (keyPartLen == 0) {
1098            break;
1099        }
1100        keyLen += keyPartLen;
1101    }
1102    return keyLen;
1103}
1104
1105void CollationRegressionTest::TestT7189() {
1106    UErrorCode status = U_ZERO_ERROR;
1107    UCollator *coll;
1108    uint32_t i;
1109
1110    static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1111    // "Achter De Hoven"
1112        { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1113        // "ABC"
1114        { 0x41, 0x42, 0x43, 0x00 },
1115        // "HELLO world!"
1116        { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1117    };
1118
1119    static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1120    // "Achter de Hoven"
1121        { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1122        // "abc"
1123        { 0x61, 0x62, 0x63, 0x00 },
1124        // "hello world!"
1125        { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1126    };
1127
1128    // Open the collator
1129    coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1130    if (U_FAILURE(status)) {
1131        errln("Failed to create a collator for short string EO_S1");
1132        return;
1133    }
1134
1135    for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1136        uint8_t key1[100], key2[100];
1137        int32_t len1, len2;
1138
1139        len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1140        if (U_FAILURE(status)) {
1141            errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1142            break;
1143        }
1144        len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1145        if (U_FAILURE(status)) {
1146            errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1147            break;
1148        }
1149
1150        if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1151            errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
1152        } else {
1153            logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
1154                    + TestUtility::hex(key2, len2));
1155        }
1156    }
1157    ucol_close(coll);
1158}
1159
1160void CollationRegressionTest::TestCaseFirstCompression() {
1161    RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
1162    UErrorCode status = U_ZERO_ERROR;
1163
1164    // default
1165    caseFirstCompressionSub(col, "default");
1166
1167    // Upper first
1168    col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1169    if (U_FAILURE(status)) {
1170        errln("Failed to set UCOL_UPPER_FIRST");
1171        return;
1172    }
1173    caseFirstCompressionSub(col, "upper first");
1174
1175    // Lower first
1176    col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1177    if (U_FAILURE(status)) {
1178        errln("Failed to set UCOL_LOWER_FIRST");
1179        return;
1180    }
1181    caseFirstCompressionSub(col, "lower first");
1182
1183    delete col;
1184}
1185
1186void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1187    const int32_t maxLength = 50;
1188
1189    UChar str1[maxLength];
1190    UChar str2[maxLength];
1191
1192    CollationKey key1, key2;
1193
1194    for (int32_t len = 1; len <= maxLength; len++) {
1195        int32_t i = 0;
1196        for (; i < len - 1; i++) {
1197            str1[i] = str2[i] = (UChar)0x61; // 'a'
1198        }
1199        str1[i] = (UChar)0x41; // 'A'
1200        str2[i] = (UChar)0x61; // 'a'
1201
1202        UErrorCode status = U_ZERO_ERROR;
1203        col->getCollationKey(str1, len, key1, status);
1204        col->getCollationKey(str2, len, key2, status);
1205
1206        UCollationResult cmpKey = key1.compareTo(key2, status);
1207        UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1208
1209        if (U_FAILURE(status)) {
1210            errln("Error in caseFirstCompressionSub");
1211        } else if (cmpKey != cmpCol) {
1212            errln((UnicodeString)"Inconsistent comparison(" + opt
1213                + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1214                + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1215        }
1216    }
1217}
1218
1219
1220
1221void CollationRegressionTest::compareArray(Collator &c,
1222                                           const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1223                                           int32_t testCount)
1224{
1225    int32_t i;
1226    Collator::EComparisonResult expectedResult = Collator::EQUAL;
1227
1228    for (i = 0; i < testCount; i += 3)
1229    {
1230        UnicodeString source(tests[i]);
1231        UnicodeString comparison(tests[i + 1]);
1232        UnicodeString target(tests[i + 2]);
1233
1234        if (comparison == "<")
1235        {
1236            expectedResult = Collator::LESS;
1237        }
1238        else if (comparison == ">")
1239        {
1240            expectedResult = Collator::GREATER;
1241        }
1242        else if (comparison == "=")
1243        {
1244            expectedResult = Collator::EQUAL;
1245        }
1246        else
1247        {
1248            UnicodeString bogus1("Bogus comparison string \"");
1249            UnicodeString bogus2("\"");
1250            errln(bogus1 + comparison + bogus2);
1251        }
1252
1253        Collator::EComparisonResult compareResult = c.compare(source, target);
1254
1255        CollationKey sourceKey, targetKey;
1256        UErrorCode status = U_ZERO_ERROR;
1257
1258        c.getCollationKey(source, sourceKey, status);
1259
1260        if (U_FAILURE(status))
1261        {
1262            errln("Couldn't get collationKey for source");
1263            continue;
1264        }
1265
1266        c.getCollationKey(target, targetKey, status);
1267
1268        if (U_FAILURE(status))
1269        {
1270            errln("Couldn't get collationKey for target");
1271            continue;
1272        }
1273
1274        Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1275
1276        reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1277
1278    }
1279}
1280
1281void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1282{
1283    int32_t c1, c2, count = 0;
1284    UErrorCode status = U_ZERO_ERROR;
1285
1286    do
1287    {
1288        c1 = i1.next(status);
1289        c2 = i2.next(status);
1290
1291        if (c1 != c2)
1292        {
1293            UnicodeString msg, msg1("    ");
1294
1295            msg += msg1 + count;
1296            msg += ": strength(0x";
1297            appendHex(c1, 8, msg);
1298            msg += ") != strength(0x";
1299            appendHex(c2, 8, msg);
1300            msg += ")";
1301
1302            errln(msg);
1303            break;
1304        }
1305
1306        count += 1;
1307    }
1308    while (c1 != CollationElementIterator::NULLORDER);
1309}
1310
1311void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
1312{
1313    if (exec)
1314    {
1315        logln("Collation Regression Tests: ");
1316    }
1317
1318    if(en_us) {
1319      switch (index)
1320      {
1321          case  0: name = "Test4048446"; if (exec) Test4048446(/* par */); break;
1322          case  1: name = "Test4051866"; if (exec) Test4051866(/* par */); break;
1323          case  2: name = "Test4053636"; if (exec) Test4053636(/* par */); break;
1324          case  3: name = "Test4054238"; if (exec) Test4054238(/* par */); break;
1325          case  4: name = "Test4054734"; if (exec) Test4054734(/* par */); break;
1326          case  5: name = "Test4054736"; if (exec) Test4054736(/* par */); break;
1327          case  6: name = "Test4058613"; if (exec) Test4058613(/* par */); break;
1328          case  7: name = "Test4059820"; if (exec) Test4059820(/* par */); break;
1329          case  8: name = "Test4060154"; if (exec) Test4060154(/* par */); break;
1330          case  9: name = "Test4062418"; if (exec) Test4062418(/* par */); break;
1331          case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break;
1332          case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break;
1333          case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break;
1334          case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break;
1335          case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break;
1336          case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break;
1337          case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break;
1338          case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break;
1339          case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break;
1340          case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break;
1341          case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break;
1342          case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break;
1343          case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break;
1344          case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break;
1345          case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break;
1346          case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break;
1347          case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break;
1348          case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break;
1349          case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
1350          case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
1351          case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
1352          case 31: name = "TestT7189";   if (exec) TestT7189(); break;
1353          case 32: name = "TestCaseFirstCompression"; if (exec) TestCaseFirstCompression(); break;
1354          default: name = ""; break;
1355      }
1356    } else {
1357      dataerrln("Class collator not instantiated");
1358      name = "";
1359    }
1360}
1361
1362#endif /* #if !UCONFIG_NO_COLLATION */
1363