1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_COLLATION
10
11#include "unicode/coll.h"
12#include "unicode/localpointer.h"
13#include "unicode/tblcoll.h"
14#include "unicode/unistr.h"
15#include "unicode/sortkey.h"
16#include "regcoll.h"
17#include "sfwdchit.h"
18#include "testutil.h"
19#include "cmemory.h"
20
// Number of elements in a true array (not a pointer), computed at compile
// time from sizeof and cast to int32_t.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
22
23CollationRegressionTest::CollationRegressionTest()
24{
25    UErrorCode status = U_ZERO_ERROR;
26
27    en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status);
28    if(U_FAILURE(status)) {
29      delete en_us;
30      en_us = 0;
31      errcheckln(status, "Collator creation failed with %s", u_errorName(status));
32      return;
33    }
34}
35
CollationRegressionTest::~CollationRegressionTest()
{
    // Release the collator created by the constructor. The constructor sets
    // en_us to null when creation fails; deleting a null pointer is a no-op.
    delete en_us;
}
40
41
42    // @bug 4048446
43//
44// CollationElementIterator.reset() doesn't work
45//
46void CollationRegressionTest::Test4048446(/* char* par */)
47{
48    const UnicodeString test1 = "XFILE What subset of all possible test cases has the highest probability of detecting the most errors?";
49    const UnicodeString test2 = "Xf_ile What subset of all possible test cases has the lowest probability of detecting the least errors?";
50    CollationElementIterator *i1 = en_us->createCollationElementIterator(test1);
51    CollationElementIterator *i2 = en_us->createCollationElementIterator(test1);
52    UErrorCode status = U_ZERO_ERROR;
53
54    if (i1 == NULL|| i2 == NULL)
55    {
56        errln("Could not create CollationElementIterator's");
57        delete i1;
58        delete i2;
59        return;
60    }
61
62    while (i1->next(status) != CollationElementIterator::NULLORDER)
63    {
64        if (U_FAILURE(status))
65        {
66            errln("error calling next()");
67
68            delete i1;
69            delete i2;
70            return;
71        }
72    }
73
74    i1->reset();
75
76    assertEqual(*i1, *i2);
77
78    delete i1;
79    delete i2;
80}
81
82// @bug 4051866
83//
84// Collator -> rules -> Collator round-trip broken for expanding characters
85//
86void CollationRegressionTest::Test4051866(/* char* par */)
87{
88    UnicodeString rules;
89    UErrorCode status = U_ZERO_ERROR;
90
91    rules += "&n < o ";
92    rules += "& oe ,o";
93    rules += (UChar)0x3080;
94    rules += "& oe ,";
95    rules += (UChar)0x1530;
96    rules += " ,O";
97    rules += "& OE ,O";
98    rules += (UChar)0x3080;
99    rules += "& OE ,";
100    rules += (UChar)0x1520;
101    rules += "< p ,P";
102
103    // Build a collator containing expanding characters
104    LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), status);
105    if (U_FAILURE(status)) {
106        errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status));
107        return;
108    }
109
110    // Build another using the rules from  the first
111    LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), status), status);
112    if (U_FAILURE(status)) {
113        errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_errorName(status));
114        return;
115    }
116
117    // Make sure they're the same
118    if (!(c1->getRules() == c2->getRules()))
119    {
120        errln("Rules are not equal");
121    }
122}
123
124// @bug 4053636
125//
126// Collator thinks "black-bird" == "black"
127//
128void CollationRegressionTest::Test4053636(/* char* par */)
129{
130    if (en_us->equals("black_bird", "black"))
131    {
132        errln("black-bird == black");
133    }
134}
135
136// @bug 4054238
137//
138// CollationElementIterator will not work correctly if the associated
139// Collator object's mode is changed
140//
141void CollationRegressionTest::Test4054238(/* char* par */)
142{
143    const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0};
144    const UnicodeString test3(chars3);
145    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
146
147    // NOTE: The Java code uses en_us to create the CollationElementIterators
148    // but I'm pretty sure that's wrong, so I've changed this to use c.
149    UErrorCode status = U_ZERO_ERROR;
150    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
151    CollationElementIterator *i1 = c->createCollationElementIterator(test3);
152    delete i1;
153    delete c;
154}
155
156// @bug 4054734
157//
158// Collator::IDENTICAL documented but not implemented
159//
160void CollationRegressionTest::Test4054734(/* char* par */)
161{
162    /*
163        Here's the original Java:
164
165        String[] decomp = {
166            "\u0001",   "<",    "\u0002",
167            "\u0001",   "=",    "\u0001",
168            "A\u0001",  ">",    "~\u0002",      // Ensure A and ~ are not compared bitwise
169            "\u00C0",   "=",    "A\u0300"       // Decomp should make these equal
170        };
171
172        String[] nodecomp = {
173            "\u00C0",   ">",    "A\u0300"       // A-grave vs. A combining-grave
174        };
175    */
176
177    static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] =
178    {
179        {0x0001, 0},      {0x3c, 0}, {0x0002, 0},
180        {0x0001, 0},      {0x3d, 0}, {0x0001, 0},
181        {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0},
182        {0x00c0, 0},      {0x3d, 0}, {0x41, 0x0300, 0}
183    };
184
185
186    UErrorCode status = U_ZERO_ERROR;
187    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
188
189    c->setStrength(Collator::IDENTICAL);
190
191    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
192    compareArray(*c, decomp, ARRAY_LENGTH(decomp));
193
194    delete c;
195}
196
197// @bug 4054736
198//
199// Full Decomposition mode not implemented
200//
201void CollationRegressionTest::Test4054736(/* char* par */)
202{
203    UErrorCode status = U_ZERO_ERROR;
204    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
205
206    c->setStrength(Collator::SECONDARY);
207    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
208
209    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
210    {
211        {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC}  // Alef-Lamed vs. Alef, Lamed
212    };
213
214    compareArray(*c, tests, ARRAY_LENGTH(tests));
215
216    delete c;
217}
218
219// @bug 4058613
220//
221// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
222//
223void CollationRegressionTest::Test4058613(/* char* par */)
224{
225    // Creating a default collator doesn't work when Korean is the default
226    // locale
227
228    Locale oldDefault = Locale::getDefault();
229    UErrorCode status = U_ZERO_ERROR;
230
231    Locale::setDefault(Locale::getKorean(), status);
232
233    if (U_FAILURE(status))
234    {
235        errln("Could not set default locale to Locale::KOREAN");
236        return;
237    }
238
239    Collator *c = NULL;
240
241    c = Collator::createInstance("en_US", status);
242
243    if (c == NULL || U_FAILURE(status))
244    {
245        errln("Could not create a Korean collator");
246        Locale::setDefault(oldDefault, status);
247        delete c;
248        return;
249    }
250
251    // Since the fix to this bug was to turn off decomposition for Korean collators,
252    // ensure that's what we got
253    if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
254    {
255      errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator");
256    }
257
258    delete c;
259
260    Locale::setDefault(oldDefault, status);
261}
262
263// @bug 4059820
264//
265// RuleBasedCollator.getRules does not return the exact pattern as input
266// for expanding character sequences
267//
268void CollationRegressionTest::Test4059820(/* char* par */)
269{
270    UErrorCode status = U_ZERO_ERROR;
271
272    RuleBasedCollator *c = NULL;
273    UnicodeString rules = "&9 < a < b , c/a < d < z";
274
275    c = new RuleBasedCollator(rules, status);
276
277    if (c == NULL || U_FAILURE(status))
278    {
279        errln("Failure building a collator.");
280        delete c;
281        return;
282    }
283
284    if ( c->getRules().indexOf("c/a") == -1)
285    {
286        errln("returned rules do not contain 'c/a'");
287    }
288
289    delete c;
290}
291
292// @bug 4060154
293//
294// MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I"
295//
296void CollationRegressionTest::Test4060154(/* char* par */)
297{
298    UErrorCode status = U_ZERO_ERROR;
299    UnicodeString rules;
300
301    rules += "&f < g, G < h, H < i, I < j, J";
302    rules +=  " & H < ";
303    rules += (UChar)0x0131;
304    rules += ", ";
305    rules += (UChar)0x0130;
306    rules += ", i, I";
307
308    RuleBasedCollator *c = NULL;
309
310    c = new RuleBasedCollator(rules, status);
311
312    if (c == NULL || U_FAILURE(status))
313    {
314        errln("failure building collator.");
315        delete c;
316        return;
317    }
318
319    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
320
321 /*
322    String[] tertiary = {
323        "A",        "<",    "B",
324        "H",        "<",    "\u0131",
325        "H",        "<",    "I",
326        "\u0131",   "<",    "\u0130",
327        "\u0130",   "<",    "i",
328        "\u0130",   ">",    "H",
329    };
330*/
331
332    static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] =
333    {
334        {0x41, 0},    {0x3c, 0}, {0x42, 0},
335        {0x48, 0},    {0x3c, 0}, {0x0131, 0},
336        {0x48, 0},    {0x3c, 0}, {0x49, 0},
337        {0x0131, 0}, {0x3c, 0}, {0x0130, 0},
338        {0x0130, 0}, {0x3c, 0}, {0x69, 0},
339        {0x0130, 0}, {0x3e, 0}, {0x48, 0}
340    };
341
342    c->setStrength(Collator::TERTIARY);
343    compareArray(*c, tertiary, ARRAY_LENGTH(tertiary));
344
345    /*
346    String[] secondary = {
347        "H",        "<",    "I",
348        "\u0131",   "=",    "\u0130",
349    };
350*/
351    static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] =
352    {
353        {0x48, 0},    {0x3c, 0}, {0x49, 0},
354        {0x0131, 0}, {0x3d, 0}, {0x0130, 0}
355    };
356
357    c->setStrength(Collator::PRIMARY);
358    compareArray(*c, secondary, ARRAY_LENGTH(secondary));
359
360    delete c;
361}
362
363// @bug 4062418
364//
365// Secondary/Tertiary comparison incorrect in French Secondary
366//
367void CollationRegressionTest::Test4062418(/* char* par */)
368{
369    UErrorCode status = U_ZERO_ERROR;
370
371    RuleBasedCollator *c = NULL;
372
373    c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
374
375    if (c == NULL || U_FAILURE(status))
376    {
377        errln("Failed to create collator for Locale::getCanadaFrench()");
378        delete c;
379        return;
380    }
381
382    c->setStrength(Collator::SECONDARY);
383
384/*
385    String[] tests = {
386            "p\u00eache",    "<",    "p\u00e9ch\u00e9",    // Comparing accents from end, p\u00e9ch\u00e9 is greater
387    };
388*/
389    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
390    {
391        {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x68, 0x00E9, 0}
392    };
393
394    compareArray(*c, tests, ARRAY_LENGTH(tests));
395
396    delete c;
397}
398
399// @bug 4065540
400//
401// Collator::compare() method broken if either string contains spaces
402//
403void CollationRegressionTest::Test4065540(/* char* par */)
404{
405    if (en_us->compare("abcd e", "abcd f") == 0)
406    {
407        errln("'abcd e' == 'abcd f'");
408    }
409}
410
411// @bug 4066189
412//
413// Unicode characters need to be recursively decomposed to get the
414// correct result. For example,
415// u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300.
416//
417void CollationRegressionTest::Test4066189(/* char* par */)
418{
419    static const UChar chars1[] = {0x1EB1, 0};
420    static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0};
421    const UnicodeString test1(chars1);
422    const UnicodeString test2(chars2);
423    UErrorCode status = U_ZERO_ERROR;
424
425    // NOTE: The java code used en_us to create the
426    // CollationElementIterator's. I'm pretty sure that
427    // was wrong, so I've change the code to use c1 and c2
428    RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone();
429    c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
430    CollationElementIterator *i1 = c1->createCollationElementIterator(test1);
431
432    RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone();
433    c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
434    CollationElementIterator *i2 = c2->createCollationElementIterator(test2);
435
436    assertEqual(*i1, *i2);
437
438    delete i2;
439    delete c2;
440    delete i1;
441    delete c1;
442}
443
444// @bug 4066696
445//
446// French secondary collation checking at the end of compare iteration fails
447//
448void CollationRegressionTest::Test4066696(/* char* par */)
449{
450    UErrorCode status = U_ZERO_ERROR;
451    RuleBasedCollator *c = NULL;
452
453    c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
454
455    if (c == NULL || U_FAILURE(status))
456    {
457        errln("Failure creating collator for Locale::getCanadaFrench()");
458        delete c;
459        return;
460    }
461
462    c->setStrength(Collator::SECONDARY);
463
464/*
465    String[] tests = {
466        "\u00e0",   "<",     "\u01fa",       // a-grave <  A-ring-acute
467    };
468
469  should be:
470
471    String[] tests = {
472        "\u00e0",   ">",     "\u01fa",       // a-grave <  A-ring-acute
473    };
474
475*/
476
477    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
478    {
479        {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0}
480    };
481
482    compareArray(*c, tests, ARRAY_LENGTH(tests));
483
484    delete c;
485}
486
487// @bug 4076676
488//
489// Bad canonicalization of same-class combining characters
490//
491void CollationRegressionTest::Test4076676(/* char* par */)
492{
493    // These combining characters are all in the same class, so they should not
494    // be reordered, and they should compare as unequal.
495    static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0};
496    static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0};
497
498    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
499    c->setStrength(Collator::TERTIARY);
500
501    if (c->compare(s1,s2) == 0)
502    {
503        errln("Same-class combining chars were reordered");
504    }
505
506    delete c;
507}
508
509// @bug 4079231
510//
511// RuleBasedCollator::operator==(NULL) throws NullPointerException
512//
void CollationRegressionTest::Test4079231(/* char* par */)
{
    // Intentionally empty: the Java test checks equals(null), which has no
    // C++ counterpart.
    //
    // I don't think there's any way to write this test
    // in C++. The following is equivalent to the Java,
    // but doesn't compile because NULL can't be converted
    // to Collator&
    //
    // if (en_us->operator==(NULL))
    // {
    //     errln("en_us->operator==(NULL) returned TRUE");
    // }

 /*
   Java original, kept for reference:

   try {
        if (en_us->equals(null)) {
            errln("en_us->equals(null) returned true");
        }
    }
    catch (Exception e) {
        errln("en_us->equals(null) threw " + e.toString());
    }
*/
}
536
537// @bug 4078588
538//
539// RuleBasedCollator breaks on "< a < bb" rule
540//
541void CollationRegressionTest::Test4078588(/* char *par */)
542{
543    UErrorCode status = U_ZERO_ERROR;
544    RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status);
545
546    if (rbc == NULL || U_FAILURE(status))
547    {
548        errln("Failed to create RuleBasedCollator.");
549        delete rbc;
550        return;
551    }
552
553    Collator::EComparisonResult result = rbc->compare("a","bb");
554
555    if (result != Collator::LESS)
556    {
557        errln((UnicodeString)"Compare(a,bb) returned " + (int)result
558            + (UnicodeString)"; expected -1");
559    }
560
561    delete rbc;
562}
563
564// @bug 4081866
565//
566// Combining characters in different classes not reordered properly.
567//
568void CollationRegressionTest::Test4081866(/* char* par */)
569{
570    // These combining characters are all in different classes,
571    // so they should be reordered and the strings should compare as equal.
572    static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0};
573    static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0};
574
575    UErrorCode status = U_ZERO_ERROR;
576    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
577    c->setStrength(Collator::TERTIARY);
578
579    // Now that the default collators are set to NO_DECOMPOSITION
580    // (as a result of fixing bug 4114077), we must set it explicitly
581    // when we're testing reordering behavior.  -- lwerner, 5/5/98
582    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
583
584    if (c->compare(s1,s2) != 0)
585    {
586        errln("Combining chars were not reordered");
587    }
588
589    delete c;
590}
591
592// @bug 4087241
593//
594// string comparison errors in Scandinavian collators
595//
596void CollationRegressionTest::Test4087241(/* char* par */)
597{
598    UErrorCode status = U_ZERO_ERROR;
599    Locale da_DK("da", "DK");
600    RuleBasedCollator *c = NULL;
601
602    c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
603
604    if (c == NULL || U_FAILURE(status))
605    {
606        errln("Failed to create collator for da_DK locale");
607        delete c;
608        return;
609    }
610
611    c->setStrength(Collator::SECONDARY);
612
613    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
614    {
615        {0x7a, 0},          {0x3c, 0}, {0x00E6, 0},            // z        < ae
616        {0x61, 0x0308, 0},  {0x3c, 0}, {0x61, 0x030A, 0},      // a-umlaut < a-ring
617        {0x59, 0},          {0x3c, 0}, {0x75, 0x0308, 0},      // Y        < u-umlaut
618    };
619
620    compareArray(*c, tests, ARRAY_LENGTH(tests));
621
622    delete c;
623}
624
625// @bug 4087243
626//
627// CollationKey takes ignorable strings into account when it shouldn't
628//
629void CollationRegressionTest::Test4087243(/* char* par */)
630{
631    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
632    c->setStrength(Collator::TERTIARY);
633
634    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
635    {
636        {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0}    // 1 2 3  =  1 2 3 ctrl-A
637    };
638
639    compareArray(*c, tests, ARRAY_LENGTH(tests));
640
641    delete c;
642}
643
644// @bug 4092260
645//
646// Mu/micro conflict
647// Micro symbol and greek lowercase letter Mu should sort identically
648//
649void CollationRegressionTest::Test4092260(/* char* par */)
650{
651    UErrorCode status = U_ZERO_ERROR;
652    Locale el("el", "");
653    Collator *c = NULL;
654
655    c = Collator::createInstance(el, status);
656
657    if (c == NULL || U_FAILURE(status))
658    {
659        errln("Failed to create collator for el locale.");
660        delete c;
661        return;
662    }
663
664    // These now have tertiary differences in UCA
665    c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
666
667    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
668    {
669        {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0}
670    };
671
672    compareArray(*c, tests, ARRAY_LENGTH(tests));
673
674    delete c;
675}
676
677// @bug 4095316
678//
679void CollationRegressionTest::Test4095316(/* char* par */)
680{
681    UErrorCode status = U_ZERO_ERROR;
682    Locale el_GR("el", "GR");
683    Collator *c = Collator::createInstance(el_GR, status);
684
685    if (c == NULL || U_FAILURE(status))
686    {
687        errln("Failed to create collator for el_GR locale");
688        delete c;
689        return;
690    }
691    // These now have tertiary differences in UCA
692    //c->setStrength(Collator::TERTIARY);
693    c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status);
694
695    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
696    {
697        {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0}
698    };
699
700    compareArray(*c, tests, ARRAY_LENGTH(tests));
701
702    delete c;
703}
704
705// @bug 4101940
706//
707void CollationRegressionTest::Test4101940(/* char* par */)
708{
709    UErrorCode status = U_ZERO_ERROR;
710    RuleBasedCollator *c = NULL;
711    UnicodeString rules = "&9 < a < b";
712    UnicodeString nothing = "";
713
714    c = new RuleBasedCollator(rules, status);
715
716    if (c == NULL || U_FAILURE(status))
717    {
718        errln("Failed to create RuleBasedCollator");
719        delete c;
720        return;
721    }
722
723    CollationElementIterator *i = c->createCollationElementIterator(nothing);
724    i->reset();
725
726    if (i->next(status) != CollationElementIterator::NULLORDER)
727    {
728        errln("next did not return NULLORDER");
729    }
730
731    delete i;
732    delete c;
733}
734
735// @bug 4103436
736//
737// Collator::compare not handling spaces properly
738//
739void CollationRegressionTest::Test4103436(/* char* par */)
740{
741    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
742    c->setStrength(Collator::TERTIARY);
743
744    static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] =
745    {
746        {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0},
747        {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}
748    };
749
750    compareArray(*c, tests, ARRAY_LENGTH(tests));
751
752    delete c;
753}
754
755// @bug 4114076
756//
757// Collation not Unicode conformant with Hangul syllables
758//
759void CollationRegressionTest::Test4114076(/* char* par */)
760{
761    UErrorCode status = U_ZERO_ERROR;
762    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
763    c->setStrength(Collator::TERTIARY);
764
765    //
766    // With Canonical decomposition, Hangul syllables should get decomposed
767    // into Jamo, but Jamo characters should not be decomposed into
768    // conjoining Jamo
769    //
770    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
771    {
772        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0}
773    };
774
775    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
776    compareArray(*c, test1, ARRAY_LENGTH(test1));
777
778    // From UTR #15:
779    // *In earlier versions of Unicode, jamo characters like ksf
780    //  had compatibility mappings to kf + sf. These mappings were
781    //  removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
782    // That is, the following test is obsolete as of 2.1.9
783
784//obsolete-    // With Full decomposition, it should go all the way down to
785//obsolete-    // conjoining Jamo characters.
786//obsolete-    //
787//obsolete-    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
788//obsolete-    {
789//obsolete-        {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11c2, 0}
790//obsolete-    };
791//obsolete-
792//obsolete-    c->setDecomposition(Normalizer::DECOMP_COMPAT);
793//obsolete-    compareArray(*c, test2, ARRAY_LENGTH(test2));
794
795    delete c;
796}
797
798
799// @bug 4124632
800//
801// Collator::getCollationKey was hanging on certain character sequences
802//
803void CollationRegressionTest::Test4124632(/* char* par */)
804{
805    UErrorCode status = U_ZERO_ERROR;
806    Collator *coll = NULL;
807
808    coll = Collator::createInstance(Locale::getJapan(), status);
809
810    if (coll == NULL || U_FAILURE(status))
811    {
812        errln("Failed to create collator for Locale::JAPAN");
813        delete coll;
814        return;
815    }
816
817    static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
818    CollationKey key;
819
820    coll->getCollationKey(test, key, status);
821
822    if (key.isBogus() || U_FAILURE(status))
823    {
824        errln("CollationKey creation failed.");
825    }
826
827    delete coll;
828}
829
830// @bug 4132736
831//
832// sort order of french words with multiple accents has errors
833//
834void CollationRegressionTest::Test4132736(/* char* par */)
835{
836    UErrorCode status = U_ZERO_ERROR;
837
838    Collator *c = NULL;
839
840    c = Collator::createInstance(Locale::getCanadaFrench(), status);
841    c->setStrength(Collator::TERTIARY);
842
843    if (c == NULL || U_FAILURE(status))
844    {
845        errln("Failed to create a collator for Locale::getCanadaFrench()");
846        delete c;
847        return;
848    }
849
850    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
851    {
852        {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300, 0},
853        {0x65, 0x0300, 0x0301, 0},       {0x3c, 0}, {0x65, 0x0301, 0x0300, 0}
854    };
855
856    compareArray(*c, test1, ARRAY_LENGTH(test1));
857
858    delete c;
859}
860
861// @bug 4133509
862//
863// The sorting using java.text.CollationKey is not in the exact order
864//
865void CollationRegressionTest::Test4133509(/* char* par */)
866{
867    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
868    {
869        {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x69, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72, 0},
870        {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0},      {0x3c, 0}, {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0},
871        {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0},                  {0x3c, 0}, {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0}
872    };
873
874    compareArray(*en_us, test1, ARRAY_LENGTH(test1));
875}
876
877// @bug 4114077
878//
879// Collation with decomposition off doesn't work for Europe
880//
881void CollationRegressionTest::Test4114077(/* char* par */)
882{
883    // Ensure that we get the same results with decomposition off
884    // as we do with it on....
885
886    UErrorCode status = U_ZERO_ERROR;
887    RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
888    c->setStrength(Collator::TERTIARY);
889
890    static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
891    {
892        {0x00C0, 0},                     {0x3d, 0}, {0x41, 0x0300, 0},            // Should be equivalent
893        {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x68, 0x00e9, 0},
894        {0x0204, 0},                     {0x3d, 0}, {0x45, 0x030F, 0},
895        {0x01fa, 0},                     {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},    // a-ring-acute -> a-ring, acute
896                                                //   -> a, ring, acute
897        {0x41, 0x0300, 0x0316, 0},         {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}        // No reordering --> unequal
898    };
899
900    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status);
901    compareArray(*c, test1, ARRAY_LENGTH(test1));
902
903    static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] =
904    {
905        {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0}      // Reordering --> equal
906    };
907
908    c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
909    compareArray(*c, test2, ARRAY_LENGTH(test2));
910
911    delete c;
912}
913
914// @bug 4141640
915//
916// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
917//
918void CollationRegressionTest::Test4141640(/* char* par */)
919{
920    //
921    // Rather than just creating a Swedish collator, we might as well
922    // try to instantiate one for every locale available on the system
923    // in order to prevent this sort of bug from cropping up in the future
924    //
925    UErrorCode status = U_ZERO_ERROR;
926    int32_t i, localeCount;
927    const Locale *locales = Locale::getAvailableLocales(localeCount);
928
929    for (i = 0; i < localeCount; i += 1)
930    {
931        Collator *c = NULL;
932
933        status = U_ZERO_ERROR;
934        c = Collator::createInstance(locales[i], status);
935
936        if (c == NULL || U_FAILURE(status))
937        {
938            UnicodeString msg, localeName;
939
940            msg += "Could not create collator for locale ";
941            msg += locales[i].getName();
942
943            errln(msg);
944        }
945
946        delete c;
947    }
948}
949
950// @bug 4139572
951//
952// getCollationKey throws exception for spanish text
953// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
954//
955void CollationRegressionTest::Test4139572(/* char* par */)
956{
957    //
958    // Code pasted straight from the bug report
959    // (and then translated to C++ ;-)
960    //
961    // create spanish locale and collator
962    UErrorCode status = U_ZERO_ERROR;
963    Locale l("es", "es");
964    Collator *col = NULL;
965
966    col = Collator::createInstance(l, status);
967
968    if (col == NULL || U_FAILURE(status))
969    {
970        errln("Failed to create a collator for es_es locale.");
971        delete col;
972        return;
973    }
974
975    CollationKey key;
976
977    // this spanish phrase kills it!
978    col->getCollationKey("Nombre De Objeto", key, status);
979
980    if (key.isBogus() || U_FAILURE(status))
981    {
982        errln("Error creating CollationKey for \"Nombre De Ojbeto\"");
983    }
984
985    delete col;
986}
987/* HSYS : RuleBasedCollator::compare() performance enhancements
988          compare() does not create CollationElementIterator() anymore.*/
989
// RuleBasedCollator subclass used by Test4146160: both
// createCollationElementIterator() overloads bump the static `count` before
// delegating to the base class, so a test can tell whether they were called.
class My4146160Collator : public RuleBasedCollator
{
public:
    // Builds the collator from the rule string of `rbc`; errors are reported
    // through `status`.
    My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status);
    ~My4146160Collator();

    // Counting wrapper around the base-class overload taking a UnicodeString.
    CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;

    // Counting wrapper around the base-class overload taking a CharacterIterator.
    CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;

    // Number of createCollationElementIterator() calls, shared by all instances.
    static int32_t count;
};

// Definition of the shared call counter; starts at zero.
int32_t My4146160Collator::count = 0;
1004
My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status)
  : RuleBasedCollator(rbc.getRules(), status)
{
    // Nothing further to set up: the base class is constructed from the rule
    // string returned by rbc.getRules(), with errors reported via status.
}
1009
My4146160Collator::~My4146160Collator()
{
    // No members of its own to release.
}
1013
1014CollationElementIterator *My4146160Collator::createCollationElementIterator(const UnicodeString &text) const
1015{
1016    count += 1;
1017    return RuleBasedCollator::createCollationElementIterator(text);
1018}
1019
1020CollationElementIterator *My4146160Collator::createCollationElementIterator(const CharacterIterator &text) const
1021{
1022    count += 1;
1023    return RuleBasedCollator::createCollationElementIterator(text);
1024}
1025
1026// @bug 4146160
1027//
1028// RuleBasedCollator doesn't use createCollationElementIterator internally
1029//
void CollationRegressionTest::Test4146160(/* char* par */)
{
// The whole body is compiled out with #if 0, so this test currently does
// nothing. The disabled code checks that getCollationKey() and compare() go
// through My4146160Collator::createCollationElementIterator(), using the
// class's static call counter.
#if 0
    //
    // Use a custom collator class whose createCollationElementIterator
    // methods increment a count....
    //
    UErrorCode status = U_ZERO_ERROR;
    CollationKey key;

    My4146160Collator::count = 0;
    My4146160Collator *mc = NULL;

    mc = new My4146160Collator(*en_us, status);

    if (mc == NULL || U_FAILURE(status))
    {
        errln("Failed to create a My4146160Collator.");
        delete mc;
        return;
    }

    mc->getCollationKey("1", key, status);

    if (key.isBogus() || U_FAILURE(status))
    {
        errln("Failure to get a CollationKey from a My4146160Collator.");
        delete mc;
        return;
    }

    // The counter must have been bumped by getCollationKey().
    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createCollationElementIterator not called for getCollationKey");
    }

    // Reset the counter and repeat the check for compare().
    My4146160Collator::count = 0;
    mc->compare("1", "2");

    if (My4146160Collator::count < 1)
    {
        errln("My4146160Collator::createtCollationElementIterator not called for compare");
    }

    delete mc;
#endif
}
1077
1078void CollationRegressionTest::Test4179216() {
1079    // you can position a CollationElementIterator in the middle of
1080    // a contracting character sequence, yielding a bogus collation
1081    // element
1082    IcuTestErrorCode errorCode(*this, "Test4179216");
1083    RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat < crunchy", errorCode);
1084    UnicodeString testText = "church church catcatcher runcrunchynchy";
1085    CollationElementIterator *iter = coll.createCollationElementIterator(testText);
1086
1087    // test that the "ch" combination works properly
1088    iter->setOffset(4, errorCode);
1089    int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1090
1091    iter->reset();
1092    int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1093
1094    iter->setOffset(5, errorCode);
1095    int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1096
1097    // Compares and prints only 16-bit primary weights.
1098    if (elt4 != elt0 || elt5 != elt0) {
1099        errln("The collation elements at positions 0 (0x%04x), "
1100                "4 (0x%04x), and 5 (0x%04x) don't match.",
1101                elt0, elt4, elt5);
1102    }
1103
1104    // test that the "cat" combination works properly
1105    iter->setOffset(14, errorCode);
1106    int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1107
1108    iter->setOffset(15, errorCode);
1109    int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1110
1111    iter->setOffset(16, errorCode);
1112    int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1113
1114    iter->setOffset(17, errorCode);
1115    int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1116
1117    iter->setOffset(18, errorCode);
1118    int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1119
1120    iter->setOffset(19, errorCode);
1121    int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode));
1122
1123    // Compares and prints only 16-bit primary weights.
1124    if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17
1125            || elt14 != elt18 || elt14 != elt19) {
1126        errln("\"cat\" elements don't match: elt14 = 0x%04x, "
1127                "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, "
1128                "elt18 = 0x%04x, elt19 = 0x%04x",
1129                elt14, elt15, elt16, elt17, elt18, elt19);
1130    }
1131
1132    // now generate a complete list of the collation elements,
1133    // first using next() and then using setOffset(), and
1134    // make sure both interfaces return the same set of elements
1135    iter->reset();
1136
1137    int32_t elt = iter->next(errorCode);
1138    int32_t count = 0;
1139    while (elt != CollationElementIterator::NULLORDER) {
1140        ++count;
1141        elt = iter->next(errorCode);
1142    }
1143
1144    LocalArray<UnicodeString> nextElements(new UnicodeString[count]);
1145    LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]);
1146    int32_t lastPos = 0;
1147
1148    iter->reset();
1149    elt = iter->next(errorCode);
1150    count = 0;
1151    while (elt != CollationElementIterator::NULLORDER) {
1152        nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1153        lastPos = iter->getOffset();
1154        elt = iter->next(errorCode);
1155    }
1156    int32_t nextElementsLength = count;
1157    count = 0;
1158    for (int32_t i = 0; i < testText.length(); ) {
1159        iter->setOffset(i, errorCode);
1160        lastPos = iter->getOffset();
1161        elt = iter->next(errorCode);
1162        setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter->getOffset());
1163        i = iter->getOffset();
1164    }
1165    for (int32_t i = 0; i < nextElementsLength; i++) {
1166        if (nextElements[i] == setOffsetElements[i]) {
1167            logln(nextElements[i]);
1168        } else {
1169            errln(UnicodeString("Error: next() yielded ") + nextElements[i] +
1170                ", but setOffset() yielded " + setOffsetElements[i]);
1171        }
1172    }
1173    delete iter;
1174}
1175
1176// Ticket 7189
1177//
1178// nextSortKeyPart incorrect for EO_S1 collation
1179static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
1180    UCharIterator uiter;
1181    uint32_t state[2] = { 0, 0 };
1182    int32_t keyLen;
1183    int32_t count = 8;
1184
1185    uiter_setString(&uiter, text, len);
1186    keyLen = 0;
1187    while (TRUE) {
1188        int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
1189        if (U_FAILURE(status)) {
1190            return -1;
1191        }
1192        if (keyPartLen == 0) {
1193            break;
1194        }
1195        keyLen += keyPartLen;
1196    }
1197    return keyLen;
1198}
1199
1200void CollationRegressionTest::TestT7189() {
1201    UErrorCode status = U_ZERO_ERROR;
1202    UCollator *coll;
1203    uint32_t i;
1204
1205    static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1206    // "Achter De Hoven"
1207        { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1208        // "ABC"
1209        { 0x41, 0x42, 0x43, 0x00 },
1210        // "HELLO world!"
1211        { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1212    };
1213
1214    static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
1215    // "Achter de Hoven"
1216        { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
1217        // "abc"
1218        { 0x61, 0x62, 0x63, 0x00 },
1219        // "hello world!"
1220        { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
1221    };
1222
1223    // Open the collator
1224    coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
1225    if (U_FAILURE(status)) {
1226        errln("Failed to create a collator for short string EO_S1");
1227        return;
1228    }
1229
1230    for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) {
1231        uint8_t key1[100], key2[100];
1232        int32_t len1, len2;
1233
1234        len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status);
1235        if (U_FAILURE(status)) {
1236            errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]);
1237            break;
1238        }
1239        len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status);
1240        if (U_FAILURE(status)) {
1241            errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]);
1242            break;
1243        }
1244
1245        if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) {
1246            errln(UnicodeString("Failed: Identical key\n") + "    text1: " + text1[i] + "\n" + "    text2: " + text2[i] + "\n" + "    key  : " + TestUtility::hex(key1, len1));
1247        } else {
1248            logln(UnicodeString("Keys produced -\n") + "    text1: " + text1[i] + "\n" + "    key1 : " + TestUtility::hex(key1, len1) + "\n" + "    text2: " + text2[i] + "\n" + "    key2 : "
1249                    + TestUtility::hex(key2, len2));
1250        }
1251    }
1252    ucol_close(coll);
1253}
1254
1255void CollationRegressionTest::TestCaseFirstCompression() {
1256    RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
1257    UErrorCode status = U_ZERO_ERROR;
1258
1259    // default
1260    caseFirstCompressionSub(col, "default");
1261
1262    // Upper first
1263    col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
1264    if (U_FAILURE(status)) {
1265        errln("Failed to set UCOL_UPPER_FIRST");
1266        return;
1267    }
1268    caseFirstCompressionSub(col, "upper first");
1269
1270    // Lower first
1271    col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
1272    if (U_FAILURE(status)) {
1273        errln("Failed to set UCOL_LOWER_FIRST");
1274        return;
1275    }
1276    caseFirstCompressionSub(col, "lower first");
1277
1278    delete col;
1279}
1280
1281void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
1282    const int32_t maxLength = 50;
1283
1284    UChar str1[maxLength];
1285    UChar str2[maxLength];
1286
1287    CollationKey key1, key2;
1288
1289    for (int32_t len = 1; len <= maxLength; len++) {
1290        int32_t i = 0;
1291        for (; i < len - 1; i++) {
1292            str1[i] = str2[i] = (UChar)0x61; // 'a'
1293        }
1294        str1[i] = (UChar)0x41; // 'A'
1295        str2[i] = (UChar)0x61; // 'a'
1296
1297        UErrorCode status = U_ZERO_ERROR;
1298        col->getCollationKey(str1, len, key1, status);
1299        col->getCollationKey(str2, len, key2, status);
1300
1301        UCollationResult cmpKey = key1.compareTo(key2, status);
1302        UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
1303
1304        if (U_FAILURE(status)) {
1305            errln("Error in caseFirstCompressionSub");
1306        } else if (cmpKey != cmpCol) {
1307            errln((UnicodeString)"Inconsistent comparison(" + opt
1308                + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
1309                + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
1310        }
1311    }
1312}
1313
1314void CollationRegressionTest::TestTrailingComment() {
1315    // ICU ticket #8070:
1316    // Check that the rule parser handles a comment without terminating end-of-line.
1317    IcuTestErrorCode errorCode(*this, "TestTrailingComment");
1318    RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"), errorCode);
1319    UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63);
1320    assertTrue("c<b", coll.compare(c, b) < 0);
1321    assertTrue("b<a", coll.compare(b, a) < 0);
1322}
1323
1324void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
1325    // ICU ticket #9959:
1326    // Forbid rules with a before-reset followed by a stronger relation.
1327    IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter");
1328    RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorCode);
1329    if(errorCode.isSuccess()) {
1330        errln("should forbid before-2-reset followed by primary relation");
1331    } else {
1332        errorCode.reset();
1333    }
1334    RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), errorCode);
1335    if(errorCode.isSuccess()) {
1336        errln("should forbid before-3-reset followed by primary or secondary relation");
1337    } else {
1338        errorCode.reset();
1339    }
1340}
1341
1342void CollationRegressionTest::compareArray(Collator &c,
1343                                           const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
1344                                           int32_t testCount)
1345{
1346    int32_t i;
1347    Collator::EComparisonResult expectedResult = Collator::EQUAL;
1348
1349    for (i = 0; i < testCount; i += 3)
1350    {
1351        UnicodeString source(tests[i]);
1352        UnicodeString comparison(tests[i + 1]);
1353        UnicodeString target(tests[i + 2]);
1354
1355        if (comparison == "<")
1356        {
1357            expectedResult = Collator::LESS;
1358        }
1359        else if (comparison == ">")
1360        {
1361            expectedResult = Collator::GREATER;
1362        }
1363        else if (comparison == "=")
1364        {
1365            expectedResult = Collator::EQUAL;
1366        }
1367        else
1368        {
1369            UnicodeString bogus1("Bogus comparison string \"");
1370            UnicodeString bogus2("\"");
1371            errln(bogus1 + comparison + bogus2);
1372        }
1373
1374        Collator::EComparisonResult compareResult = c.compare(source, target);
1375
1376        CollationKey sourceKey, targetKey;
1377        UErrorCode status = U_ZERO_ERROR;
1378
1379        c.getCollationKey(source, sourceKey, status);
1380
1381        if (U_FAILURE(status))
1382        {
1383            errln("Couldn't get collationKey for source");
1384            continue;
1385        }
1386
1387        c.getCollationKey(target, targetKey, status);
1388
1389        if (U_FAILURE(status))
1390        {
1391            errln("Couldn't get collationKey for target");
1392            continue;
1393        }
1394
1395        Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey);
1396
1397        reportCResult( source, target, sourceKey, targetKey, compareResult, keyResult, compareResult, expectedResult );
1398
1399    }
1400}
1401
1402void CollationRegressionTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2)
1403{
1404    int32_t c1, c2, count = 0;
1405    UErrorCode status = U_ZERO_ERROR;
1406
1407    do
1408    {
1409        c1 = i1.next(status);
1410        c2 = i2.next(status);
1411
1412        if (c1 != c2)
1413        {
1414            UnicodeString msg, msg1("    ");
1415
1416            msg += msg1 + count;
1417            msg += ": strength(0x";
1418            appendHex(c1, 8, msg);
1419            msg += ") != strength(0x";
1420            appendHex(c2, 8, msg);
1421            msg += ")";
1422
1423            errln(msg);
1424            break;
1425        }
1426
1427        count += 1;
1428    }
1429    while (c1 != CollationElementIterator::NULLORDER);
1430}
1431
// Test dispatcher for this suite.
//
// index  number of the test case being requested
// exec   when true, a banner line is logged before dispatching
// name   output: set to "" when the suite cannot run because en_us is NULL
//        NOTE(review): otherwise presumably filled in by the TESTCASE_AUTO
//        macros, which are defined outside this file -- confirm there.
// par    unused
void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /* par */)
{
    if (exec)
    {
        logln("Collation Regression Tests: ");
    }

    // The constructor leaves en_us at 0 when the US collator could not be
    // created; none of the tests below can run without it.
    if(en_us == NULL) {
        dataerrln("Class collator not instantiated");
        name = "";
        return;
    }
    // NOTE(review): the macros presumably assign consecutive indexes in list
    // order, so reordering entries would renumber the tests -- confirm
    // against the macro definitions before changing the order.
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(Test4048446);
    TESTCASE_AUTO(Test4051866);
    TESTCASE_AUTO(Test4053636);
    TESTCASE_AUTO(Test4054238);
    TESTCASE_AUTO(Test4054734);
    TESTCASE_AUTO(Test4054736);
    TESTCASE_AUTO(Test4058613);
    TESTCASE_AUTO(Test4059820);
    TESTCASE_AUTO(Test4060154);
    TESTCASE_AUTO(Test4062418);
    TESTCASE_AUTO(Test4065540);
    TESTCASE_AUTO(Test4066189);
    TESTCASE_AUTO(Test4066696);
    TESTCASE_AUTO(Test4076676);
    TESTCASE_AUTO(Test4078588);
    TESTCASE_AUTO(Test4079231);
    TESTCASE_AUTO(Test4081866);
    TESTCASE_AUTO(Test4087241);
    TESTCASE_AUTO(Test4087243);
    TESTCASE_AUTO(Test4092260);
    TESTCASE_AUTO(Test4095316);
    TESTCASE_AUTO(Test4101940);
    TESTCASE_AUTO(Test4103436);
    TESTCASE_AUTO(Test4114076);
    TESTCASE_AUTO(Test4114077);
    TESTCASE_AUTO(Test4124632);
    TESTCASE_AUTO(Test4132736);
    TESTCASE_AUTO(Test4133509);
    TESTCASE_AUTO(Test4139572);
    TESTCASE_AUTO(Test4141640);
    TESTCASE_AUTO(Test4146160);
    TESTCASE_AUTO(Test4179216);
    TESTCASE_AUTO(TestT7189);
    TESTCASE_AUTO(TestCaseFirstCompression);
    TESTCASE_AUTO(TestTrailingComment);
    TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
    TESTCASE_AUTO_END;
}
1483
1484#endif /* #if !UCONFIG_NO_COLLATION */
1485