1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9/**
10 * IntlTestCollator is the medium level test class for everything in the directory "collate".
11 */
12
13/***********************************************************************
14* Modification history
15* Date        Name        Description
16* 02/14/2001  synwee      Compare with cintltst and commented away tests
17*                         that are not run.
18***********************************************************************/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_COLLATION
23
24#include "unicode/localpointer.h"
25#include "unicode/sortkey.h"
26#include "unicode/uchar.h"
27#include "unicode/ustring.h"
28
29#include "encoll.h"
30#include "frcoll.h"
31#include "decoll.h"
32#include "escoll.h"
33#include "ficoll.h"
34#include "jacoll.h"
35#include "trcoll.h"
36#include "allcoll.h"
37#include "g7coll.h"
38#include "mnkytst.h"
39#include "apicoll.h"
40#include "regcoll.h"
41#include "currcoll.h"
42#include "itercoll.h"
43#include "tstnorm.h"
44#include "normconf.h"
45#include "thcoll.h"
46#include "srchtest.h"
47#include "ssearch.h"
48#include "lcukocol.h"
49#include "ucaconf.h"
50#include "svccoll.h"
51#include "cmemory.h"
52#include "alphaindextst.h"
53
54// Set to 1 to test offsets in backAndForth()
55#define TEST_OFFSETS 0
56
57extern IntlTest *createCollationTest();
58
59void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
60{
61    if (exec) {
62        logln("TestSuite Collator: ");
63    }
64
65    TESTCASE_AUTO_BEGIN;
66    TESTCASE_AUTO_CLASS(CollationEnglishTest);
67    TESTCASE_AUTO_CLASS(CollationFrenchTest);
68    TESTCASE_AUTO_CLASS(CollationGermanTest);
69    TESTCASE_AUTO_CLASS(CollationSpanishTest);
70    TESTCASE_AUTO_CLASS(CollationKanaTest);
71    TESTCASE_AUTO_CLASS(CollationTurkishTest);
72    TESTCASE_AUTO_CLASS(CollationDummyTest);
73    TESTCASE_AUTO_CLASS(G7CollationTest);
74    TESTCASE_AUTO_CLASS(CollationMonkeyTest);
75    TESTCASE_AUTO_CLASS(CollationAPITest);
76    TESTCASE_AUTO_CLASS(CollationRegressionTest);
77    TESTCASE_AUTO_CLASS(CollationCurrencyTest);
78    TESTCASE_AUTO_CLASS(CollationIteratorTest);
79    TESTCASE_AUTO_CLASS(CollationThaiTest);
80    TESTCASE_AUTO_CLASS(LotusCollationKoreanTest);
81    TESTCASE_AUTO_CLASS(StringSearchTest);
82    TESTCASE_AUTO_CLASS(UCAConformanceTest);
83    TESTCASE_AUTO_CLASS(CollationServiceTest);
84    TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation
85    TESTCASE_AUTO_CLASS(SSearchTest);
86#if !UCONFIG_NO_NORMALIZATION
87    TESTCASE_AUTO_CLASS(AlphabeticIndexTest);
88#endif
89    TESTCASE_AUTO_CREATE_CLASS(CollationTest);
90    TESTCASE_AUTO_END;
91}
92
93UCollationResult
94IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
95  int32_t partialSKResult = 0;
96  uint8_t sBuf[512], tBuf[512];
97  UCharIterator sIter, tIter;
98  uint32_t sState[2], tState[2];
99  int32_t sSize = pieceSize, tSize = pieceSize;
100  int32_t i = 0;
101  status = U_ZERO_ERROR;
102  sState[0] = 0; sState[1] = 0;
103  tState[0] = 0; tState[1] = 0;
104  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
105    uiter_setString(&sIter, source, sLen);
106    uiter_setString(&tIter, target, tLen);
107    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
108    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
109
110    if(sState[0] != 0 || tState[0] != 0) {
111      log("State != 0 : %08X %08X\n", sState[0], tState[0]);
112    }
113    log("%i ", i++);
114
115    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
116  }
117
118  if(partialSKResult < 0) {
119      return UCOL_LESS;
120  } else if(partialSKResult > 0) {
121    return UCOL_GREATER;
122  } else {
123    return UCOL_EQUAL;
124  }
125}
126
127void
128IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
129{
130  UErrorCode status = U_ZERO_ERROR;
131
132  UCollator *myCollation = col->toUCollator();
133
134  Collator::EComparisonResult compareResult = col->compare(source, target);
135
136  CollationKey srckey, tgtkey;
137  col->getCollationKey(source, srckey, status);
138  col->getCollationKey(target, tgtkey, status);
139  if (U_FAILURE(status)){
140    errln("Creation of collation keys failed\n");
141  }
142  Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
143
144  reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
145
146    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
147
148    int32_t sLen = source.length(), tLen = target.length();
149    const UChar* src = source.getBuffer();
150    const UChar* trg = target.getBuffer();
151    UCollationResult compareResultIter = (UCollationResult)result;
152
153    {
154      UCharIterator sIter, tIter;
155      uiter_setString(&sIter, src, sLen);
156      uiter_setString(&tIter, trg, tLen);
157      compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
158      if(compareResultIter != (UCollationResult)result) {
159        errln("Different result for iterative comparison "+source+" "+target);
160      }
161    }
162    /* convert the strings to UTF-8 and do try comparing with char iterator */
163    if(!quick) { /*!QUICK*/
164      char utf8Source[256], utf8Target[256];
165      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
166      u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
167      if(U_FAILURE(status)) { /* probably buffer is not big enough */
168        log("Src UTF-8 buffer too small! Will not compare!\n");
169      } else {
170        u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
171        if(U_SUCCESS(status)) { /* probably buffer is not big enough */
172          UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
173          UCharIterator sIter, tIter;
174          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
175          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
176          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
177       /*uiter_setString(&sIter, source, sLen);
178      uiter_setString(&tIter, target, tLen);*/
179          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
180          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
181          sIter.move(&sIter, 0, UITER_START);
182          tIter.move(&tIter, 0, UITER_START);
183          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
184          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
185          if(compareResultUTF8 != compareResultIter) {
186            errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
187          }
188          if(compareResultUTF8 != compareResultUTF8Norm) {
189            errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
190          }
191        } else {
192          log("Target UTF-8 buffer too small! Did not compare!\n");
193        }
194        if(U_FAILURE(status)) {
195          log("UTF-8 strcoll failed! Ignoring result\n");
196        }
197      }
198    }
199
200    /* testing the partial sortkeys */
201    { /*!QUICK*/
202      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
203      int32_t partialSizesSize = 1;
204      if(!quick) {
205        partialSizesSize = 7;
206      }
207      int32_t i = 0;
208      log("partial sortkey test piecesize=");
209      for(i = 0; i < partialSizesSize; i++) {
210        UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
211        log("%i ", partialSizes[i]);
212
213        partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
214        if(partialSKResult != (UCollationResult)result) {
215          errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
216        }
217
218        if(norm != UCOL_ON && !quick) {
219          log("N ");
220          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
221          partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
222          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
223          if(partialSKResult != partialNormalizedSKResult) {
224            errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
225          }
226        }
227      }
228      log("\n");
229    }
230/*
231  if (compareResult != result) {
232    errln("String comparison failed in variant test\n");
233  }
234  if (keyResult != result) {
235    errln("Collation key comparison failed in variant test\n");
236  }
237*/
238}
239
240void
241IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
242  doTest(col, UnicodeString(source), UnicodeString(target), result);
243}
244
245void
246IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
247{
248  if(col) {
249    doTestVariant(col, source, target, result);
250    if(result == Collator::LESS) {
251      doTestVariant(col, target, source, Collator::GREATER);
252    } else if (result == Collator::GREATER) {
253      doTestVariant(col, target, source, Collator::LESS);
254    }
255
256    UErrorCode status = U_ZERO_ERROR;
257    LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
258    logln("Testing iterating source: "+source);
259    backAndForth(*c);
260    c->setText(target, status);
261    logln("Testing iterating target: "+target);
262    backAndForth(*c);
263  }
264}
265
266
267// used for collation result reporting, defined here for convenience
268// (maybe moved later)
269void
270IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
271             CollationKey &sourceKey, CollationKey &targetKey,
272             Collator::EComparisonResult compareResult,
273             Collator::EComparisonResult keyResult,
274                                Collator::EComparisonResult incResult,
275                         Collator::EComparisonResult expectedResult )
276{
277    if (expectedResult < -1 || expectedResult > 1)
278    {
279        errln("***** invalid call to reportCResult ****");
280        return;
281    }
282
283    UBool ok1 = (compareResult == expectedResult);
284    UBool ok2 = (keyResult == expectedResult);
285    UBool ok3 = (incResult == expectedResult);
286
287
288    if (ok1 && ok2 && ok3 && !verbose) {
289        // Keep non-verbose, passing tests fast
290        return;
291    } else {
292        UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
293        UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
294        UnicodeString prettySource, prettyTarget, sExpect, sResult;
295
296        IntlTest::prettify(source, prettySource);
297        IntlTest::prettify(target, prettyTarget);
298        appendCompareResult(compareResult, sResult);
299        appendCompareResult(expectedResult, sExpect);
300
301        if (ok1) {
302            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
303        } else {
304            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
305        }
306
307        msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
308        msg2 = ").compareTo(key(";
309        msg3 = ")) returned ";
310
311        appendCompareResult(keyResult, sResult);
312
313        if (ok2) {
314            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
315        } else {
316            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
317
318            msg1 = "  ";
319            msg2 = " vs. ";
320
321            prettify(sourceKey, prettySource);
322            prettify(targetKey, prettyTarget);
323
324            errln(msg1 + prettySource + msg2 + prettyTarget);
325        }
326        msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
327        msg2 = ", ";
328        msg3 = ") returned ";
329
330        appendCompareResult(incResult, sResult);
331
332        if (ok3) {
333            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
334        } else {
335            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
336        }
337    }
338}
339
340UnicodeString&
341IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
342                  UnicodeString& target)
343{
344    if (result == Collator::LESS)
345    {
346        target += "LESS";
347    }
348    else if (result == Collator::EQUAL)
349    {
350        target += "EQUAL";
351    }
352    else if (result == Collator::GREATER)
353    {
354        target += "GREATER";
355    }
356    else
357    {
358        UnicodeString huh = "?";
359
360        target += (huh + (int32_t)result);
361    }
362
363    return target;
364}
365
366// Produce a printable representation of a CollationKey
367UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
368{
369    int32_t i, byteCount;
370    const uint8_t *bytes = source.getByteArray(byteCount);
371
372    target.remove();
373    target += "[";
374
375    for (i = 0; i < byteCount; i += 1)
376    {
377        if (i != 0) {
378            target += " ";
379        }
380        appendHex(bytes[i], 2, target);
381    }
382
383    target += "]";
384
385    return target;
386}
387
388void IntlTestCollator::backAndForth(CollationElementIterator &iter)
389{
390    // Run through the iterator forwards and stick it into an array
391    int32_t orderLength = 0;
392    LocalArray<Order> orders(getOrders(iter, orderLength));
393    UErrorCode status = U_ZERO_ERROR;
394
395    // Now go through it backwards and make sure we get the same values
396    int32_t index = orderLength;
397    int32_t o;
398
399    // reset the iterator
400    iter.reset();
401
402    while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
403    {
404        /*int32_t offset = */iter.getOffset();
405
406        if (index == 0) {
407          if(o == 0) {
408            continue;
409          } else { // this is an error, orders exhausted but there are non-ignorable CEs from
410            // going backwards
411            errln("Backward iteration returned a non ignorable after orders are exhausted");
412            break;
413          }
414        }
415
416        index -= 1;
417        if (o != orders[index].order) {
418            if (o == 0)
419                index += 1;
420            else {
421                while (index > 0 && orders[--index].order == 0) {
422                  // nothing...
423                }
424
425                if (o != orders[index].order) {
426                    errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
427                    orders[index].order, o);
428                //break;
429                  return;
430                }
431            }
432        }
433
434#if TEST_OFFSETS
435        if (offset != orders[index].offset) {
436          errln("Mismatched offset at index %d: %d vs. %d", index,
437            orders[index].offset, offset);
438       //break;
439         return;
440        }
441#endif
442
443    }
444
445    while (index != 0 && orders[index - 1].order == 0)
446    {
447      index --;
448    }
449
450    if (index != 0)
451    {
452        UnicodeString msg("Didn't get back to beginning - index is ");
453        errln(msg + index);
454
455        iter.reset();
456        err("next: ");
457        while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
458        {
459            UnicodeString hexString("0x");
460
461            appendHex(o, 8, hexString);
462            hexString += " ";
463            err(hexString);
464        }
465        errln("");
466
467        err("prev: ");
468        while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
469        {
470            UnicodeString hexString("0x");
471
472            appendHex(o, 8, hexString);
473            hexString += " ";
474             err(hexString);
475        }
476        errln("");
477    }
478}
479
480
481/**
482 * Return an integer array containing all of the collation orders
483 * returned by calls to next on the specified iterator
484 */
485IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
486{
487    int32_t maxSize = 100;
488    int32_t size = 0;
489    LocalArray<Order> orders(new Order[maxSize]);
490    UErrorCode status = U_ZERO_ERROR;
491    int32_t offset = iter.getOffset();
492
493    int32_t order;
494    while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
495    {
496        if (size == maxSize)
497        {
498            maxSize *= 2;
499            Order *temp = new Order[maxSize];
500
501            uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
502            orders.adoptInstead(temp);
503        }
504
505        orders[size].order  = order;
506        orders[size].offset = offset;
507
508        offset = iter.getOffset();
509        size += 1;
510    }
511    if (U_FAILURE(status)) {
512        errln("CollationElementIterator.next() failed - %s",
513              u_errorName(status));
514    }
515
516    if (maxSize > size)
517    {
518        Order *temp = new Order[size];
519
520        uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
521        orders.adoptInstead(temp);
522    }
523
524    orderLength = size;
525    return orders.orphan();
526}
527
528#endif /* #if !UCONFIG_NO_COLLATION */
529