1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7/**
8 * IntlTestCollator is the medium level test class for everything in the directory "collate".
9 */
10
11/***********************************************************************
12* Modification history
13* Date        Name        Description
14* 02/14/2001  synwee      Compare with cintltst and commented away tests
15*                         that are not run.
16***********************************************************************/
17
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_COLLATION
21
22#include "unicode/localpointer.h"
23#include "unicode/uchar.h"
24#include "unicode/ustring.h"
25
26#include "dadrcoll.h"
27
28#include "encoll.h"
29#include "frcoll.h"
30#include "decoll.h"
31#include "escoll.h"
32#include "ficoll.h"
33#include "jacoll.h"
34#include "trcoll.h"
35#include "allcoll.h"
36#include "g7coll.h"
37#include "mnkytst.h"
38#include "apicoll.h"
39#include "regcoll.h"
40#include "currcoll.h"
41#include "itercoll.h"
42#include "tstnorm.h"
43#include "normconf.h"
44#include "thcoll.h"
45#include "srchtest.h"
46#include "ssearch.h"
47#include "cntabcol.h"
48#include "lcukocol.h"
49#include "ucaconf.h"
50#include "svccoll.h"
51#include "cmemory.h"
52#include "alphaindextst.h"
53//#include "rndmcoll.h"
54
55// Set to 1 to test offsets in backAndForth()
56#define TEST_OFFSETS 0
57
// Expands to one `case` of the switch in runIndexedTest(): it publishes the
// stringized class name through `name` and, only when `exec` is set, logs a
// banner, builds an instance of the test class on the stack and hands it to
// callTest() together with `par`. The trailing `break` deliberately has no
// semicolon so that the macro invocation supplies it.
#define TESTCLASS(n,classname)              \
    case n:                                 \
        name = #classname;                  \
        if (exec) {                         \
            logln(#classname "---");        \
            logln("");                      \
            classname testInstance;         \
            callTest(testInstance, par);    \
        }                                   \
        break
68
69void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
70{
71    if (exec) {
72        logln("TestSuite Collator: ");
73    }
74
75    switch (index) {
76      TESTCLASS(0, CollationEnglishTest);
77      TESTCLASS(1, CollationFrenchTest);
78      TESTCLASS(2, CollationGermanTest);
79      TESTCLASS(3, CollationSpanishTest);
80      TESTCLASS(4, CollationKanaTest);
81      TESTCLASS(5, CollationTurkishTest);
82      TESTCLASS(6, CollationDummyTest);
83      TESTCLASS(7, G7CollationTest);
84      TESTCLASS(8, CollationMonkeyTest);
85      TESTCLASS(9, CollationAPITest);
86      TESTCLASS(10, CollationRegressionTest);
87      TESTCLASS(11, CollationCurrencyTest);
88      TESTCLASS(12, CollationIteratorTest);
89      TESTCLASS(13, CollationThaiTest);
90      TESTCLASS(14, LotusCollationKoreanTest);
91      TESTCLASS(15, StringSearchTest);
92      TESTCLASS(16, ContractionTableTest);
93#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
94      TESTCLASS(17, DataDrivenCollatorTest);
95#endif
96      TESTCLASS(18, UCAConformanceTest);
97      TESTCLASS(19, CollationServiceTest);
98      TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
99      //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
100      TESTCLASS(21, SSearchTest);
101#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
102      TESTCLASS(22, AlphabeticIndexTest);
103#endif
104
105      default: name = ""; break;
106    }
107}
108
109UCollationResult
110IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
111  int32_t partialSKResult = 0;
112  uint8_t sBuf[512], tBuf[512];
113  UCharIterator sIter, tIter;
114  uint32_t sState[2], tState[2];
115  int32_t sSize = pieceSize, tSize = pieceSize;
116  int32_t i = 0;
117  status = U_ZERO_ERROR;
118  sState[0] = 0; sState[1] = 0;
119  tState[0] = 0; tState[1] = 0;
120  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
121    uiter_setString(&sIter, source, sLen);
122    uiter_setString(&tIter, target, tLen);
123    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
124    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
125
126    if(sState[0] != 0 || tState[0] != 0) {
127      log("State != 0 : %08X %08X\n", sState[0], tState[0]);
128    }
129    log("%i ", i++);
130
131    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
132  }
133
134  if(partialSKResult < 0) {
135      return UCOL_LESS;
136  } else if(partialSKResult > 0) {
137    return UCOL_GREATER;
138  } else {
139    return UCOL_EQUAL;
140  }
141}
142
143void
144IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
145{
146  UErrorCode status = U_ZERO_ERROR;
147
148  UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
149
150  Collator::EComparisonResult compareResult = col->compare(source, target);
151
152  CollationKey srckey, tgtkey;
153  col->getCollationKey(source, srckey, status);
154  col->getCollationKey(target, tgtkey, status);
155  if (U_FAILURE(status)){
156    errln("Creation of collation keys failed\n");
157  }
158  Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
159
160  reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
161
162    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
163
164    int32_t sLen = source.length(), tLen = target.length();
165    const UChar* src = source.getBuffer();
166    const UChar* trg = target.getBuffer();
167    UCollationResult compareResultIter = (UCollationResult)result;
168
169    {
170      UCharIterator sIter, tIter;
171      uiter_setString(&sIter, src, sLen);
172      uiter_setString(&tIter, trg, tLen);
173      compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
174      if(compareResultIter != (UCollationResult)result) {
175        errln("Different result for iterative comparison "+source+" "+target);
176      }
177    }
178    /* convert the strings to UTF-8 and do try comparing with char iterator */
179    if(!quick) { /*!QUICK*/
180      char utf8Source[256], utf8Target[256];
181      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
182      u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
183      if(U_FAILURE(status)) { /* probably buffer is not big enough */
184        log("Src UTF-8 buffer too small! Will not compare!\n");
185      } else {
186        u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
187        if(U_SUCCESS(status)) { /* probably buffer is not big enough */
188          UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
189          UCharIterator sIter, tIter;
190          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
191          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
192          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
193       /*uiter_setString(&sIter, source, sLen);
194      uiter_setString(&tIter, target, tLen);*/
195          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
196          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
197          sIter.move(&sIter, 0, UITER_START);
198          tIter.move(&tIter, 0, UITER_START);
199          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
200          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
201          if(compareResultUTF8 != compareResultIter) {
202            errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
203          }
204          if(compareResultUTF8 != compareResultUTF8Norm) {
205            errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
206          }
207        } else {
208          log("Target UTF-8 buffer too small! Did not compare!\n");
209        }
210        if(U_FAILURE(status)) {
211          log("UTF-8 strcoll failed! Ignoring result\n");
212        }
213      }
214    }
215
216    /* testing the partial sortkeys */
217    { /*!QUICK*/
218      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
219      int32_t partialSizesSize = 1;
220      if(!quick) {
221        partialSizesSize = 7;
222      }
223      int32_t i = 0;
224      log("partial sortkey test piecesize=");
225      for(i = 0; i < partialSizesSize; i++) {
226        UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
227        log("%i ", partialSizes[i]);
228
229        partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
230        if(partialSKResult != (UCollationResult)result) {
231          errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
232        }
233
234        if(norm != UCOL_ON && !quick) {
235          log("N ");
236          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
237          partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
238          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
239          if(partialSKResult != partialNormalizedSKResult) {
240            errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
241          }
242        }
243      }
244      log("\n");
245    }
246/*
247  if (compareResult != result) {
248    errln("String comparison failed in variant test\n");
249  }
250  if (keyResult != result) {
251    errln("Collation key comparison failed in variant test\n");
252  }
253*/
254}
255
256void
257IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
258  doTest(col, UnicodeString(source), UnicodeString(target), result);
259}
260
261void
262IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
263{
264  if(col) {
265    doTestVariant(col, source, target, result);
266    if(result == Collator::LESS) {
267      doTestVariant(col, target, source, Collator::GREATER);
268    } else if (result == Collator::GREATER) {
269      doTestVariant(col, target, source, Collator::LESS);
270    }
271
272    UErrorCode status = U_ZERO_ERROR;
273    LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
274    logln("Testing iterating source: "+source);
275    backAndForth(*c);
276    c->setText(target, status);
277    logln("Testing iterating target: "+target);
278    backAndForth(*c);
279  }
280}
281
282
283// used for collation result reporting, defined here for convenience
284// (maybe moved later)
285void
286IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
287             CollationKey &sourceKey, CollationKey &targetKey,
288             Collator::EComparisonResult compareResult,
289             Collator::EComparisonResult keyResult,
290                                Collator::EComparisonResult incResult,
291                         Collator::EComparisonResult expectedResult )
292{
293    if (expectedResult < -1 || expectedResult > 1)
294    {
295        errln("***** invalid call to reportCResult ****");
296        return;
297    }
298
299    UBool ok1 = (compareResult == expectedResult);
300    UBool ok2 = (keyResult == expectedResult);
301    UBool ok3 = (incResult == expectedResult);
302
303
304    if (ok1 && ok2 && ok3 && !verbose) {
305        // Keep non-verbose, passing tests fast
306        return;
307    } else {
308        UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
309        UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
310        UnicodeString prettySource, prettyTarget, sExpect, sResult;
311
312        IntlTest::prettify(source, prettySource);
313        IntlTest::prettify(target, prettyTarget);
314        appendCompareResult(compareResult, sResult);
315        appendCompareResult(expectedResult, sExpect);
316
317        if (ok1) {
318            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
319        } else {
320            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
321        }
322
323        msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
324        msg2 = ").compareTo(key(";
325        msg3 = ")) returned ";
326
327        appendCompareResult(keyResult, sResult);
328
329        if (ok2) {
330            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
331        } else {
332            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
333
334            msg1 = "  ";
335            msg2 = " vs. ";
336
337            prettify(sourceKey, prettySource);
338            prettify(targetKey, prettyTarget);
339
340            errln(msg1 + prettySource + msg2 + prettyTarget);
341        }
342        msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
343        msg2 = ", ";
344        msg3 = ") returned ";
345
346        appendCompareResult(incResult, sResult);
347
348        if (ok3) {
349            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
350        } else {
351            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
352        }
353    }
354}
355
356UnicodeString&
357IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
358                  UnicodeString& target)
359{
360    if (result == Collator::LESS)
361    {
362        target += "LESS";
363    }
364    else if (result == Collator::EQUAL)
365    {
366        target += "EQUAL";
367    }
368    else if (result == Collator::GREATER)
369    {
370        target += "GREATER";
371    }
372    else
373    {
374        UnicodeString huh = "?";
375
376        target += (huh + (int32_t)result);
377    }
378
379    return target;
380}
381
382// Produce a printable representation of a CollationKey
383UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
384{
385    int32_t i, byteCount;
386    const uint8_t *bytes = source.getByteArray(byteCount);
387
388    target.remove();
389    target += "[";
390
391    for (i = 0; i < byteCount; i += 1)
392    {
393        if (i != 0) {
394            target += " ";
395        }
396        appendHex(bytes[i], 2, target);
397    }
398
399    target += "]";
400
401    return target;
402}
403
404void IntlTestCollator::backAndForth(CollationElementIterator &iter)
405{
406    // Run through the iterator forwards and stick it into an array
407    int32_t orderLength = 0;
408    LocalArray<Order> orders(getOrders(iter, orderLength));
409    UErrorCode status = U_ZERO_ERROR;
410
411    // Now go through it backwards and make sure we get the same values
412    int32_t index = orderLength;
413    int32_t o;
414
415    // reset the iterator
416    iter.reset();
417
418    while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
419    {
420        /*int32_t offset = */iter.getOffset();
421
422        if (index == 0) {
423          if(o == 0) {
424            continue;
425          } else { // this is an error, orders exhausted but there are non-ignorable CEs from
426            // going backwards
427            errln("Backward iteration returned a non ignorable after orders are exhausted");
428            break;
429          }
430        }
431
432        index -= 1;
433        if (o != orders[index].order) {
434            if (o == 0)
435                index += 1;
436            else {
437                while (index > 0 && orders[--index].order == 0) {
438                  // nothing...
439                }
440
441                if (o != orders[index].order) {
442                    errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
443                    orders[index].order, o);
444                //break;
445                  return;
446                }
447            }
448        }
449
450#if TEST_OFFSETS
451        if (offset != orders[index].offset) {
452          errln("Mismatched offset at index %d: %d vs. %d", index,
453            orders[index].offset, offset);
454       //break;
455         return;
456        }
457#endif
458
459    }
460
461    while (index != 0 && orders[index - 1].order == 0)
462    {
463      index --;
464    }
465
466    if (index != 0)
467    {
468        UnicodeString msg("Didn't get back to beginning - index is ");
469        errln(msg + index);
470
471        iter.reset();
472        err("next: ");
473        while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
474        {
475            UnicodeString hexString("0x");
476
477            appendHex(o, 8, hexString);
478            hexString += " ";
479            err(hexString);
480        }
481        errln("");
482
483        err("prev: ");
484        while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
485        {
486            UnicodeString hexString("0x");
487
488            appendHex(o, 8, hexString);
489            hexString += " ";
490             err(hexString);
491        }
492        errln("");
493    }
494}
495
496
497/**
498 * Return an integer array containing all of the collation orders
499 * returned by calls to next on the specified iterator
500 */
501IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
502{
503    int32_t maxSize = 100;
504    int32_t size = 0;
505    LocalArray<Order> orders(new Order[maxSize]);
506    UErrorCode status = U_ZERO_ERROR;
507    int32_t offset = iter.getOffset();
508
509    int32_t order;
510    while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
511    {
512        if (size == maxSize)
513        {
514            maxSize *= 2;
515            Order *temp = new Order[maxSize];
516
517            uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
518            orders.adoptInstead(temp);
519        }
520
521        orders[size].order  = order;
522        orders[size].offset = offset;
523
524        offset = iter.getOffset();
525        size += 1;
526    }
527
528    if (maxSize > size)
529    {
530        Order *temp = new Order[size];
531
532        uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
533        orders.adoptInstead(temp);
534    }
535
536    orderLength = size;
537    return orders.orphan();
538}
539
540#endif /* #if !UCONFIG_NO_COLLATION */
541