1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7/**
8 * IntlTestCollator is the medium level test class for everything in the directory "collate".
9 */
10
11/***********************************************************************
12* Modification history
13* Date        Name        Description
14* 02/14/2001  synwee      Compare with cintltst and commented away tests
15*                         that are not run.
16***********************************************************************/
17
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_COLLATION
21
22#include "unicode/localpointer.h"
23#include "unicode/uchar.h"
24#include "unicode/ustring.h"
25
26#include "dadrcoll.h"
27
28#include "encoll.h"
29#include "frcoll.h"
30#include "decoll.h"
31#include "escoll.h"
32#include "ficoll.h"
33#include "jacoll.h"
34#include "trcoll.h"
35#include "allcoll.h"
36#include "g7coll.h"
37#include "mnkytst.h"
38#include "apicoll.h"
39#include "regcoll.h"
40#include "currcoll.h"
41#include "itercoll.h"
42#include "tstnorm.h"
43#include "normconf.h"
44#include "thcoll.h"
45#include "srchtest.h"
46#include "ssearch.h"
47#include "cntabcol.h"
48#include "lcukocol.h"
49#include "ucaconf.h"
50#include "svccoll.h"
51#include "cmemory.h"
52//#include "rndmcoll.h"
53
// Compile-time switch for the "#if TEST_OFFSETS" section of backAndForth():
// set to 1 to compile in the text-offset comparison, leave at 0 to omit it.
#define TEST_OFFSETS 0
56
/*
 * TESTCLASS(n, classname) expands to one `case n:` label of a switch statement.
 *
 * The expansion stores the stringified class name in `name` and, when `exec`
 * is true, logs a banner, default-constructs a `classname` object and hands it
 * to callTest() together with `par`.  It therefore relies on `name`, `exec`,
 * `par`, logln() and callTest() being visible where the macro is used.
 * The expansion ends with `break` and no semicolon, so each use must be
 * followed by `;`.
 */
#define TESTCLASS(n,classname)        \
    case n:                           \
        name = #classname;            \
        if (exec) {                   \
            logln(#classname "---");  \
            logln("");                \
            classname t;              \
            callTest(t, par);         \
        }                             \
        break
67
68void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
69{
70    if (exec) {
71        logln("TestSuite Collator: ");
72    }
73
74    switch (index) {
75      TESTCLASS(0, CollationEnglishTest);
76      TESTCLASS(1, CollationFrenchTest);
77      TESTCLASS(2, CollationGermanTest);
78      TESTCLASS(3, CollationSpanishTest);
79      TESTCLASS(4, CollationKanaTest);
80      TESTCLASS(5, CollationTurkishTest);
81      TESTCLASS(6, CollationDummyTest);
82      TESTCLASS(7, G7CollationTest);
83      TESTCLASS(8, CollationMonkeyTest);
84      TESTCLASS(9, CollationAPITest);
85      TESTCLASS(10, CollationRegressionTest);
86      TESTCLASS(11, CollationCurrencyTest);
87      TESTCLASS(12, CollationIteratorTest);
88      TESTCLASS(13, CollationThaiTest);
89      TESTCLASS(14, LotusCollationKoreanTest);
90      TESTCLASS(15, StringSearchTest);
91      TESTCLASS(16, ContractionTableTest);
92#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
93      TESTCLASS(17, DataDrivenCollatorTest);
94#endif
95      TESTCLASS(18, UCAConformanceTest);
96      TESTCLASS(19, CollationServiceTest);
97      TESTCLASS(20, CollationFinnishTest); // removed by weiv - we have changed Finnish collation
98      //TESTCLASS(21, RandomCollatorTest); // See ticket 5747 about reenabling this test.
99      TESTCLASS(21, SSearchTest);
100
101      default: name = ""; break;
102    }
103}
104
105UCollationResult
106IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
107  int32_t partialSKResult = 0;
108  uint8_t sBuf[512], tBuf[512];
109  UCharIterator sIter, tIter;
110  uint32_t sState[2], tState[2];
111  int32_t sSize = pieceSize, tSize = pieceSize;
112  int32_t i = 0;
113  status = U_ZERO_ERROR;
114  sState[0] = 0; sState[1] = 0;
115  tState[0] = 0; tState[1] = 0;
116  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
117    uiter_setString(&sIter, source, sLen);
118    uiter_setString(&tIter, target, tLen);
119    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
120    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
121
122    if(sState[0] != 0 || tState[0] != 0) {
123      log("State != 0 : %08X %08X\n", sState[0], tState[0]);
124    }
125    log("%i ", i++);
126
127    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
128  }
129
130  if(partialSKResult < 0) {
131      return UCOL_LESS;
132  } else if(partialSKResult > 0) {
133    return UCOL_GREATER;
134  } else {
135    return UCOL_EQUAL;
136  }
137}
138
139void
140IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
141{
142  UErrorCode status = U_ZERO_ERROR;
143
144  UCollator *myCollation = (UCollator *)((RuleBasedCollator *)col)->getUCollator();
145
146  Collator::EComparisonResult compareResult = col->compare(source, target);
147
148  CollationKey srckey, tgtkey;
149  col->getCollationKey(source, srckey, status);
150  col->getCollationKey(target, tgtkey, status);
151  if (U_FAILURE(status)){
152    errln("Creation of collation keys failed\n");
153  }
154  Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
155
156  reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
157
158    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
159
160    int32_t sLen = source.length(), tLen = target.length();
161    const UChar* src = source.getBuffer();
162    const UChar* trg = target.getBuffer();
163    UCollationResult compareResultIter = (UCollationResult)result;
164
165    {
166      UCharIterator sIter, tIter;
167      uiter_setString(&sIter, src, sLen);
168      uiter_setString(&tIter, trg, tLen);
169      compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
170      if(compareResultIter != (UCollationResult)result) {
171        errln("Different result for iterative comparison "+source+" "+target);
172      }
173    }
174    /* convert the strings to UTF-8 and do try comparing with char iterator */
175    if(!quick) { /*!QUICK*/
176      char utf8Source[256], utf8Target[256];
177      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
178      u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
179      if(U_FAILURE(status)) { /* probably buffer is not big enough */
180        log("Src UTF-8 buffer too small! Will not compare!\n");
181      } else {
182        u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
183        if(U_SUCCESS(status)) { /* probably buffer is not big enough */
184          UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
185          UCharIterator sIter, tIter;
186          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
187          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
188          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
189       /*uiter_setString(&sIter, source, sLen);
190      uiter_setString(&tIter, target, tLen);*/
191          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
192          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
193          sIter.move(&sIter, 0, UITER_START);
194          tIter.move(&tIter, 0, UITER_START);
195          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
196          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
197          if(compareResultUTF8 != compareResultIter) {
198            errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
199          }
200          if(compareResultUTF8 != compareResultUTF8Norm) {
201            errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
202          }
203        } else {
204          log("Target UTF-8 buffer too small! Did not compare!\n");
205        }
206        if(U_FAILURE(status)) {
207          log("UTF-8 strcoll failed! Ignoring result\n");
208        }
209      }
210    }
211
212    /* testing the partial sortkeys */
213    { /*!QUICK*/
214      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
215      int32_t partialSizesSize = 1;
216      if(!quick) {
217        partialSizesSize = 7;
218      }
219      int32_t i = 0;
220      log("partial sortkey test piecesize=");
221      for(i = 0; i < partialSizesSize; i++) {
222        UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
223        log("%i ", partialSizes[i]);
224
225        partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
226        if(partialSKResult != (UCollationResult)result) {
227          errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
228        }
229
230        if(norm != UCOL_ON && !quick) {
231          log("N ");
232          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
233          partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
234          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
235          if(partialSKResult != partialNormalizedSKResult) {
236            errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
237          }
238        }
239      }
240      log("\n");
241    }
242/*
243  if (compareResult != result) {
244    errln("String comparison failed in variant test\n");
245  }
246  if (keyResult != result) {
247    errln("Collation key comparison failed in variant test\n");
248  }
249*/
250}
251
252void
253IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
254  doTest(col, UnicodeString(source), UnicodeString(target), result);
255}
256
257void
258IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
259{
260  if(col) {
261    doTestVariant(col, source, target, result);
262    if(result == Collator::LESS) {
263      doTestVariant(col, target, source, Collator::GREATER);
264    } else if (result == Collator::GREATER) {
265      doTestVariant(col, target, source, Collator::LESS);
266    }
267
268    UErrorCode status = U_ZERO_ERROR;
269    LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
270    logln("Testing iterating source: "+source);
271    backAndForth(*c);
272    c->setText(target, status);
273    logln("Testing iterating target: "+target);
274    backAndForth(*c);
275  }
276}
277
278
279// used for collation result reporting, defined here for convenience
280// (maybe moved later)
281void
282IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
283             CollationKey &sourceKey, CollationKey &targetKey,
284             Collator::EComparisonResult compareResult,
285             Collator::EComparisonResult keyResult,
286                                Collator::EComparisonResult incResult,
287                         Collator::EComparisonResult expectedResult )
288{
289    if (expectedResult < -1 || expectedResult > 1)
290    {
291        errln("***** invalid call to reportCResult ****");
292        return;
293    }
294
295    UBool ok1 = (compareResult == expectedResult);
296    UBool ok2 = (keyResult == expectedResult);
297    UBool ok3 = (incResult == expectedResult);
298
299
300    if (ok1 && ok2 && ok3 && !verbose) {
301        // Keep non-verbose, passing tests fast
302        return;
303    } else {
304        UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
305        UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
306        UnicodeString prettySource, prettyTarget, sExpect, sResult;
307
308        IntlTest::prettify(source, prettySource);
309        IntlTest::prettify(target, prettyTarget);
310        appendCompareResult(compareResult, sResult);
311        appendCompareResult(expectedResult, sExpect);
312
313        if (ok1) {
314            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
315        } else {
316            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
317        }
318
319        msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
320        msg2 = ").compareTo(key(";
321        msg3 = ")) returned ";
322
323        appendCompareResult(keyResult, sResult);
324
325        if (ok2) {
326            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
327        } else {
328            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
329
330            msg1 = "  ";
331            msg2 = " vs. ";
332
333            prettify(sourceKey, prettySource);
334            prettify(targetKey, prettyTarget);
335
336            errln(msg1 + prettySource + msg2 + prettyTarget);
337        }
338        msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
339        msg2 = ", ";
340        msg3 = ") returned ";
341
342        appendCompareResult(incResult, sResult);
343
344        if (ok3) {
345            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
346        } else {
347            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
348        }
349    }
350}
351
352UnicodeString&
353IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
354                  UnicodeString& target)
355{
356    if (result == Collator::LESS)
357    {
358        target += "LESS";
359    }
360    else if (result == Collator::EQUAL)
361    {
362        target += "EQUAL";
363    }
364    else if (result == Collator::GREATER)
365    {
366        target += "GREATER";
367    }
368    else
369    {
370        UnicodeString huh = "?";
371
372        target += (huh + (int32_t)result);
373    }
374
375    return target;
376}
377
378// Produce a printable representation of a CollationKey
379UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
380{
381    int32_t i, byteCount;
382    const uint8_t *bytes = source.getByteArray(byteCount);
383
384    target.remove();
385    target += "[";
386
387    for (i = 0; i < byteCount; i += 1)
388    {
389        if (i != 0) {
390            target += " ";
391        }
392        appendHex(bytes[i], 2, target);
393    }
394
395    target += "]";
396
397    return target;
398}
399
400void IntlTestCollator::backAndForth(CollationElementIterator &iter)
401{
402    // Run through the iterator forwards and stick it into an array
403    int32_t orderLength = 0;
404    LocalArray<Order> orders(getOrders(iter, orderLength));
405    UErrorCode status = U_ZERO_ERROR;
406
407    // Now go through it backwards and make sure we get the same values
408    int32_t index = orderLength;
409    int32_t o;
410
411    // reset the iterator
412    iter.reset();
413
414    while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
415    {
416        /*int32_t offset = */iter.getOffset();
417
418        if (index == 0) {
419          if(o == 0) {
420            continue;
421          } else { // this is an error, orders exhausted but there are non-ignorable CEs from
422            // going backwards
423            errln("Backward iteration returned a non ignorable after orders are exhausted");
424            break;
425          }
426        }
427
428        index -= 1;
429        if (o != orders[index].order) {
430            if (o == 0)
431                index += 1;
432            else {
433                while (index > 0 && orders[--index].order == 0) {
434                  // nothing...
435                }
436
437                if (o != orders[index].order) {
438                    errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
439                    orders[index].order, o);
440                //break;
441                  return;
442                }
443            }
444        }
445
446#if TEST_OFFSETS
447        if (offset != orders[index].offset) {
448          errln("Mismatched offset at index %d: %d vs. %d", index,
449            orders[index].offset, offset);
450       //break;
451         return;
452        }
453#endif
454
455    }
456
457    while (index != 0 && orders[index - 1].order == 0)
458    {
459      index --;
460    }
461
462    if (index != 0)
463    {
464        UnicodeString msg("Didn't get back to beginning - index is ");
465        errln(msg + index);
466
467        iter.reset();
468        err("next: ");
469        while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
470        {
471            UnicodeString hexString("0x");
472
473            appendHex(o, 8, hexString);
474            hexString += " ";
475            err(hexString);
476        }
477        errln("");
478
479        err("prev: ");
480        while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
481        {
482            UnicodeString hexString("0x");
483
484            appendHex(o, 8, hexString);
485            hexString += " ";
486             err(hexString);
487        }
488        errln("");
489    }
490}
491
492
493/**
494 * Return an integer array containing all of the collation orders
495 * returned by calls to next on the specified iterator
496 */
497IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
498{
499    int32_t maxSize = 100;
500    int32_t size = 0;
501    LocalArray<Order> orders(new Order[maxSize]);
502    UErrorCode status = U_ZERO_ERROR;
503    int32_t offset = iter.getOffset();
504
505    int32_t order;
506    while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
507    {
508        if (size == maxSize)
509        {
510            maxSize *= 2;
511            Order *temp = new Order[maxSize];
512
513            uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
514            orders.adoptInstead(temp);
515        }
516
517        orders[size].order  = order;
518        orders[size].offset = offset;
519
520        offset = iter.getOffset();
521        size += 1;
522    }
523
524    if (maxSize > size)
525    {
526        Order *temp = new Order[size];
527
528        uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
529        orders.adoptInstead(temp);
530    }
531
532    orderLength = size;
533    return orders.orphan();
534}
535
536#endif /* #if !UCONFIG_NO_COLLATION */
537