tscoll.cpp revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7/**
8 * IntlTestCollator is the medium level test class for everything in the directory "collate".
9 */
10
11/***********************************************************************
12* Modification history
13* Date        Name        Description
14* 02/14/2001  synwee      Compare with cintltst and commented away tests
15*                         that are not run.
16***********************************************************************/
17
18#include "unicode/utypes.h"
19
20#if !UCONFIG_NO_COLLATION
21
22#include "unicode/localpointer.h"
23#include "unicode/sortkey.h"
24#include "unicode/uchar.h"
25#include "unicode/ustring.h"
26
27#include "encoll.h"
28#include "frcoll.h"
29#include "decoll.h"
30#include "escoll.h"
31#include "ficoll.h"
32#include "jacoll.h"
33#include "trcoll.h"
34#include "allcoll.h"
35#include "g7coll.h"
36#include "mnkytst.h"
37#include "apicoll.h"
38#include "regcoll.h"
39#include "currcoll.h"
40#include "itercoll.h"
41#include "tstnorm.h"
42#include "normconf.h"
43#include "thcoll.h"
44#include "srchtest.h"
45#include "ssearch.h"
46#include "lcukocol.h"
47#include "ucaconf.h"
48#include "svccoll.h"
49#include "cmemory.h"
50#include "alphaindextst.h"
51
52// Set to 1 to test offsets in backAndForth()
53#define TEST_OFFSETS 0
54
55extern IntlTest *createCollationTest();
56
57void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, const char* &name, char* par )
58{
59    if (exec) {
60        logln("TestSuite Collator: ");
61    }
62
63    TESTCASE_AUTO_BEGIN;
64    TESTCASE_AUTO_CLASS(CollationEnglishTest);
65    TESTCASE_AUTO_CLASS(CollationFrenchTest);
66    TESTCASE_AUTO_CLASS(CollationGermanTest);
67    TESTCASE_AUTO_CLASS(CollationSpanishTest);
68    TESTCASE_AUTO_CLASS(CollationKanaTest);
69    TESTCASE_AUTO_CLASS(CollationTurkishTest);
70    TESTCASE_AUTO_CLASS(CollationDummyTest);
71    TESTCASE_AUTO_CLASS(G7CollationTest);
72    TESTCASE_AUTO_CLASS(CollationMonkeyTest);
73    TESTCASE_AUTO_CLASS(CollationAPITest);
74    TESTCASE_AUTO_CLASS(CollationRegressionTest);
75    TESTCASE_AUTO_CLASS(CollationCurrencyTest);
76    TESTCASE_AUTO_CLASS(CollationIteratorTest);
77    TESTCASE_AUTO_CLASS(CollationThaiTest);
78    TESTCASE_AUTO_CLASS(LotusCollationKoreanTest);
79    TESTCASE_AUTO_CLASS(StringSearchTest);
80    TESTCASE_AUTO_CLASS(UCAConformanceTest);
81    TESTCASE_AUTO_CLASS(CollationServiceTest);
82    TESTCASE_AUTO_CLASS(CollationFinnishTest); // removed by weiv - we have changed Finnish collation
83    TESTCASE_AUTO_CLASS(SSearchTest);
84#if !UCONFIG_NO_NORMALIZATION
85    TESTCASE_AUTO_CLASS(AlphabeticIndexTest);
86#endif
87    TESTCASE_AUTO_CREATE_CLASS(CollationTest);
88    TESTCASE_AUTO_END;
89}
90
91UCollationResult
92IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
93  int32_t partialSKResult = 0;
94  uint8_t sBuf[512], tBuf[512];
95  UCharIterator sIter, tIter;
96  uint32_t sState[2], tState[2];
97  int32_t sSize = pieceSize, tSize = pieceSize;
98  int32_t i = 0;
99  status = U_ZERO_ERROR;
100  sState[0] = 0; sState[1] = 0;
101  tState[0] = 0; tState[1] = 0;
102  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
103    uiter_setString(&sIter, source, sLen);
104    uiter_setString(&tIter, target, tLen);
105    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
106    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
107
108    if(sState[0] != 0 || tState[0] != 0) {
109      log("State != 0 : %08X %08X\n", sState[0], tState[0]);
110    }
111    log("%i ", i++);
112
113    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
114  }
115
116  if(partialSKResult < 0) {
117      return UCOL_LESS;
118  } else if(partialSKResult > 0) {
119    return UCOL_GREATER;
120  } else {
121    return UCOL_EQUAL;
122  }
123}
124
125void
126IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
127{
128  UErrorCode status = U_ZERO_ERROR;
129
130  UCollator *myCollation = col->toUCollator();
131
132  Collator::EComparisonResult compareResult = col->compare(source, target);
133
134  CollationKey srckey, tgtkey;
135  col->getCollationKey(source, srckey, status);
136  col->getCollationKey(target, tgtkey, status);
137  if (U_FAILURE(status)){
138    errln("Creation of collation keys failed\n");
139  }
140  Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);
141
142  reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);
143
144    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
145
146    int32_t sLen = source.length(), tLen = target.length();
147    const UChar* src = source.getBuffer();
148    const UChar* trg = target.getBuffer();
149    UCollationResult compareResultIter = (UCollationResult)result;
150
151    {
152      UCharIterator sIter, tIter;
153      uiter_setString(&sIter, src, sLen);
154      uiter_setString(&tIter, trg, tLen);
155      compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
156      if(compareResultIter != (UCollationResult)result) {
157        errln("Different result for iterative comparison "+source+" "+target);
158      }
159    }
160    /* convert the strings to UTF-8 and do try comparing with char iterator */
161    if(!quick) { /*!QUICK*/
162      char utf8Source[256], utf8Target[256];
163      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
164      u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
165      if(U_FAILURE(status)) { /* probably buffer is not big enough */
166        log("Src UTF-8 buffer too small! Will not compare!\n");
167      } else {
168        u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
169        if(U_SUCCESS(status)) { /* probably buffer is not big enough */
170          UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
171          UCharIterator sIter, tIter;
172          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
173          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
174          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
175       /*uiter_setString(&sIter, source, sLen);
176      uiter_setString(&tIter, target, tLen);*/
177          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
178          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
179          sIter.move(&sIter, 0, UITER_START);
180          tIter.move(&tIter, 0, UITER_START);
181          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
182          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
183          if(compareResultUTF8 != compareResultIter) {
184            errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
185          }
186          if(compareResultUTF8 != compareResultUTF8Norm) {
187            errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
188          }
189        } else {
190          log("Target UTF-8 buffer too small! Did not compare!\n");
191        }
192        if(U_FAILURE(status)) {
193          log("UTF-8 strcoll failed! Ignoring result\n");
194        }
195      }
196    }
197
198    /* testing the partial sortkeys */
199    { /*!QUICK*/
200      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
201      int32_t partialSizesSize = 1;
202      if(!quick) {
203        partialSizesSize = 7;
204      }
205      int32_t i = 0;
206      log("partial sortkey test piecesize=");
207      for(i = 0; i < partialSizesSize; i++) {
208        UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
209        log("%i ", partialSizes[i]);
210
211        partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
212        if(partialSKResult != (UCollationResult)result) {
213          errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");
214        }
215
216        if(norm != UCOL_ON && !quick) {
217          log("N ");
218          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
219          partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
220          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
221          if(partialSKResult != partialNormalizedSKResult) {
222            errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");
223          }
224        }
225      }
226      log("\n");
227    }
228/*
229  if (compareResult != result) {
230    errln("String comparison failed in variant test\n");
231  }
232  if (keyResult != result) {
233    errln("Collation key comparison failed in variant test\n");
234  }
235*/
236}
237
238void
239IntlTestCollator::doTest(Collator* col, const UChar *source, const UChar *target, Collator::EComparisonResult result) {
240  doTest(col, UnicodeString(source), UnicodeString(target), result);
241}
242
243void
244IntlTestCollator::doTest(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
245{
246  if(col) {
247    doTestVariant(col, source, target, result);
248    if(result == Collator::LESS) {
249      doTestVariant(col, target, source, Collator::GREATER);
250    } else if (result == Collator::GREATER) {
251      doTestVariant(col, target, source, Collator::LESS);
252    }
253
254    UErrorCode status = U_ZERO_ERROR;
255    LocalPointer<CollationElementIterator> c(((RuleBasedCollator *)col)->createCollationElementIterator(source));
256    logln("Testing iterating source: "+source);
257    backAndForth(*c);
258    c->setText(target, status);
259    logln("Testing iterating target: "+target);
260    backAndForth(*c);
261  }
262}
263
264
265// used for collation result reporting, defined here for convenience
266// (maybe moved later)
267void
268IntlTestCollator::reportCResult( const UnicodeString &source, const UnicodeString &target,
269             CollationKey &sourceKey, CollationKey &targetKey,
270             Collator::EComparisonResult compareResult,
271             Collator::EComparisonResult keyResult,
272                                Collator::EComparisonResult incResult,
273                         Collator::EComparisonResult expectedResult )
274{
275    if (expectedResult < -1 || expectedResult > 1)
276    {
277        errln("***** invalid call to reportCResult ****");
278        return;
279    }
280
281    UBool ok1 = (compareResult == expectedResult);
282    UBool ok2 = (keyResult == expectedResult);
283    UBool ok3 = (incResult == expectedResult);
284
285
286    if (ok1 && ok2 && ok3 && !verbose) {
287        // Keep non-verbose, passing tests fast
288        return;
289    } else {
290        UnicodeString msg1(ok1 ? "Ok: compare(" : "FAIL: compare(");
291        UnicodeString msg2(", "), msg3(") returned "), msg4("; expected ");
292        UnicodeString prettySource, prettyTarget, sExpect, sResult;
293
294        IntlTest::prettify(source, prettySource);
295        IntlTest::prettify(target, prettyTarget);
296        appendCompareResult(compareResult, sResult);
297        appendCompareResult(expectedResult, sExpect);
298
299        if (ok1) {
300            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
301        } else {
302            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
303        }
304
305        msg1 = UnicodeString(ok2 ? "Ok: key(" : "FAIL: key(");
306        msg2 = ").compareTo(key(";
307        msg3 = ")) returned ";
308
309        appendCompareResult(keyResult, sResult);
310
311        if (ok2) {
312            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
313        } else {
314            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
315
316            msg1 = "  ";
317            msg2 = " vs. ";
318
319            prettify(sourceKey, prettySource);
320            prettify(targetKey, prettyTarget);
321
322            errln(msg1 + prettySource + msg2 + prettyTarget);
323        }
324        msg1 = UnicodeString (ok3 ? "Ok: incCompare(" : "FAIL: incCompare(");
325        msg2 = ", ";
326        msg3 = ") returned ";
327
328        appendCompareResult(incResult, sResult);
329
330        if (ok3) {
331            logln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult);
332        } else {
333            errln(msg1 + prettySource + msg2 + prettyTarget + msg3 + sResult + msg4 + sExpect);
334        }
335    }
336}
337
338UnicodeString&
339IntlTestCollator::appendCompareResult(Collator::EComparisonResult result,
340                  UnicodeString& target)
341{
342    if (result == Collator::LESS)
343    {
344        target += "LESS";
345    }
346    else if (result == Collator::EQUAL)
347    {
348        target += "EQUAL";
349    }
350    else if (result == Collator::GREATER)
351    {
352        target += "GREATER";
353    }
354    else
355    {
356        UnicodeString huh = "?";
357
358        target += (huh + (int32_t)result);
359    }
360
361    return target;
362}
363
364// Produce a printable representation of a CollationKey
365UnicodeString &IntlTestCollator::prettify(const CollationKey &source, UnicodeString &target)
366{
367    int32_t i, byteCount;
368    const uint8_t *bytes = source.getByteArray(byteCount);
369
370    target.remove();
371    target += "[";
372
373    for (i = 0; i < byteCount; i += 1)
374    {
375        if (i != 0) {
376            target += " ";
377        }
378        appendHex(bytes[i], 2, target);
379    }
380
381    target += "]";
382
383    return target;
384}
385
386void IntlTestCollator::backAndForth(CollationElementIterator &iter)
387{
388    // Run through the iterator forwards and stick it into an array
389    int32_t orderLength = 0;
390    LocalArray<Order> orders(getOrders(iter, orderLength));
391    UErrorCode status = U_ZERO_ERROR;
392
393    // Now go through it backwards and make sure we get the same values
394    int32_t index = orderLength;
395    int32_t o;
396
397    // reset the iterator
398    iter.reset();
399
400    while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
401    {
402        /*int32_t offset = */iter.getOffset();
403
404        if (index == 0) {
405          if(o == 0) {
406            continue;
407          } else { // this is an error, orders exhausted but there are non-ignorable CEs from
408            // going backwards
409            errln("Backward iteration returned a non ignorable after orders are exhausted");
410            break;
411          }
412        }
413
414        index -= 1;
415        if (o != orders[index].order) {
416            if (o == 0)
417                index += 1;
418            else {
419                while (index > 0 && orders[--index].order == 0) {
420                  // nothing...
421                }
422
423                if (o != orders[index].order) {
424                    errln("Mismatched order at index %d: 0x%0:8X vs. 0x%0:8X", index,
425                    orders[index].order, o);
426                //break;
427                  return;
428                }
429            }
430        }
431
432#if TEST_OFFSETS
433        if (offset != orders[index].offset) {
434          errln("Mismatched offset at index %d: %d vs. %d", index,
435            orders[index].offset, offset);
436       //break;
437         return;
438        }
439#endif
440
441    }
442
443    while (index != 0 && orders[index - 1].order == 0)
444    {
445      index --;
446    }
447
448    if (index != 0)
449    {
450        UnicodeString msg("Didn't get back to beginning - index is ");
451        errln(msg + index);
452
453        iter.reset();
454        err("next: ");
455        while ((o = iter.next(status)) != CollationElementIterator::NULLORDER)
456        {
457            UnicodeString hexString("0x");
458
459            appendHex(o, 8, hexString);
460            hexString += " ";
461            err(hexString);
462        }
463        errln("");
464
465        err("prev: ");
466        while ((o = iter.previous(status)) != CollationElementIterator::NULLORDER)
467        {
468            UnicodeString hexString("0x");
469
470            appendHex(o, 8, hexString);
471            hexString += " ";
472             err(hexString);
473        }
474        errln("");
475    }
476}
477
478
479/**
480 * Return an integer array containing all of the collation orders
481 * returned by calls to next on the specified iterator
482 */
483IntlTestCollator::Order *IntlTestCollator::getOrders(CollationElementIterator &iter, int32_t &orderLength)
484{
485    int32_t maxSize = 100;
486    int32_t size = 0;
487    LocalArray<Order> orders(new Order[maxSize]);
488    UErrorCode status = U_ZERO_ERROR;
489    int32_t offset = iter.getOffset();
490
491    int32_t order;
492    while ((order = iter.next(status)) != CollationElementIterator::NULLORDER)
493    {
494        if (size == maxSize)
495        {
496            maxSize *= 2;
497            Order *temp = new Order[maxSize];
498
499            uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
500            orders.adoptInstead(temp);
501        }
502
503        orders[size].order  = order;
504        orders[size].offset = offset;
505
506        offset = iter.getOffset();
507        size += 1;
508    }
509    if (U_FAILURE(status)) {
510        errln("CollationElementIterator.next() failed - %s",
511              u_errorName(status));
512    }
513
514    if (maxSize > size)
515    {
516        Order *temp = new Order[size];
517
518        uprv_memcpy(temp, orders.getAlias(), size * sizeof(Order));
519        orders.adoptInstead(temp);
520    }
521
522    orderLength = size;
523    return orders.orphan();
524}
525
526#endif /* #if !UCONFIG_NO_COLLATION */
527