16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/********************************************************************
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * COPYRIGHT:
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copyright (c) 2002-2012, International Business Machines Corporation and
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * others. All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ********************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @author Mark E. Davis
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @author Vladimir Weinstein
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "intltest.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "canittst.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/caniter.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/normlzr.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "hash.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Number of elements of a true array (not a pointer), as an int32_t.
// The argument is parenthesized wherever an operator is applied to it, so any
// array-valued expression can be passed safely.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof(*(array))))
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
// Expands to one `case` of the switch in runIndexedTest(): it always stores
// the test's name in `name`, and only when `exec` is set does it log a header
// line plus an empty line and then call the member function `test`.
// The expansion ends in `break` without a semicolon, so every use must be
// followed by ';'.
#define CASE(id,test)                       \
    case id:                                \
        name = #test;                       \
        if (exec) {                         \
            logln(#test "---");             \
            logln((UnicodeString)"");       \
            test();                         \
        }                                   \
        break
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         const char* &name, char* /*par*/) {
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    switch (index) {
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        CASE(0, TestBasic);
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        CASE(1, TestExhaustive);
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        CASE(2, TestAPI);
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      default: name = ""; break;
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Convert Java-style strings with \u Unicode escapes into UnicodeString objects
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UnicodeString str(const char *input)
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString str(input, ""); // Invariant conversion
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return str.unescape();
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCanonicalIteratorTest::CanonicalIteratorTest() :
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgnameTrans(NULL), hexTrans(NULL)
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgCanonicalIteratorTest::~CanonicalIteratorTest()
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_TRANSLITERATION
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(nameTrans != NULL) {
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete(nameTrans);
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(hexTrans != NULL) {
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete(hexTrans);
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonicalIteratorTest::TestExhaustive() {
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CanonicalIterator it("", status);
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 i = 0;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString s;
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Test static and dynamic class IDs
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) {
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //for (i = 0xae00; i < 0xaf00; ++i) {
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if ((i % 0x100) == 0) {
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            logln("Testing U+%06X", i);
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // skip characters we know don't have decomps
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int8_t type = u_charType(i);
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            || type == U_SURROGATE) continue;
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s = i;
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        characterTest(s, i, it);
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        s += (UChar32)0x0345; //"\\u0345";
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        characterTest(s, i, it);
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonicalIteratorTest::TestBasic() {
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const char * const testArray[][2] = {
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    };
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // This is not interesting for C/C++ as the data is already built beforehand
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // check build
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeSet ss = CanonicalIterator.getSafeStart();
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("Safe Start: " + ss.toPattern(true));
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ss = CanonicalIterator.getStarts('a');
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            );
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // check permute
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Hashtable *permutations = new Hashtable(FALSE, status);
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    permutations->setValueDeleter(uprv_deleteUObject);
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString toPermute("ABC");
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CanonicalIterator::permute(toPermute, FALSE, permutations, status);
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("testing permutation");
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete permutations;
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // try samples
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    logln("testing samples");
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Hashtable *set = new Hashtable(FALSE, status);
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    set->setValueDeleter(uprv_deleteUObject);
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i = 0;
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CanonicalIterator it("", status);
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_SUCCESS(status)) {
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          //logln("Results for: " + name.transliterate(testArray[i]));
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          it.setSource(testStr, status);
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          set->removeAll();
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          for (;;) {
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              //UnicodeString *result = new UnicodeString(it.next());
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UnicodeString result(it.next());
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              if (result.isBogus()) {
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                  break;
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              }
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              set->put(result, new UnicodeString(result), status); // Add result to the table
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              //logln(++counter + ": " + hex.transliterate(result));
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              //logln(" = " + name.transliterate(result));
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          }
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          expectEqual(i + UnicodeString(": "), testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      }
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete set;
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it)
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode status = U_ZERO_ERROR;
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString decomp, comp;
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool gotDecomp = FALSE;
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool gotComp = FALSE;
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool gotSource = FALSE;
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Normalizer::decompose(s, FALSE, 0, decomp, status);
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Normalizer::compose(s, FALSE, 0, comp, status);
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // skip characters that don't have either decomp.
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // need quick test for this!
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (s == decomp && s == comp) {
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    it.setSource(s, status);
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (;;) {
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString item = it.next();
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (item.isBogus()) break;
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (item == s) gotSource = TRUE;
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (item == decomp) gotDecomp = TRUE;
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (item == comp) gotComp = TRUE;
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!gotSource || !gotDecomp || !gotComp) {
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("FAIL CanonicalIterator: " + s + (int)ch);
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) {
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (!(a==b)) {
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("FAIL: " + message + getReadable(item));
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("\t" + getReadable(a));
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errln("\t" + getReadable(b));
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("Checked: " + message + getReadable(item));
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("\t" + getReadable(a));
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        logln("\t" + getReadable(b));
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) {
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UErrorCode status = U_ZERO_ERROR;
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString result = "[";
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (s.length() == 0) return "";
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // set up for readable display
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_TRANSLITERATION
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(verbose) {
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      if (nameTrans == NULL)
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org          nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status);
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      UnicodeString sName = s;
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      nameTrans->transliterate(sName);
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      result += sName;
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      result += ";";
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (hexTrans == NULL)
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status);
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString sHex = s;
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_TRANSLITERATION
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      hexTrans->transliterate(sHex);
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result += sHex;
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result += "]";
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result;
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int U_CALLCONV
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcompareUnicodeStrings(const void *s1, const void *s2) {
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString **st1 = (UnicodeString **)s1;
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString **st2 = (UnicodeString **)s2;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  return (*st1)->compare(**st2);
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) {
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString result;
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Iterate over the Hashtable, then qsort.
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString **resArray = new UnicodeString*[col->count()];
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t i = 0;
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UHashElement *ne = NULL;
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t el = -1;
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //Iterator it = basic.iterator();
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ne = col->nextElement(el);
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //while (it.hasNext())
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (ne != NULL) {
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      //String item = (String) it.next();
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      UnicodeString *item = (UnicodeString *)(ne->value.pointer);
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      resArray[i++] = item;
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      ne = col->nextElement(el);
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i = 0; i<col->count(); ++i) {
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      logln(*resArray[i]);
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings);
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    result = *resArray[0];
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i = 1; i<col->count(); ++i) {
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      result += ", ";
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      result += *resArray[i];
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Iterator it = col.iterator();
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while (it.hasNext()) {
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (result.length() != 0) result.append(", ");
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.append(it.next().toString());
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete [] resArray;
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result;
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid CanonicalIteratorTest::TestAPI() {
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UErrorCode status = U_ZERO_ERROR;
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  // Test reset and getSource
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString start("ljubav");
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("Testing CanonicalIterator::getSource");
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("Instantiating canonical iterator with string "+start);
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  CanonicalIterator can(start, status);
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if (U_FAILURE(status)) {
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      return;
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString source = can.getSource();
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("CanonicalIterator::getSource returned "+source);
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(start != source) {
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source);
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("Testing CanonicalIterator::reset");
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString next = can.next();
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("CanonicalIterator::next returned "+next);
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  can.reset();
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  UnicodeString afterReset = can.next();
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("After reset, CanonicalIterator::next returned "+afterReset);
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(next != afterReset) {
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+").");
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  logln("Testing getStaticClassID and getDynamicClassID");
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org      errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org  }
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_NORMALIZATION */
339