1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/********************************************************************
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * COPYRIGHT:
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 2002-2010, International Business Machines Corporation and
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved.
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ********************************************************************
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @author Mark E. Davis
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @author Vladimir Weinstein
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "intltest.h"
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h"
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "canittst.h"
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/caniter.h"
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/normlzr.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h"
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "hash.h"
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/* Number of elements in a true array (not a pointer), as a signed 32-bit count. */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/*
 * Expands to one complete `case` entry for a switch statement:
 * stores the stringified test name in `name`, and when `exec` is set
 * logs a header and calls test(). The expansion ends in `break` without
 * a semicolon, so each use must be followed by ';'.
 * Relies on `name`, `exec` and `logln` being in scope at the point of use.
 */
#define CASE(id,test) case id:                          \
                          name = #test;                 \
                          if (exec) {                   \
                              logln(#test "---");       \
                              logln((UnicodeString)""); \
                              test();                   \
                          }                             \
                          break
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         const char* &name, char* /*par*/) {
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    switch (index) {
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE(0, TestBasic);
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE(1, TestExhaustive);
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE(2, TestAPI);
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      default: name = ""; break;
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/*
 * Disabled helper, kept for reference only (the whole function is commented out):
 * converts Java-style strings with \u Unicode escapes into UnicodeString objects.
 *
static UnicodeString str(const char *input)
{
    UnicodeString str(input, ""); // Invariant conversion
    return str.unescape();
}
 */
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)CanonicalIteratorTest::CanonicalIteratorTest() :
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)nameTrans(NULL), hexTrans(NULL)
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)CanonicalIteratorTest::~CanonicalIteratorTest()
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if(nameTrans != NULL) {
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete(nameTrans);
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if(hexTrans != NULL) {
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete(hexTrans);
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::TestExhaustive() {
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CanonicalIterator it("", status);
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 i = 0;
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString s;
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test static and dynamic class IDs
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) {
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //for (i = 0xae00; i < 0xaf00; ++i) {
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((i % 0x100) == 0) {
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln("Testing U+%06X", i);
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // skip characters we know don't have decomps
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int8_t type = u_charType(i);
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            || type == U_SURROGATE) continue;
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        s = i;
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        characterTest(s, i, it);
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        s += (UChar32)0x0345; //"\\u0345";
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        characterTest(s, i, it);
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::TestBasic() {
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char * const testArray[][2] = {
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // This is not interesting for C/C++ as the data is already built beforehand
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // check build
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet ss = CanonicalIterator.getSafeStart();
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Safe Start: " + ss.toPattern(true));
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ss = CanonicalIterator.getStarts('a');
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            );
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // check permute
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Hashtable *permutations = new Hashtable(FALSE, status);
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    permutations->setValueDeleter(uhash_deleteUnicodeString);
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString toPermute("ABC");
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CanonicalIterator::permute(toPermute, FALSE, permutations, status);
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("testing permutation");
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete permutations;
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // try samples
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("testing samples");
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Hashtable *set = new Hashtable(FALSE, status);
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set->setValueDeleter(uhash_deleteUnicodeString);
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i = 0;
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CanonicalIterator it("", status);
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(status)) {
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          //logln("Results for: " + name.transliterate(testArray[i]));
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          it.setSource(testStr, status);
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          set->removeAll();
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          for (;;) {
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              //UnicodeString *result = new UnicodeString(it.next());
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              UnicodeString result(it.next());
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              if (result.isBogus()) {
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  break;
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              }
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              set->put(result, new UnicodeString(result), status); // Add result to the table
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              //logln(++counter + ": " + hex.transliterate(result));
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              //logln(" = " + name.transliterate(result));
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          }
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      }
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set;
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it)
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString decomp, comp;
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool gotDecomp = FALSE;
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool gotComp = FALSE;
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool gotSource = FALSE;
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Normalizer::decompose(s, FALSE, 0, decomp, status);
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Normalizer::compose(s, FALSE, 0, comp, status);
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // skip characters that don't have either decomp.
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // need quick test for this!
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (s == decomp && s == comp) {
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    it.setSource(s, status);
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (;;) {
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString item = it.next();
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (item.isBogus()) break;
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (item == s) gotSource = TRUE;
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (item == decomp) gotDecomp = TRUE;
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (item == comp) gotComp = TRUE;
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!gotSource || !gotDecomp || !gotComp) {
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL CanonicalIterator: " + s + (int)ch);
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) {
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!(a==b)) {
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: " + message + getReadable(item));
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("\t" + getReadable(a));
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("\t" + getReadable(b));
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln("Checked: " + message + getReadable(item));
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln("\t" + getReadable(a));
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln("\t" + getReadable(b));
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) {
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UErrorCode status = U_ZERO_ERROR;
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString result = "[";
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (s.length() == 0) return "";
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // set up for readable display
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(verbose) {
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      if (nameTrans == NULL)
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status);
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      UnicodeString sName = s;
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      nameTrans->transliterate(sName);
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      result += sName;
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      result += ";";
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (hexTrans == NULL)
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status);
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString sHex = s;
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      hexTrans->transliterate(sHex);
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result += sHex;
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result += "]";
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC int U_CALLCONV
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compareUnicodeStrings(const void *s1, const void *s2) {
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString **st1 = (UnicodeString **)s1;
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString **st2 = (UnicodeString **)s2;
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  return (*st1)->compare(**st2);
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) {
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString result;
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Iterate over the Hashtable, then qsort.
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString **resArray = new UnicodeString*[col->count()];
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i = 0;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UHashElement *ne = NULL;
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t el = -1;
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //Iterator it = basic.iterator();
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ne = col->nextElement(el);
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //while (it.hasNext())
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (ne != NULL) {
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      //String item = (String) it.next();
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      UnicodeString *item = (UnicodeString *)(ne->value.pointer);
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      resArray[i++] = item;
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      ne = col->nextElement(el);
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i = 0; i<col->count(); ++i) {
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      logln(*resArray[i]);
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings);
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result = *resArray[0];
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i = 1; i<col->count(); ++i) {
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      result += ", ";
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      result += *resArray[i];
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Iterator it = col.iterator();
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while (it.hasNext()) {
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (result.length() != 0) result.append(", ");
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result.append(it.next().toString());
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete [] resArray;
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::TestAPI() {
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UErrorCode status = U_ZERO_ERROR;
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  // Test reset and getSource
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString start("ljubav");
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("Testing CanonicalIterator::getSource");
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("Instantiating canonical iterator with string "+start);
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  CanonicalIterator can(start, status);
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if (U_FAILURE(status)) {
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      return;
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString source = can.getSource();
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("CanonicalIterator::getSource returned "+source);
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if(start != source) {
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source);
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("Testing CanonicalIterator::reset");
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString next = can.next();
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("CanonicalIterator::next returned "+next);
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  can.reset();
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString afterReset = can.next();
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("After reset, CanonicalIterator::next returned "+afterReset);
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if(next != afterReset) {
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+").");
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  logln("Testing getStaticClassID and getDynamicClassID");
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  }
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_NORMALIZATION */
339