1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2002-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
6 *
7 * @author Mark E. Davis
8 * @author Vladimir Weinstein
9 */
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_NORMALIZATION
14
#include <stdio.h>

#include "intltest.h"
#include "cstring.h"
#include "canittst.h"
#include "unicode/caniter.h"
#include "unicode/normlzr.h"
#include "unicode/uchar.h"
#include "hash.h"
22
/* Element count of a true array (never a pointer), expressed as int32_t. */
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
24
/*
 * Expands to one complete `case` of a test-dispatch switch.
 * The enclosing scope must provide `name` (receives the stringified test
 * name), `exec` (whether to run the test) and logln(); `test` is invoked
 * with no arguments.  The trailing `break` is left unterminated so the
 * macro is used as `CASE(n, Foo);`.
 */
#define CASE(id,test)                         \
    case id: {                                \
        name = #test;                         \
        if (exec) {                           \
            logln(#test "---");               \
            logln((UnicodeString)"");         \
            test();                           \
        }                                     \
    }                                         \
    break
33
34void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec,
35                                         const char* &name, char* /*par*/) {
36    switch (index) {
37        CASE(0, TestBasic);
38        CASE(1, TestExhaustive);
39        CASE(2, TestAPI);
40      default: name = ""; break;
41    }
42}
43
/**
 * Disabled helper, kept for reference only (the definition below is commented out):
 * converts Java-style strings with \u Unicode escapes into UnicodeString objects.
46static UnicodeString str(const char *input)
47{
48    UnicodeString str(input, ""); // Invariant conversion
49    return str.unescape();
50}
51 */
52
53
54CanonicalIteratorTest::CanonicalIteratorTest() :
55nameTrans(NULL), hexTrans(NULL)
56{
57}
58
59CanonicalIteratorTest::~CanonicalIteratorTest()
60{
61#if !UCONFIG_NO_TRANSLITERATION
62  if(nameTrans != NULL) {
63    delete(nameTrans);
64  }
65  if(hexTrans != NULL) {
66    delete(hexTrans);
67  }
68#endif
69}
70
71void CanonicalIteratorTest::TestExhaustive() {
72    UErrorCode status = U_ZERO_ERROR;
73    CanonicalIterator it("", status);
74    if (U_FAILURE(status)) {
75        dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
76        return;
77    }
78    UChar32 i = 0;
79    UnicodeString s;
80    // Test static and dynamic class IDs
81    if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
82        errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID");
83    }
84    for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) {
85        //for (i = 0xae00; i < 0xaf00; ++i) {
86
87        if ((i % 0x100) == 0) {
88            logln("Testing U+%06X", i);
89        }
90
91        // skip characters we know don't have decomps
92        int8_t type = u_charType(i);
93        if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR
94            || type == U_SURROGATE) continue;
95
96        s = i;
97        characterTest(s, i, it);
98
99        s += (UChar32)0x0345; //"\\u0345";
100        characterTest(s, i, it);
101    }
102}
103
104void CanonicalIteratorTest::TestBasic() {
105
106    UErrorCode status = U_ZERO_ERROR;
107
108    static const char * const testArray[][2] = {
109        {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, "
110            "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, "
111            "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, "
112            "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"},
113        {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"},
114        {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"},
115    };
116
117#if 0
118    // This is not interesting for C/C++ as the data is already built beforehand
119    // check build
120    UnicodeSet ss = CanonicalIterator.getSafeStart();
121    logln("Safe Start: " + ss.toPattern(true));
122    ss = CanonicalIterator.getStarts('a');
123    expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
124        new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
125        + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
126            );
127#endif
128
129    // check permute
130    // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
131
132    Hashtable *permutations = new Hashtable(FALSE, status);
133    permutations->setValueDeleter(uprv_deleteUObject);
134    UnicodeString toPermute("ABC");
135
136    CanonicalIterator::permute(toPermute, FALSE, permutations, status);
137
138    logln("testing permutation");
139
140    expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA");
141
142    delete permutations;
143
144    // try samples
145    logln("testing samples");
146    Hashtable *set = new Hashtable(FALSE, status);
147    set->setValueDeleter(uprv_deleteUObject);
148    int32_t i = 0;
149    CanonicalIterator it("", status);
150    if(U_SUCCESS(status)) {
151      for (i = 0; i < ARRAY_LENGTH(testArray); ++i) {
152          //logln("Results for: " + name.transliterate(testArray[i]));
153          UnicodeString testStr = CharsToUnicodeString(testArray[i][0]);
154          it.setSource(testStr, status);
155          set->removeAll();
156          for (;;) {
157              //UnicodeString *result = new UnicodeString(it.next());
158              UnicodeString result(it.next());
159              if (result.isBogus()) {
160                  break;
161              }
162              set->put(result, new UnicodeString(result), status); // Add result to the table
163              //logln(++counter + ": " + hex.transliterate(result));
164              //logln(" = " + name.transliterate(result));
165          }
166          expectEqual(i + UnicodeString(": "), testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1]));
167
168      }
169    } else {
170      dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status));
171    }
172    delete set;
173}
174
175void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it)
176{
177    UErrorCode status = U_ZERO_ERROR;
178    UnicodeString decomp, comp;
179    UBool gotDecomp = FALSE;
180    UBool gotComp = FALSE;
181    UBool gotSource = FALSE;
182
183    Normalizer::decompose(s, FALSE, 0, decomp, status);
184    Normalizer::compose(s, FALSE, 0, comp, status);
185
186    // skip characters that don't have either decomp.
187    // need quick test for this!
188    if (s == decomp && s == comp) {
189        return;
190    }
191
192    it.setSource(s, status);
193
194    for (;;) {
195        UnicodeString item = it.next();
196        if (item.isBogus()) break;
197        if (item == s) gotSource = TRUE;
198        if (item == decomp) gotDecomp = TRUE;
199        if (item == comp) gotComp = TRUE;
200    }
201
202    if (!gotSource || !gotDecomp || !gotComp) {
203        errln("FAIL CanonicalIterator: " + s + (int)ch);
204    }
205}
206
207void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) {
208    if (!(a==b)) {
209        errln("FAIL: " + message + getReadable(item));
210        errln("\t" + getReadable(a));
211        errln("\t" + getReadable(b));
212    } else {
213        logln("Checked: " + message + getReadable(item));
214        logln("\t" + getReadable(a));
215        logln("\t" + getReadable(b));
216    }
217}
218
219UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) {
220  UErrorCode status = U_ZERO_ERROR;
221  UnicodeString result = "[";
222    if (s.length() == 0) return "";
223    // set up for readable display
224#if !UCONFIG_NO_TRANSLITERATION
225    if(verbose) {
226      if (nameTrans == NULL)
227          nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status);
228      UnicodeString sName = s;
229      nameTrans->transliterate(sName);
230      result += sName;
231      result += ";";
232    }
233    if (hexTrans == NULL)
234        hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status);
235#endif
236    UnicodeString sHex = s;
237#if !UCONFIG_NO_TRANSLITERATION
238    if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated
239      hexTrans->transliterate(sHex);
240    }
241#endif
242    result += sHex;
243    result += "]";
244    return result;
245    //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]";
246}
247
248U_CFUNC int U_CALLCONV
249compareUnicodeStrings(const void *s1, const void *s2) {
250  UnicodeString **st1 = (UnicodeString **)s1;
251  UnicodeString **st2 = (UnicodeString **)s2;
252
253  return (*st1)->compare(**st2);
254}
255
256
257UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) {
258    UnicodeString result;
259
260    // Iterate over the Hashtable, then qsort.
261
262    UnicodeString **resArray = new UnicodeString*[col->count()];
263    int32_t i = 0;
264
265    const UHashElement *ne = NULL;
266    int32_t el = -1;
267    //Iterator it = basic.iterator();
268    ne = col->nextElement(el);
269    //while (it.hasNext())
270    while (ne != NULL) {
271      //String item = (String) it.next();
272      UnicodeString *item = (UnicodeString *)(ne->value.pointer);
273      resArray[i++] = item;
274      ne = col->nextElement(el);
275    }
276
277    for(i = 0; i<col->count(); ++i) {
278      logln(*resArray[i]);
279    }
280
281    qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings);
282
283    result = *resArray[0];
284
285    for(i = 1; i<col->count(); ++i) {
286      result += ", ";
287      result += *resArray[i];
288    }
289
290/*
291    Iterator it = col.iterator();
292    while (it.hasNext()) {
293        if (result.length() != 0) result.append(", ");
294        result.append(it.next().toString());
295    }
296*/
297
298    delete [] resArray;
299
300    return result;
301}
302
303void CanonicalIteratorTest::TestAPI() {
304  UErrorCode status = U_ZERO_ERROR;
305  // Test reset and getSource
306  UnicodeString start("ljubav");
307  logln("Testing CanonicalIterator::getSource");
308  logln("Instantiating canonical iterator with string "+start);
309  CanonicalIterator can(start, status);
310  if (U_FAILURE(status)) {
311      dataerrln("Error creating CanonicalIterator: %s", u_errorName(status));
312      return;
313  }
314  UnicodeString source = can.getSource();
315  logln("CanonicalIterator::getSource returned "+source);
316  if(start != source) {
317    errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source);
318  }
319  logln("Testing CanonicalIterator::reset");
320  UnicodeString next = can.next();
321  logln("CanonicalIterator::next returned "+next);
322
323  can.reset();
324
325  UnicodeString afterReset = can.next();
326  logln("After reset, CanonicalIterator::next returned "+afterReset);
327
328  if(next != afterReset) {
329    errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+").");
330  }
331
332  logln("Testing getStaticClassID and getDynamicClassID");
333  if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){
334      errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID");
335  }
336}
337
338#endif /* #if !UCONFIG_NO_NORMALIZATION */
339