1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/******************************************************************** 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * COPYRIGHT: 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 2002-2010, International Business Machines Corporation and 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved. 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ******************************************************************** 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @author Mark E. Davis 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @author Vladimir Weinstein 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "intltest.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cstring.h" 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "canittst.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/caniter.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/normlzr.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "hash.h" 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ARRAY_LENGTH(array) ((int32_t)(sizeof (array) / sizeof (*array))) 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define CASE(id,test) case id: \ 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) name = #test; \ 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (exec) { \ 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln(#test "---"); \ 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln((UnicodeString)""); \ 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) test(); \ 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } \ 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::runIndexedTest(int32_t index, UBool exec, 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char* &name, char* /*par*/) { 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch (index) { 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CASE(0, TestBasic); 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CASE(1, TestExhaustive); 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CASE(2, TestAPI); 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: name = ""; break; 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Convert Java-style strings with \u Unicode escapes into UnicodeString objects 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UnicodeString str(const char *input) 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString str(input, ""); // Invariant conversion 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return str.unescape(); 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)CanonicalIteratorTest::CanonicalIteratorTest() : 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)nameTrans(NULL), hexTrans(NULL) 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)CanonicalIteratorTest::~CanonicalIteratorTest() 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(nameTrans != NULL) { 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete(nameTrans); 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(hexTrans != NULL) { 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete(hexTrans); 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::TestExhaustive() { 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CanonicalIterator it("", status); 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 i = 0; 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString s; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Test static and dynamic class IDs 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(it.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CanonicalIterator::getStaticClassId ! = CanonicalIterator.getDynamicClassID"); 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < 0x10FFFF; quick?i+=0x10:++i) { 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //for (i = 0xae00; i < 0xaf00; ++i) { 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((i % 0x100) == 0) { 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Testing U+%06X", i); 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // skip characters we know don't have decomps 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int8_t type = u_charType(i); 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (type == U_UNASSIGNED || type == U_PRIVATE_USE_CHAR 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) || type == U_SURROGATE) continue; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s = i; 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) characterTest(s, i, it); 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s += (UChar32)0x0345; //"\\u0345"; 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) characterTest(s, i, it); 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::TestBasic() { 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static const char * const testArray[][2] = { 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) {"\\u00C5d\\u0307\\u0327", "A\\u030Ad\\u0307\\u0327, A\\u030Ad\\u0327\\u0307, A\\u030A\\u1E0B\\u0327, " 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "A\\u030A\\u1E11\\u0307, \\u00C5d\\u0307\\u0327, \\u00C5d\\u0327\\u0307, " 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u00C5\\u1E0B\\u0327, \\u00C5\\u1E11\\u0307, \\u212Bd\\u0307\\u0327, " 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) "\\u212Bd\\u0327\\u0307, \\u212B\\u1E0B\\u0327, \\u212B\\u1E11\\u0307"}, 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) {"\\u010d\\u017E", "c\\u030Cz\\u030C, c\\u030C\\u017E, \\u010Dz\\u030C, \\u010D\\u017E"}, 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) {"x\\u0307\\u0327", "x\\u0307\\u0327, x\\u0327\\u0307, \\u1E8B\\u0327"}, 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) }; 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is not interesting for C/C++ as the data is already built beforehand 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // check build 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet ss = CanonicalIterator.getSafeStart(); 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Safe Start: " + ss.toPattern(true)); 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ss = CanonicalIterator.getStarts('a'); 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'), 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB" 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]") 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ); 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // check permute 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted! 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Hashtable *permutations = new Hashtable(FALSE, status); 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) permutations->setValueDeleter(uhash_deleteUnicodeString); 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString toPermute("ABC"); 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CanonicalIterator::permute(toPermute, FALSE, permutations, status); 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("testing permutation"); 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectEqual("Simple permutation ", "", collectionToString(permutations), "ABC, ACB, BAC, BCA, CAB, CBA"); 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete permutations; 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // try samples 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("testing samples"); 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Hashtable *set = new Hashtable(FALSE, status); 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set->setValueDeleter(uhash_deleteUnicodeString); 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = 0; 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CanonicalIterator it("", status); 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(status)) { 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i = 0; i < ARRAY_LENGTH(testArray); ++i) { 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //logln("Results for: " + name.transliterate(testArray[i])); 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString testStr = CharsToUnicodeString(testArray[i][0]); 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) it.setSource(testStr, status); 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set->removeAll(); 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //UnicodeString *result = new UnicodeString(it.next()); 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString result(it.next()); 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result.isBogus()) { 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set->put(result, new UnicodeString(result), status); // Add result to the table 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //logln(++counter + ": " + hex.transliterate(result)); 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //logln(" = " + name.transliterate(result)); 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expectEqual(i + ": ", testStr, collectionToString(set), CharsToUnicodeString(testArray[i][1])); 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Couldn't instantiate canonical iterator. Error: %s", u_errorName(status)); 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete set; 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::characterTest(UnicodeString &s, UChar32 ch, CanonicalIterator &it) 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString decomp, comp; 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool gotDecomp = FALSE; 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool gotComp = FALSE; 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool gotSource = FALSE; 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Normalizer::decompose(s, FALSE, 0, decomp, status); 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Normalizer::compose(s, FALSE, 0, comp, status); 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // skip characters that don't have either decomp. 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // need quick test for this! 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (s == decomp && s == comp) { 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) it.setSource(s, status); 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (;;) { 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString item = it.next(); 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (item.isBogus()) break; 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (item == s) gotSource = TRUE; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (item == decomp) gotDecomp = TRUE; 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (item == comp) gotComp = TRUE; 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!gotSource || !gotDecomp || !gotComp) { 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("FAIL CanonicalIterator: " + s + (int)ch); 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::expectEqual(const UnicodeString &message, const UnicodeString &item, const UnicodeString &a, const UnicodeString &b) { 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!(a==b)) { 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("FAIL: " + message + getReadable(item)); 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("\t" + getReadable(a)); 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("\t" + getReadable(b)); 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Checked: " + message + getReadable(item)); 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("\t" + getReadable(a)); 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("\t" + getReadable(b)); 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString CanonicalIteratorTest::getReadable(const UnicodeString &s) { 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString result = "["; 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (s.length() == 0) return ""; 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // set up for readable display 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(verbose) { 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (nameTrans == NULL) 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nameTrans = Transliterator::createInstance("[^\\ -\\u007F] name", UTRANS_FORWARD, status); 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString sName = s; 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nameTrans->transliterate(sName); 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result += sName; 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result += ";"; 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (hexTrans == NULL) 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hexTrans = Transliterator::createInstance("[^\\ -\\u007F] hex", UTRANS_FORWARD, status); 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString sHex = s; 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_TRANSLITERATION 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(hexTrans) { // maybe there is no data and transliterator cannot be instantiated 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hexTrans->transliterate(sHex); 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result += sHex; 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result += "]"; 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //return "[" + (verbose ? name->transliterate(s) + "; " : "") + hex->transliterate(s) + "]"; 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC int U_CALLCONV 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compareUnicodeStrings(const void *s1, const void *s2) { 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString **st1 = (UnicodeString **)s1; 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString **st2 = (UnicodeString **)s2; 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (*st1)->compare(**st2); 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString CanonicalIteratorTest::collectionToString(Hashtable *col) { 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString result; 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Iterate over the Hashtable, then qsort. 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString **resArray = new UnicodeString*[col->count()]; 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = 0; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UHashElement *ne = NULL; 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t el = -1; 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //Iterator it = basic.iterator(); 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ne = col->nextElement(el); 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //while (it.hasNext()) 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (ne != NULL) { 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //String item = (String) it.next(); 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString *item = (UnicodeString *)(ne->value.pointer); 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) resArray[i++] = item; 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ne = col->nextElement(el); 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i = 0; i<col->count(); ++i) { 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln(*resArray[i]); 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) qsort(resArray, col->count(), sizeof(UnicodeString *), compareUnicodeStrings); 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = *resArray[0]; 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i = 1; i<col->count(); ++i) { 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result += ", "; 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result += *resArray[i]; 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) Iterator it = col.iterator(); 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (it.hasNext()) { 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result.length() != 0) result.append(", "); 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result.append(it.next().toString()); 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete [] resArray; 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void CanonicalIteratorTest::TestAPI() { 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode status = U_ZERO_ERROR; 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Test reset and getSource 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString start("ljubav"); 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Testing CanonicalIterator::getSource"); 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Instantiating canonical iterator with string "+start); 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CanonicalIterator can(start, status); 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataerrln("Error creating CanonicalIterator: %s", u_errorName(status)); 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString source = can.getSource(); 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("CanonicalIterator::getSource returned "+source); 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start != source) { 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("CanonicalIterator.getSource() didn't return the starting string. Expected "+start+", got "+source); 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Testing CanonicalIterator::reset"); 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString next = can.next(); 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("CanonicalIterator::next returned "+next); 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) can.reset(); 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString afterReset = can.next(); 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("After reset, CanonicalIterator::next returned "+afterReset); 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(next != afterReset) { 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("Next after instantiation ("+next+") is different from next after reset ("+afterReset+")."); 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) logln("Testing getStaticClassID and getDynamicClassID"); 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(can.getDynamicClassID() != CanonicalIterator::getStaticClassID()){ 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errln("RTTI failed for CanonicalIterator getDynamicClassID != getStaticClassID"); 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_NORMALIZATION */ 339