1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2012-2014, International Business Machines 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationtest.cpp 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012apr27 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/coll.h" 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/errorcode.h" 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/localpointer.h" 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/normalizer2.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/sortkey.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/std_string.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/strenum.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uiter.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/usetiter.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ustring.h" 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "charstr.h" 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h" 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationfcd.h" 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h" 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationroot.h" 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationrootelements.h" 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationruleparser.h" 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationweights.h" 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cstring.h" 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "intltest.h" 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h" 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "ucbuf.h" 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uhash.h" 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uitercollationiterator.h" 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf16collationiterator.h" 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf8collationiterator.h" 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h" 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr64.h" 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "writesrc.h" 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// TODO: Move to ucbuf.h 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close); 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CodePointIterator; 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius// TODO: try to share code with IntlTestCollator; for example, prettify(CollationKey) 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CollationTest : public IntlTest { 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationTest() 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : fcd(NULL), nfd(NULL), 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLineNumber(0), 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll(NULL) {} 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ~CollationTest() { 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL); 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestMinMax(); 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestImplicits(); 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestNulTerminated(); 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestIllegalUTF8(); 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestShortFCDData(); 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestFCD(); 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestCollationWeights(); 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestRootElements(); 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestTailoredElements(); 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void TestDataDriven(); 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void checkFCD(const char *name, CollationIterator &ci, CodePointIterator &cpi); 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void checkAllocWeights(CollationWeights &cw, 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lowerLimit, uint32_t upperLimit, int32_t n, 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t someLength, int32_t minCount); 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UnicodeString printSortKey(const uint8_t *p, int32_t length); 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UnicodeString printCollationKey(const CollationKey &key); 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Helpers & fields for data-driven test. 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UBool isCROrLF(UChar c) { return c == 0xa || c == 0xd; } 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UBool isSpace(UChar c) { return c == 9 || c == 0x20 || c == 0x3000; } 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static UBool isSectionStarter(UChar c) { return c == 0x25 || c == 0x2a || c == 0x40; } // %*@ 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t skipSpaces(int32_t i) { 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(isSpace(fileLine[i])) { ++i; } 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return i; 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool readLine(UCHARBUF *f, IcuTestErrorCode &errorCode); 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, UErrorCode &errorCode); 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::Level parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode); 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void parseAndSetAttribute(IcuTestErrorCode &errorCode); 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode); 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode); 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setRootCollator(IcuTestErrorCode &errorCode); 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setLocaleCollator(IcuTestErrorCode &errorCode); 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const; 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool getSortKeyParts(const UChar *s, int32_t length, 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString &dest, int32_t partSize, 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode &errorCode); 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool getCollationKey(const char *norm, const UnicodeString &line, 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *s, int32_t length, 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationKey &key, IcuTestErrorCode &errorCode); 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool checkCompareTwo(const char *norm, const UnicodeString &prevFileLine, 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &prevString, const UnicodeString &s, 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCollationResult expectedOrder, Collation::Level expectedLevel, 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode &errorCode); 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode); 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const Normalizer2 *fcd, *nfd; 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString fileLine; 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t fileLineNumber; 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString fileTestName; 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collator *coll; 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusextern IntlTest *createCollationTest() { 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return new CollationTest(); 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(exec) { 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("TestSuite CollationTest: "); 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO_BEGIN; 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestMinMax); 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestImplicits); 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestNulTerminated); 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestIllegalUTF8); 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestShortFCDData); 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestFCD); 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestCollationWeights); 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestRootElements); 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestTailoredElements); 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO(TestDataDriven); 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius TESTCASE_AUTO_END; 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestMinMax() { 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestMinMax"); 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setRootCollator(errorCode); 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.reset(); 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll); 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rbc == NULL) { 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("the root collator is not a RuleBasedCollator"); 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const UChar s[2] = { 0xfffe, 0xffff }; 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector64 ces(errorCode); 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rbc->internalGetCEs(UnicodeString(FALSE, s, 2), ces, errorCode); 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ces.size() != 2) { 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("expected 2 CEs for <FFFE, FFFF>, got %d", (int)ces.size()); 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = ces.elementAti(0); 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t expected = 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ((int64_t)Collation::MERGE_SEPARATOR_PRIMARY << 32) | 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::MERGE_SEPARATOR_LOWER32; 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce != expected) { 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CE(U+fffe)=%04lx != 02.02.02", (long)ce); 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce = ces.elementAti(1); 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expected = Collation::makeCE(Collation::MAX_PRIMARY); 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce != expected) { 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CE(U+ffff)=%04lx != max..", (long)ce); 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestImplicits() { 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestImplicits"); 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *cd = CollationRoot::getData(errorCode); 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logDataIfFailureAndReset("CollationRoot::getBaseData()")) { 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Implicit primary weights should be assigned for the following sets, 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and sort in ascending order by set and then code point. 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // See http://www.unicode.org/reports/tr10/#Implicit_Weights 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // core Han Unified Ideographs 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet coreHan("[\\p{unified_ideograph}&" 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "[\\p{Block=CJK_Unified_Ideographs}" 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "\\p{Block=CJK_Compatibility_Ideographs}]]", 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode); 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // all other Unified Han ideographs 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet otherHan("[\\p{unified ideograph}-" 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "[\\p{Block=CJK_Unified_Ideographs}" 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "\\p{Block=CJK_Compatibility_Ideographs}]]", 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode); 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet unassigned("[[:Cn:][:Cs:][:Co:]]", errorCode); 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius unassigned.remove(0xfffe, 0xffff); // These have special CLDR root mappings. 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("UnicodeSet")) { 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeSet *sets[] = { &coreHan, &otherHan, &unassigned }; 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 prev = 0; 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t prevPrimary = 0; 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator ci(cd, FALSE, NULL, NULL, NULL); 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < LENGTHOF(sets); ++i) { 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalPointer<UnicodeSetIterator> iter(new UnicodeSetIterator(*sets[i])); 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(iter->next()) { 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = iter->getCodepoint(); 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString s(c); 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ci.setText(s.getBuffer(), s.getBuffer() + s.length()); 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = ci.nextCE(errorCode); 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce2 = ci.nextCE(errorCode); 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("CollationIterator.nextCE()")) { 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce == Collation::NO_CE || ce2 != Collation::NO_CE) { 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationIterator.nextCE(U+%04lx) did not yield exactly one CE", (long)c); 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((ce & 0xffffffff) != Collation::COMMON_SEC_AND_TER_CE) { 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationIterator.nextCE(U+%04lx) has non-common sec/ter weights: %08lx", 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)c, (long)(ce & 0xffffffff)); 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t primary = (uint32_t)(ce >> 32); 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!(primary > prevPrimary)) { 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CE(U+%04lx)=%04lx.. not greater than CE(U+%04lx)=%04lx..", 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)c, (long)primary, (long)prev, (long)prevPrimary); 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prev = c; 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevPrimary = primary; 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestNulTerminated() { 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestNulTerminated"); 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *data = CollationRoot::getData(errorCode); 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) { 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const UChar s[] = { 0x61, 0x62, 0x61, 0x62, 0 }; 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator ci1(data, FALSE, s, s, s + 2); 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator ci2(data, FALSE, s + 2, s + 2, NULL); 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0;; ++i) { 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce1 = ci1.nextCE(errorCode); 265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce2 = ci2.nextCE(errorCode); 266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("CollationIterator.nextCE()")) { 267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce1 != ce2) { 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationIterator.nextCE(with length) != nextCE(NUL-terminated) at CE %d", (int)i); 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(ce1 == Collation::NO_CE) { break; } 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestIllegalUTF8() { 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestIllegalUTF8"); 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setRootCollator(errorCode); 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.reset(); 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->setAttribute(UCOL_STRENGTH, UCOL_IDENTICAL, errorCode); 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const char *strings[] = { 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // U+FFFD 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xef\xbf\xbdz", 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // illegal byte sequences 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\x80z", // trail byte 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xc1\x81z", // non-shortest form 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xe0\x82\x83z", // non-shortest form 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xed\xa0\x80z", // lead surrogate: would be U+D800 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xed\xbf\xbfz", // trail surrogate: would be U+DFFF 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xf0\x8f\xbf\xbfz", // non-shortest form 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "a\xf4\x90\x80\x80z" // out of range: would be U+110000 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius StringPiece fffd(strings[0]); 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 1; i < LENGTHOF(strings); ++i) { 302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius StringPiece illegal(strings[i]); 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCollationResult order = coll->compareUTF8(fffd, illegal, errorCode); 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != UCOL_EQUAL) { 305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("compareUTF8(U+FFFD, string %d with illegal UTF-8)=%d != UCOL_EQUAL", 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)i, order); 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid addLeadSurrogatesForSupplementary(const UnicodeSet &src, UnicodeSet &dest) { 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(UChar32 c = 0x10000; c < 0x110000;) { 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 next = c + 0x400; 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(src.containsSome(c, next - 1)) { 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest.add(U16_LEAD(c)); 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius c = next; 320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} // namespace 324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestShortFCDData() { 326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // See CollationFCD class comments. 327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestShortFCDData"); 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet expectedLccc("[:^lccc=0:]", errorCode); 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expectedLccc.add(0xdc00, 0xdfff); // add all trail surrogates 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addLeadSurrogatesForSupplementary(expectedLccc, expectedLccc); 332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet lccc; // actual 333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(UChar32 c = 0; c <= 0xffff; ++c) { 334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationFCD::hasLccc(c)) { lccc.add(c); } 335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet diff(expectedLccc); 337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.removeAll(lccc); 338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.remove(0x10000, 0x10ffff); // hasLccc() only works for the BMP 339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString empty("[]"); 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString diffString; 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.toPattern(diffString, TRUE); 342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius assertEquals("CollationFCD::hasLccc() expected-actual", empty, diffString); 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff = lccc; 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.removeAll(expectedLccc); 345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.toPattern(diffString, TRUE); 346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius assertEquals("CollationFCD::hasLccc() actual-expected", empty, diffString, TRUE); 347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet expectedTccc("[:^tccc=0:]", errorCode); 349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (errorCode.isSuccess()) { 350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addLeadSurrogatesForSupplementary(expectedLccc, expectedTccc); 351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius addLeadSurrogatesForSupplementary(expectedTccc, expectedTccc); 352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet tccc; // actual 353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(UChar32 c = 0; c <= 0xffff; ++c) { 354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationFCD::hasTccc(c)) { tccc.add(c); } 355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff = expectedTccc; 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.removeAll(tccc); 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.remove(0x10000, 0x10ffff); // hasTccc() only works for the BMP 359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius assertEquals("CollationFCD::hasTccc() expected-actual", empty, diffString); 360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff = tccc; 361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.removeAll(expectedTccc); 362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius diff.toPattern(diffString, TRUE); 363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius assertEquals("CollationFCD::hasTccc() actual-expected", empty, diffString); 364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass CodePointIterator { 368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CodePointIterator(const UChar32 *cp, int32_t length) : cp(cp), length(length), pos(0) {} 370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void resetToStart() { pos = 0; } 371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 next() { return (pos < length) ? cp[pos++] : U_SENTINEL; } 372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 previous() { return (pos > 0) ? cp[--pos] : U_SENTINEL; } 373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t getLength() const { return length; } 374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int getIndex() const { return (int)pos; } 375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar32 *cp; 377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length; 378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t pos; 379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::checkFCD(const char *name, 382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationIterator &ci, CodePointIterator &cpi) { 383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "checkFCD"); 384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Iterate forward to the limit. 386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c1 = ci.nextCodePoint(errorCode); 388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c2 = cpi.next(); 389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 != c2) { 390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("%s.nextCodePoint(to limit, 1st pass) = U+%04lx != U+%04lx at %d", 391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius name, (long)c1, (long)c2, cpi.getIndex()); 392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 < 0) { break; } 395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Iterate backward most of the way. 398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t n = (cpi.getLength() * 2) / 3; n > 0; --n) { 399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c1 = ci.previousCodePoint(errorCode); 400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c2 = cpi.previous(); 401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 != c2) { 402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("%s.previousCodePoint() = U+%04lx != U+%04lx at %d", 403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius name, (long)c1, (long)c2, cpi.getIndex()); 404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Forward again. 409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c1 = ci.nextCodePoint(errorCode); 411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c2 = cpi.next(); 412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 != c2) { 413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("%s.nextCodePoint(to limit again) = U+%04lx != U+%04lx at %d", 414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius name, (long)c1, (long)c2, cpi.getIndex()); 415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 < 0) { break; } 418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Iterate backward to the start. 421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c1 = ci.previousCodePoint(errorCode); 423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c2 = cpi.previous(); 424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 != c2) { 425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("%s.previousCodePoint(to start) = U+%04lx != U+%04lx at %d", 426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius name, (long)c1, (long)c2, cpi.getIndex()); 427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c1 < 0) { break; } 430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestFCD() { 434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestFCD"); 435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *data = CollationRoot::getData(errorCode); 436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) { 437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Input string, not FCD, NUL-terminated. 441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const UChar s[] = { 442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x308, 0xe1, 0x62, 0x301, 0x327, 0x430, 0x62, 443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), // MUSICAL SYMBOL QUARTER NOTE=1D158 1D165, ccc=0, 216 444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x327, 0x308, // ccc=202, 230 445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), // MUSICAL SYMBOL COMBINING AUGMENTATION DOT, ccc=226 446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U16_LEAD(0x1D15F), U16_TRAIL(0x1D15F), 447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), 448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0xac01, 449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0xe7, // Character with tccc!=0 decomposed together with mis-ordered sequence. 450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U16_LEAD(0x1D16D), U16_TRAIL(0x1D16D), U16_LEAD(0x1D165), U16_TRAIL(0x1D165), 451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0xe1, // Character with tccc!=0 decomposed together with decomposed sequence. 452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0xf73, 0xf75, // Tibetan composite vowels must be decomposed. 453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x4e00, 0xf81, 454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0 455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expected code points. 457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const UChar32 cp[] = { 458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x308, 0xe1, 0x62, 0x327, 0x301, 0x430, 0x62, 459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x1D158, 0x327, 0x1D165, 0x1D16D, 0x308, 460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x1D15F, 0x1D16D, 461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0xac01, 462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x63, 0x327, 0x1D165, 0x1D16D, 463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x61, 464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0xf71, 0xf71, 0xf72, 0xf74, 0x301, 465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0x4e00, 0xf71, 0xf80 466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius FCDUTF16CollationIterator u16ci(data, FALSE, s, s, NULL); 469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("FCDUTF16CollationIterator constructor")) { 470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CodePointIterator cpi(cp, LENGTHOF(cp)); 473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkFCD("FCDUTF16CollationIterator", u16ci, cpi); 474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if U_HAVE_STD_STRING 476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cpi.resetToStart(); 477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius std::string utf8; 478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString(s).toUTF8String(utf8); 479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius FCDUTF8CollationIterator u8ci(data, FALSE, 480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius reinterpret_cast<const uint8_t *>(utf8.c_str()), 0, -1); 481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("FCDUTF8CollationIterator constructor")) { 482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkFCD("FCDUTF8CollationIterator", u8ci, cpi); 485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cpi.resetToStart(); 488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharIterator iter; 489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uiter_setString(&iter, s, LENGTHOF(s) - 1); // -1: without the terminating NUL 490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius FCDUIterCollationIterator uici(data, FALSE, iter, 0); 491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("FCDUIterCollationIterator constructor")) { 492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkFCD("FCDUIterCollationIterator", uici, cpi); 495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::checkAllocWeights(CollationWeights &cw, 498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lowerLimit, uint32_t upperLimit, int32_t n, 499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t someLength, int32_t minCount) { 500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!cw.allocWeights(lowerLimit, upperLimit, n)) { 501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationWeights::allocWeights(%lx, %lx, %ld) = FALSE", 502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)lowerLimit, (long)upperLimit, (long)n); 503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t previous = lowerLimit; 506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t count = 0; // number of weights that have someLength 507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < n; ++i) { 508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t w = cw.nextWeight(); 509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(w == 0xffffffff) { 510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() " 511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "returns only %ld weights", 512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)lowerLimit, (long)upperLimit, (long)n, (long)i); 513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!(previous < w && w < upperLimit)) { 516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() " 517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "number %ld -> %lx not between %lx and %lx", 518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)lowerLimit, (long)upperLimit, (long)n, 519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)(i + 1), (long)w, (long)previous, (long)upperLimit); 520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(CollationWeights::lengthOfWeight(w) == someLength) { ++count; } 523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(count < minCount) { 525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationWeights::allocWeights(%lx, %lx, %ld).nextWeight() " 526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "returns only %ld < %ld weights of length %d", 527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)lowerLimit, (long)upperLimit, (long)n, 528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)count, (long)minCount, (int)someLength); 529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestCollationWeights() { 533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationWeights cw; 534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Non-compressible primaries use 254 second bytes 02..FF. 536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("CollationWeights.initForPrimary(non-compressible)"); 537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw.initForPrimary(FALSE); 538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect 1 weight 11 and 254 weights 12xx. 539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10000000, 0x13000000, 255, 1, 1); 540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10000000, 0x13000000, 255, 2, 254); 541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect 255 two-byte weights from the ranges 10ff, 11xx, 1202. 542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10fefe40, 0x12030300, 260, 2, 255); 543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect 254 two-byte weights from the ranges 10ff and 11xx. 544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10fefe40, 0x12030300, 600, 2, 254); 545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect 254^2=64516 three-byte weights. 546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // During computation, there should be 3 three-byte ranges 547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 10ffff, 11xxxx, 120202. 548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The middle one should be split 64515:1, 549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and the newly-split-off range and the last ranged lengthened. 550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10fffe00, 0x12020300, 1 + 64516 + 254 + 1, 3, 64516); 551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect weights 1102 & 1103. 552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10ff0000, 0x11040000, 2, 2, 2); 553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect weights 102102 & 102103. 554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x1020ff00, 0x10210400, 2, 3, 2); 555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Compressible primaries use 251 second bytes 04..FE. 557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("CollationWeights.initForPrimary(compressible)"); 558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw.initForPrimary(TRUE); 559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect 1 weight 11 and 251 weights 12xx. 560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10000000, 0x13000000, 252, 1, 1); 561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10000000, 0x13000000, 252, 2, 251); 562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect 252 two-byte weights from the ranges 10fe, 11xx, 1204. 563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10fdfe40, 0x12050300, 260, 2, 252); 564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect weights 1104 & 1105. 565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x10fe0000, 0x11060000, 2, 2, 2); 566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect weights 102102 & 102103. 567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x1020ff00, 0x10210400, 2, 3, 2); 568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Secondary and tertiary weights use only bytes 3 & 4. 570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("CollationWeights.initForSecondary()"); 571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw.initForSecondary(); 572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect weights fbxx and all four fc..ff. 573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0xfb20, 0x10000, 20, 3, 4); 574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("CollationWeights.initForTertiary()"); 576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw.initForTertiary(); 577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Expect weights 3dxx and both 3e & 3f. 578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkAllocWeights(cw, 0x3d02, 0x4000, 10, 3, 2); 579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool isValidCE(const CollationRootElements &re, const CollationData &data, 584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p, uint32_t s, uint32_t ctq) { 585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p1 = p >> 24; 586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p2 = (p >> 16) & 0xff; 587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p3 = (p >> 8) & 0xff; 588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p4 = p & 0xff; 589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t s1 = s >> 8; 590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t s2 = s & 0xff; 591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // ctq = Case, Tertiary, Quaternary 592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t c = (ctq & Collation::CASE_MASK) >> 14; 593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t t = ctq & Collation::ONLY_TERTIARY_MASK; 594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t t1 = t >> 8; 595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t t2 = t & 0xff; 596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t q = ctq & Collation::QUATERNARY_MASK; 597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No leading zero bytes. 598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((p != 0 && p1 == 0) || (s != 0 && s1 == 0) || (t != 0 && t1 == 0)) { 599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No intermediate zero bytes. 602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p1 != 0 && p2 == 0 && (p & 0xffff) != 0) { 603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p2 != 0 && p3 == 0 && p4 != 0) { 606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Minimum & maximum lead bytes. 609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((p1 != 0 && p1 <= Collation::MERGE_SEPARATOR_BYTE) || 610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (s1 != 0 && s1 <= Collation::MERGE_SEPARATOR_BYTE) || 611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (t1 != 0 && t1 <= Collation::MERGE_SEPARATOR_BYTE)) { 612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t1 != 0 && t1 > 0x3f) { 615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c > 2) { 618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The valid byte range for the second primary byte depends on compressibility. 621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p2 != 0) { 622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(data.isCompressibleLeadByte(p1)) { 623fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p2 <= Collation::PRIMARY_COMPRESSION_LOW_BYTE || 624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::PRIMARY_COMPRESSION_HIGH_BYTE <= p2) { 625fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p2 <= Collation::LEVEL_SEPARATOR_BYTE) { 629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Other bytes just need to avoid the level separator. 634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Trailing zeros are ok. 635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(Collation::LEVEL_SEPARATOR_BYTE == 1); 636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p3 == Collation::LEVEL_SEPARATOR_BYTE || p4 == Collation::LEVEL_SEPARATOR_BYTE || 637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s2 == Collation::LEVEL_SEPARATOR_BYTE || t2 == Collation::LEVEL_SEPARATOR_BYTE) { 638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Well-formed CEs. 641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p == 0) { 642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(s == 0) { 643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t == 0) { 644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Completely ignorable CE. 645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Quaternary CEs are not supported. 646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c != 0 || q != 0) { 647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Tertiary CE. 651fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t < re.getTertiaryBoundary() || c != 2) { 652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Secondary CE. 657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(s < re.getSecondaryBoundary() || t == 0 || t >= re.getTertiaryBoundary()) { 658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Primary CE. 663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(s == 0 || (Collation::COMMON_WEIGHT16 < s && s <= re.getLastCommonSecondary()) || 664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s >= re.getSecondaryBoundary()) { 665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t == 0 || t >= re.getTertiaryBoundary()) { 668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool isValidCE(const CollationRootElements &re, const CollationData &data, int64_t ce) { 675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = (uint32_t)(ce >> 32); 676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t secTer = (uint32_t)ce; 677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return isValidCE(re, data, p, secTer >> 16, secTer & 0xffff); 678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass RootElementsIterator { 681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius RootElementsIterator(const CollationData &root) 683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : data(root), 684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius elements(root.rootElements), length(root.rootElementsLength), 685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius pri(0), secTer(0), 686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius index((int32_t)elements[CollationRootElements::IX_FIRST_TERTIARY_INDEX]) {} 687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool next() { 689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(index >= length) { return FALSE; } 690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = elements[index]; 691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(p == CollationRootElements::PRIMARY_SENTINEL) { return FALSE; } 692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((p & CollationRootElements::SEC_TER_DELTA_FLAG) != 0) { 693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++index; 694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius secTer = p & ~CollationRootElements::SEC_TER_DELTA_FLAG; 695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((p & CollationRootElements::PRIMARY_STEP_MASK) != 0) { 698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // End of a range, enumerate the primaries in the range. 699fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t step = (int32_t)p & CollationRootElements::PRIMARY_STEP_MASK; 700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius p &= 0xffffff00; 701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(pri == p) { 702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Finished the range, return the next CE after it. 703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++index; 704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return next(); 705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(pri < p); 707fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Return the next primary in this range. 708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool isCompressible = data.isCompressiblePrimary(pri); 709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((pri & 0xffff) == 0) { 710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius pri = Collation::incTwoBytePrimaryByOffset(pri, isCompressible, step); 711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 712fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius pri = Collation::incThreeBytePrimaryByOffset(pri, isCompressible, step); 713fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Simple primary CE. 717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++index; 718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius pri = p; 719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius secTer = Collation::COMMON_SEC_AND_TER_CE; 720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getPrimary() const { return pri; } 724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getSecTer() const { return secTer; } 725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 727fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData &data; 728fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint32_t *elements; 729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length; 730fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 731fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t pri; 732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t secTer; 733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t index; 734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 735fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} // namespace 737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestRootElements() { 739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestRootElements"); 740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *root = CollationRoot::getData(errorCode); 741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) { 742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 743fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationRootElements rootElements(root->rootElements, root->rootElementsLength); 745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius RootElementsIterator iter(*root); 746fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We check each root CE for validity, 748fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and we also verify that there is a tailoring gap between each two CEs. 749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationWeights cw1c; // compressible primary weights 750fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationWeights cw1u; // uncompressible primary weights 751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationWeights cw2; 752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationWeights cw3; 753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw1c.initForPrimary(TRUE); 755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw1u.initForPrimary(FALSE); 756fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw2.initForSecondary(); 757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cw3.initForTertiary(); 758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Note: The root elements do not include Han-implicit or unassigned-implicit CEs, 760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // nor the special merge-separator CE for U+FFFE. 761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t prevPri = 0; 762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t prevSec = 0; 763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t prevTer = 0; 764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(iter.next()) { 765fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t pri = iter.getPrimary(); 766fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t secTer = iter.getSecTer(); 767fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // CollationRootElements CEs must have 0 case and quaternary bits. 768fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((secTer & Collation::CASE_AND_QUATERNARY_MASK) != 0) { 769fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("CollationRootElements CE has non-zero case and/or quaternary bits: %08lx %08lx", 770fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)pri, (long)secTer); 771fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 772fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t sec = secTer >> 16; 773fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t ter = secTer & Collation::ONLY_TERTIARY_MASK; 774fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t ctq = ter; 775fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(pri == 0 && sec == 0 && ter != 0) { 776fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Tertiary CEs must have uppercase bits, 777fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // but they are not stored in the CollationRootElements. 778fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ctq |= 0x8000; 779fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 780fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!isValidCE(rootElements, *root, pri, sec, ctq)) { 781fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid root CE %08lx %08lx", (long)pri, (long)secTer); 782fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 783fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(pri != prevPri) { 784fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t newWeight = 0; 785fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(prevPri == 0 || prevPri >= Collation::FFFD_PRIMARY) { 786fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // There is currently no tailoring gap after primary ignorables, 787fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and we forbid tailoring after U+FFFD and U+FFFF. 788fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(root->isCompressiblePrimary(prevPri)) { 789fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!cw1c.allocWeights(prevPri, pri, 1)) { 790fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("no primary/compressible tailoring gap between %08lx and %08lx", 791fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)prevPri, (long)pri); 792fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 793fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newWeight = cw1c.nextWeight(); 794fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 795fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 796fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!cw1u.allocWeights(prevPri, pri, 1)) { 797fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("no primary/uncompressible tailoring gap between %08lx and %08lx", 798fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)prevPri, (long)pri); 799fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 800fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newWeight = cw1u.nextWeight(); 801fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 802fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 803fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(newWeight != 0 && !(prevPri < newWeight && newWeight < pri)) { 804fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("mis-allocated primary weight, should get %08lx < %08lx < %08lx", 805fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)prevPri, (long)newWeight, (long)pri); 806fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 807fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(sec != prevSec) { 808fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lowerLimit = 809fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevSec == 0 ? rootElements.getSecondaryBoundary() - 0x100 : prevSec; 810fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!cw2.allocWeights(lowerLimit, sec, 1)) { 811fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("no secondary tailoring gap between %04x and %04x", lowerLimit, sec); 812fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 813fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t newWeight = cw2.nextWeight(); 814fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!(prevSec < newWeight && newWeight < sec)) { 815fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("mis-allocated secondary weight, should get %04x < %04x < %04x", 816fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)lowerLimit, (long)newWeight, (long)sec); 817fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 818fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 819fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(ter != prevTer) { 820fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lowerLimit = 821fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevTer == 0 ? rootElements.getTertiaryBoundary() - 0x100 : prevTer; 822fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!cw3.allocWeights(lowerLimit, ter, 1)) { 823fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("no teriary tailoring gap between %04x and %04x", lowerLimit, ter); 824fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 825fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t newWeight = cw3.nextWeight(); 826fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!(prevTer < newWeight && newWeight < ter)) { 827fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("mis-allocated secondary weight, should get %04x < %04x < %04x", 828fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long)lowerLimit, (long)newWeight, (long)ter); 829fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 830fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 831fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 832fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("duplicate root CE %08lx %08lx", (long)pri, (long)secTer); 833fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 834fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 835fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevPri = pri; 836fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevSec = sec; 837fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevTer = ter; 838fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 839fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 840fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 841fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestTailoredElements() { 842fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestTailoredElements"); 843fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *root = CollationRoot::getData(errorCode); 844fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logDataIfFailureAndReset("CollationRoot::getData()")) { 845fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 846fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 847fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationRootElements rootElements(root->rootElements, root->rootElementsLength); 848fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 849fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UHashtable *prevLocales = uhash_open(uhash_hashChars, uhash_compareChars, NULL, errorCode); 850fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("failed to create a hash table")) { 851fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 852fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 853fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_setKeyDeleter(prevLocales, uprv_free); 854fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // TestRootElements() tests the root collator which does not have tailorings. 855fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_puti(prevLocales, uprv_strdup(""), 1, errorCode); 856fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_puti(prevLocales, uprv_strdup("root"), 1, errorCode); 857fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_puti(prevLocales, uprv_strdup("root@collation=standard"), 1, errorCode); 858fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 859fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector64 ces(errorCode); 860fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalPointer<StringEnumeration> locales(Collator::getAvailableLocales()); 861fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(locales.isValid()); 862fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const char *localeID = "root"; 863fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 864fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Locale locale(localeID); 865fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalPointer<StringEnumeration> types( 866fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collator::getKeywordValuesForLocale("collation", locale, FALSE, errorCode)); 867fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 868fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const char *type = NULL; // default type 869fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 870fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Locale localeWithType(locale); 871fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(type != NULL) { 872fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius localeWithType.setKeywordValue("collation", type, errorCode); 873fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 874fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 875fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalPointer<Collator> coll(Collator::createInstance(localeWithType, errorCode)); 876fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("Collator::createInstance(%s)", 877fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius localeWithType.getName())) { 878fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 879fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 880fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Locale actual = coll->getLocale(ULOC_ACTUAL_LOCALE, errorCode); 881fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(uhash_geti(prevLocales, actual.getName()) != 0) { 882fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 883fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 884fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_puti(prevLocales, uprv_strdup(actual.getName()), 1, errorCode); 885fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 886fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("TestTailoredElements(): requested %s -> actual %s", 887fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius localeWithType.getName(), actual.getName()); 888fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(coll.getAlias()); 889fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(rbc == NULL) { 890fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 891fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 892fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Note: It would be better to get tailored strings such that we can 893fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // identify the prefix, and only get the CEs for the prefix+string, 894fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // not also for the prefix. 895fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // There is currently no API for that. 896fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // It would help in an unusual case where a contraction starting in the prefix 897fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // extends past its end, and we do not see the intended mapping. 898fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // For example, for a mapping p|st, if there is also a contraction ps, 899fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // then we get CEs(ps)+CEs(t), rather than CEs(p|st). 900fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalPointer<UnicodeSet> tailored(coll->getTailoredSet(errorCode)); 901fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 902fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSetIterator iter(*tailored); 903fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(iter.next()) { 904fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &s = iter.getString(); 905fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ces.removeAllElements(); 906fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rbc->internalGetCEs(s, ces, errorCode); 907fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 908fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < ces.size(); ++i) { 909fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = ces.elementAti(i); 910fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!isValidCE(rootElements, *root, ce)) { 911fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid tailored CE %016llx at CE index %d from string:", 912fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (long long)ce, (int)i); 913fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prettify(s)); 914fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 915fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 916fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 917fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while((type = types->next(NULL, errorCode)) != NULL); 918fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while((localeID = locales->next(NULL, errorCode)) != NULL); 919fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_close(prevLocales); 920fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 921fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 922fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUnicodeString CollationTest::printSortKey(const uint8_t *p, int32_t length) { 923fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString s; 924fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < length; ++i) { 925fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i > 0) { s.append((UChar)0x20); } 926fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint8_t b = p[i]; 927fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(b == 0) { 928fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.append((UChar)0x2e); // period 929fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(b == 1) { 930fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.append((UChar)0x7c); // vertical bar 931fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 932fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius appendHex(b, 2, s); 933fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 934fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 935fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return s; 936fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 937fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 938fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUnicodeString CollationTest::printCollationKey(const CollationKey &key) { 939fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length; 940fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *p = key.getByteArray(length); 941fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return printSortKey(p, length); 942fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 943fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 944fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::readLine(UCHARBUF *f, IcuTestErrorCode &errorCode) { 945fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t lineLength; 946fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *line = ucbuf_readline(f, &lineLength, errorCode); 947fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(line == NULL || errorCode.isFailure()) { 948fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 949fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 950fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 951fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++fileLineNumber; 952fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Strip trailing CR/LF, comments, and spaces. 953fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *comment = u_memchr(line, 0x23, lineLength); // '#' 954fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(comment != NULL) { 955fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius lineLength = (int32_t)(comment - line); 956fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 957fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(lineLength > 0 && isCROrLF(line[lineLength - 1])) { --lineLength; } 958fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 959fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(lineLength > 0 && isSpace(line[lineLength - 1])) { --lineLength; } 960fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.setTo(FALSE, line, lineLength); 961fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 962fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 963fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 964fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::parseString(int32_t &start, UnicodeString &prefix, UnicodeString &s, 965fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode) { 966fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length = fileLine.length(); 967fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i; 968fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(i = start; i < length && !isSpace(fileLine[i]); ++i) {} 969fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t pipeIndex = fileLine.indexOf((UChar)0x7c, start, i - start); // '|' 970fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(pipeIndex >= 0) { 971fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prefix = fileLine.tempSubStringBetween(start, pipeIndex).unescape(); 972fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(prefix.isEmpty()) { 973fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("empty prefix on line %d", (int)fileLineNumber); 974fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 975fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_PARSE_ERROR; 976fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 977fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 978fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = pipeIndex + 1; 979fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 980fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prefix.remove(); 981fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 982fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s = fileLine.tempSubStringBetween(start, i).unescape(); 983fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(s.isEmpty()) { 984fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("empty string on line %d", (int)fileLineNumber); 985fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 986fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_PARSE_ERROR; 987fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 988fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 989fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = i; 990fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 991fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 992fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollation::Level CollationTest::parseRelationAndString(UnicodeString &s, IcuTestErrorCode &errorCode) { 993fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::Level relation; 994fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t start; 995fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(fileLine[0] == 0x3c) { // < 996fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar second = fileLine[1]; 997fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = 2; 998fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius switch(second) { 999fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x31: // <1 1000fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::PRIMARY_LEVEL; 1001fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1002fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x32: // <2 1003fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::SECONDARY_LEVEL; 1004fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1005fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x33: // <3 1006fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::TERTIARY_LEVEL; 1007fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1008fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x34: // <4 1009fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::QUATERNARY_LEVEL; 1010fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1011fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x63: // <c 1012fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::CASE_LEVEL; 1013fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1014fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius case 0x69: // <i 1015fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::IDENTICAL_LEVEL; 1016fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1017fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius default: // just < 1018fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::NO_LEVEL; 1019fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = 1; 1020fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1021fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1022fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(fileLine[0] == 0x3d) { // = 1023fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius relation = Collation::ZERO_LEVEL; 1024fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = 1; 1025fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1026fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = 0; 1027fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1028fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(start == 0 || !isSpace(fileLine[start])) { 1029fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("no relation (= < <1 <2 <c <3 <4 <i) at beginning of line %d", (int)fileLineNumber); 1030fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1031fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1032fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::NO_LEVEL; 1033fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1034fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = skipSpaces(start); 1035fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString prefix; 1036fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseString(start, prefix, s, errorCode); 1037fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isSuccess() && !prefix.isEmpty()) { 1038fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("prefix string not allowed for test string: on line %d", (int)fileLineNumber); 1039fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1040fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1041fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::NO_LEVEL; 1042fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1043fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(start < fileLine.length()) { 1044fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("unexpected line contents after test string on line %d", (int)fileLineNumber); 1045fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1046fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1047fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::NO_LEVEL; 1048fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1049fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return relation; 1050fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1051fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1052fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const struct { 1053fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const char *name; 1054fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttribute attr; 1055fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} attributes[] = { 1056fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "backwards", UCOL_FRENCH_COLLATION }, 1057fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "alternate", UCOL_ALTERNATE_HANDLING }, 1058fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "caseFirst", UCOL_CASE_FIRST }, 1059fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "caseLevel", UCOL_CASE_LEVEL }, 1060fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // UCOL_NORMALIZATION_MODE is turned on and off automatically. 1061fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "strength", UCOL_STRENGTH }, 1062fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // UCOL_HIRAGANA_QUATERNARY_MODE is deprecated. 1063fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "numeric", UCOL_NUMERIC_COLLATION } 1064fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 1065fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1066fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const struct { 1067fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const char *name; 1068fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value; 1069fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} attributeValues[] = { 1070fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "default", UCOL_DEFAULT }, 1071fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "primary", UCOL_PRIMARY }, 1072fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "secondary", UCOL_SECONDARY }, 1073fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "tertiary", UCOL_TERTIARY }, 1074fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "quaternary", UCOL_QUATERNARY }, 1075fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "identical", UCOL_IDENTICAL }, 1076fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "off", UCOL_OFF }, 1077fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "on", UCOL_ON }, 1078fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "shifted", UCOL_SHIFTED }, 1079fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "non-ignorable", UCOL_NON_IGNORABLE }, 1080fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "lower", UCOL_LOWER_FIRST }, 1081fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { "upper", UCOL_UPPER_FIRST } 1082fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 1083fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1084fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::parseAndSetAttribute(IcuTestErrorCode &errorCode) { 1085fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t start = skipSpaces(1); 1086fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t equalPos = fileLine.indexOf(0x3d); 1087fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(equalPos < 0) { 1088fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(fileLine.compare(start, 7, UNICODE_STRING("reorder", 7)) == 0) { 1089fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseAndSetReorderCodes(start + 7, errorCode); 1090fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1091fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1092fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("missing '=' on line %d", (int)fileLineNumber); 1093fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1094fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1095fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1096fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1097fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1098fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString attrString = fileLine.tempSubStringBetween(start, equalPos); 1099fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString valueString = fileLine.tempSubString(equalPos+1); 1100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(attrString == UNICODE_STRING("maxVariable", 11)) { 1101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColReorderCode max; 1102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(valueString == UNICODE_STRING("space", 5)) { 1103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius max = UCOL_REORDER_CODE_SPACE; 1104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(valueString == UNICODE_STRING("punct", 5)) { 1105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius max = UCOL_REORDER_CODE_PUNCTUATION; 1106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(valueString == UNICODE_STRING("symbol", 6)) { 1107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius max = UCOL_REORDER_CODE_SYMBOL; 1108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(valueString == UNICODE_STRING("currency", 8)) { 1109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius max = UCOL_REORDER_CODE_CURRENCY; 1110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid attribute value name on line %d", (int)fileLineNumber); 1112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->setMaxVariable(max, errorCode); 1117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("setMaxVariable() failed on line %d: %s", 1119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, errorCode.errorName()); 1120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 1124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttribute attr; 1128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0;; ++i) { 1129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == LENGTHOF(attributes)) { 1130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid attribute name on line %d", (int)fileLineNumber); 1131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(attrString == UnicodeString(attributes[i].name, -1, US_INV)) { 1136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius attr = attributes[i].attr; 1137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue value; 1142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0;; ++i) { 1143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == LENGTHOF(attributeValues)) { 1144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid attribute value name on line %d", (int)fileLineNumber); 1145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(valueString == UnicodeString(attributeValues[i].name, -1, US_INV)) { 1150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius value = attributeValues[i].value; 1151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->setAttribute(attr, value, errorCode); 1156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("illegal attribute=value combination on line %d: %s", 1158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, errorCode.errorName()); 1159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 1163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::parseAndSetReorderCodes(int32_t start, IcuTestErrorCode &errorCode) { 1166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UVector32 reorderCodes(errorCode); 1167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(start < fileLine.length()) { 1168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = skipSpaces(start); 1169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t limit = start; 1170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(limit < fileLine.length() && !isSpace(fileLine[limit])) { ++limit; } 1171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString name; 1172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius name.appendInvariantChars(fileLine.tempSubStringBetween(start, limit), errorCode); 1173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t code = CollationRuleParser::getReorderCode(name.data()); 1174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(code < -1) { 1175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid reorder code '%s' on line %d", name.data(), (int)fileLineNumber); 1176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_PARSE_ERROR); 1178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius reorderCodes.addElement(code, errorCode); 1181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = limit; 1182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->setReorderCodes(reorderCodes.getBuffer(), reorderCodes.size(), errorCode); 1184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("setReorderCodes() failed on line %d: %s", (int)fileLineNumber, errorCode.errorName()); 1186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 1190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::buildTailoring(UCHARBUF *f, IcuTestErrorCode &errorCode) { 1193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString rules; 1194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(readLine(f, errorCode)) { 1195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(fileLine.isEmpty()) { continue; } 1196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isSectionStarter(fileLine[0])) { break; } 1197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rules.append(fileLine.unescape()); 1198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return; } 1200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln(rules); 1201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UParseError parseError; 1203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString reason; 1204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 1205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll = new RuleBasedCollator(rules, parseError, reason, errorCode); 1206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(coll == NULL) { 1207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("unable to allocate a new collator"); 1208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.set(U_MEMORY_ALLOCATION_ERROR); 1209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("RuleBasedCollator(rules) failed - %s", errorCode.errorName()); 1213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(UnicodeString(" reason: ") + reason); 1214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(parseError.offset >= 0) { infoln(" rules offset: %d", (int)parseError.offset); } 1215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) { 1216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(UnicodeString(" snippet: ...") + 1217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseError.preContext + "(!)" + parseError.postContext + "..."); 1218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius assertEquals("no error reason when RuleBasedCollator(rules) succeeds", 1221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString(), reason); 1222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::setRootCollator(IcuTestErrorCode &errorCode) { 1226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return; } 1227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 1228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll = Collator::createInstance(Locale::getRoot(), errorCode); 1229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dataerrln("unable to create a root collator"); 1231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::setLocaleCollator(IcuTestErrorCode &errorCode) { 1236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return; } 1237dbc22bd174be483711cea006f3189d8289835830ccornelius int32_t at = fileLine.indexOf((UChar)0x40, 9); // @ is not invariant 1238dbc22bd174be483711cea006f3189d8289835830ccornelius if(at >= 0) { 1239dbc22bd174be483711cea006f3189d8289835830ccornelius fileLine.setCharAt(at, (UChar)0x2a); // * 1240dbc22bd174be483711cea006f3189d8289835830ccornelius } 1241dbc22bd174be483711cea006f3189d8289835830ccornelius CharString localeID; 1242dbc22bd174be483711cea006f3189d8289835830ccornelius localeID.appendInvariantChars(fileLine.tempSubString(9), errorCode); 1243dbc22bd174be483711cea006f3189d8289835830ccornelius if(at >= 0) { 1244dbc22bd174be483711cea006f3189d8289835830ccornelius localeID.data()[at - 9] = '@'; 1245dbc22bd174be483711cea006f3189d8289835830ccornelius } 1246dbc22bd174be483711cea006f3189d8289835830ccornelius Locale locale(localeID.data()); 1247dbc22bd174be483711cea006f3189d8289835830ccornelius if(fileLine.length() == 9 || errorCode.isFailure() || locale.isBogus()) { 1248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("invalid language tag on line %d", (int)fileLineNumber); 1249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isSuccess()) { errorCode.set(U_PARSE_ERROR); } 1251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln("creating a collator for locale ID %s", locale.getName()); 1255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collator *newColl = Collator::createInstance(locale, errorCode); 1256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dataerrln("unable to create a collator for locale %s on line %d", 1258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius locale.getName(), (int)fileLineNumber); 1259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete coll; 1263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll = newColl; 1264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::needsNormalization(const UnicodeString &s, UErrorCode &errorCode) const { 1267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode) || !fcd->isNormalized(s, errorCode)) { return TRUE; } 1268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // In some sequences with Tibetan composite vowel signs, 1269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // even if the string passes the FCD check, 1270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // those composites must be decomposed. 1271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Check if s contains 0F71 immediately followed by 0F73 or 0F75 or 0F81. 1272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t index = 0; 1273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while((index = s.indexOf((UChar)0xf71, index)) >= 0) { 1274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(++index < s.length()) { 1275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = s[index]; 1276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c == 0xf73 || c == 0xf75 || c == 0xf81) { return TRUE; } 1277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::getSortKeyParts(const UChar *s, int32_t length, 1283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString &dest, int32_t partSize, 1284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode &errorCode) { 1285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return FALSE; } 1286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint8_t part[32]; 1287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(partSize <= LENGTHOF(part)); 1288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharIterator iter; 1289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uiter_setString(&iter, s, length); 1290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t state[2] = { 0, 0 }; 1291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(;;) { 1292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t partLength = coll->internalNextSortKeyPart(&iter, state, part, partSize, errorCode); 1293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool done = partLength < partSize; 1294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(done) { 1295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // At the end, append the next byte as well which should be 00. 1296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++partLength; 1297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest.append(reinterpret_cast<char *>(part), partLength, errorCode); 1299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(done) { 1300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return errorCode.isSuccess(); 1301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::getCollationKey(const char *norm, const UnicodeString &line, 1306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *s, int32_t length, 1307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationKey &key, IcuTestErrorCode &errorCode) { 1308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return FALSE; } 1309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->getCollationKey(s, length, key, errorCode); 1310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).getCollationKey() failed: %s", 1313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius norm, errorCode.errorName()); 1314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t keyLength; 1318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *keyBytes = key.getByteArray(keyLength); 1319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(keyLength == 0 || keyBytes[keyLength - 1] != 0) { 1320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).getCollationKey() wrote an empty or unterminated key", 1322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius norm); 1323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t numLevels = coll->getAttribute(UCOL_STRENGTH, errorCode); 1329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(numLevels < UCOL_IDENTICAL) { 1330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++numLevels; 1331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius numLevels = 5; 1333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_ON) { 1335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++numLevels; 1336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 1338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t numLevelSeparators = 0; 1339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0; i < (keyLength - 1); ++i) { 1340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint8_t b = keyBytes[i]; 1341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(b == 0) { 1342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).getCollationKey() contains a 00 byte", norm); 1344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(b == 1) { ++numLevelSeparators; } 1349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(numLevelSeparators != (numLevels - 1)) { 1351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).getCollationKey() has %d level separators for %d levels", 1353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius norm, (int)numLevelSeparators, (int)numLevels); 1354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // If s contains U+FFFE, check that merged segments make the same key. 1360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalMemory<uint8_t> mergedKey; 1361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t mergedKeyLength = 0; 1362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t mergedKeyCapacity = 0; 1363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t sLength = (length >= 0) ? length : u_strlen(s); 1364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t segmentStart = 0; 1365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0;;) { 1366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == sLength) { 1367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(segmentStart == 0) { 1368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // s does not contain any U+FFFE. 1369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(s[i] != 0xfffe) { 1372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 1373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 1374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Get the sort key for another segment and merge it into mergedKey. 1376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationKey key1(mergedKey.getAlias(), mergedKeyLength); // copies the bytes 1377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationKey key2; 1378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->getCollationKey(s + segmentStart, i - segmentStart, key2, errorCode); 1379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t key1Length, key2Length; 1380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *key1Bytes = key1.getByteArray(key1Length); 1381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *key2Bytes = key2.getByteArray(key2Length); 1382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint8_t *dest; 1383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t minCapacity = key1Length + key2Length; 1384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(key1Length > 0) { --minCapacity; } 1385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(minCapacity <= mergedKeyCapacity) { 1386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest = mergedKey.getAlias(); 1387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(minCapacity <= 200) { 1389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius mergedKeyCapacity = 200; 1390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(minCapacity <= 2 * mergedKeyCapacity) { 1391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius mergedKeyCapacity *= 2; 1392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius mergedKeyCapacity = minCapacity; 1394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest = mergedKey.allocateInsteadAndReset(mergedKeyCapacity); 1396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(dest != NULL || mergedKeyCapacity == 0); 1398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(key1Length == 0) { 1399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // key2 is the sort key for the first segment. 1400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcpy(dest, key2Bytes, key2Length); 1401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius mergedKeyLength = key2Length; 1402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius mergedKeyLength = 1404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ucol_mergeSortkeys(key1Bytes, key1Length, key2Bytes, key2Length, 1405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest, mergedKeyCapacity); 1406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(i == sLength) { break; } 1408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius segmentStart = ++i; 1409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(segmentStart != 0 && 1411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (mergedKeyLength != keyLength || 1412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcmp(mergedKey.getAlias(), keyBytes, keyLength) != 0)) { 1413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).getCollationKey(with U+FFFE) != " 1415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "ucol_mergeSortkeys(segments)", 1416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius norm); 1417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printSortKey(mergedKey.getAlias(), mergedKeyLength)); 1420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Check that internalNextSortKeyPart() makes the same key, with several part sizes. 1424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t partSizes[] = { 32, 3, 1 }; 1425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t psi = 0; psi < LENGTHOF(partSizes); ++psi) { 1426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t partSize = partSizes[psi]; 1427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString parts; 1428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!getSortKeyParts(s, length, parts, 32, errorCode)) { 1429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).internalNextSortKeyPart(%d) failed: %s", 1431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius norm, (int)partSize, errorCode.errorName()); 1432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(keyLength != parts.length() || uprv_memcmp(keyBytes, parts.data(), keyLength) != 0) { 1436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("Collator(%s).getCollationKey() != internalNextSortKeyPart(%d)", 1438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius norm, (int)partSize); 1439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(line); 1440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printSortKey(reinterpret_cast<uint8_t *>(parts.data()), parts.length())); 1442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 1446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 1449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 1451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Replaces unpaired surrogates with U+FFFD. 1452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns s if no replacement was made, otherwise buffer. 1453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 1454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusconst UnicodeString &surrogatesToFFFD(const UnicodeString &s, UnicodeString &buffer) { 1455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = 0; 1456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(i < s.length()) { 1457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c = s.char32At(i); 1458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_IS_SURROGATE(c)) { 1459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(buffer.length() < i) { 1460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius buffer.append(s, buffer.length(), i - buffer.length()); 1461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius buffer.append((UChar)0xfffd); 1463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius i += U16_LENGTH(c); 1465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(buffer.isEmpty()) { 1467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return s; 1468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(buffer.length() < i) { 1470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius buffer.append(s, buffer.length(), i - buffer.length()); 1471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return buffer; 1473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool CollationTest::checkCompareTwo(const char *norm, const UnicodeString &prevFileLine, 1478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &prevString, const UnicodeString &s, 1479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCollationResult expectedOrder, Collation::Level expectedLevel, 1480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode &errorCode) { 1481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return FALSE; } 1482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Get the sort keys first, for error debug output. 1484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationKey prevKey; 1485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!getCollationKey(norm, prevFileLine, prevString.getBuffer(), prevString.length(), 1486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevKey, errorCode)) { 1487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationKey key; 1490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!getCollationKey(norm, fileLine, s.getBuffer(), s.length(), key, errorCode)) { return FALSE; } 1491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCollationResult order = coll->compare(prevString, s, errorCode); 1493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != expectedOrder || errorCode.isFailure()) { 1494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compare(previous, current) wrong order: %d != %d (%s)", 1496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); 1497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->compare(s, prevString, errorCode); 1504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != -expectedOrder || errorCode.isFailure()) { 1505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compare(current, previous) wrong order: %d != %d (%s)", 1507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, -expectedOrder, errorCode.errorName()); 1508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Test NUL-termination if the strings do not contain NUL characters. 1515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool containNUL = prevString.indexOf((UChar)0) >= 0 || s.indexOf((UChar)0) >= 0; 1516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!containNUL) { 1517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->compare(prevString.getBuffer(), -1, s.getBuffer(), -1, errorCode); 1518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != expectedOrder || errorCode.isFailure()) { 1519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compare(previous-NUL, current-NUL) wrong order: %d != %d (%s)", 1521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); 1522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->compare(s.getBuffer(), -1, prevString.getBuffer(), -1, errorCode); 1529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != -expectedOrder || errorCode.isFailure()) { 1530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compare(current-NUL, previous-NUL) wrong order: %d != %d (%s)", 1532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, -expectedOrder, errorCode.errorName()); 1533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if U_HAVE_STD_STRING 1542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // compare(UTF-16) treats unpaired surrogates like unassigned code points. 1543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Unpaired surrogates cannot be converted to UTF-8. 1544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Create valid UTF-16 strings if necessary, and use those for 1545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // both the expected compare() result and for the input to compare(UTF-8). 1546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString prevBuffer, sBuffer; 1547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &prevValid = surrogatesToFFFD(prevString, prevBuffer); 1548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &sValid = surrogatesToFFFD(s, sBuffer); 1549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius std::string prevUTF8, sUTF8; 1550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString(prevValid).toUTF8String(prevUTF8); 1551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString(sValid).toUTF8String(sUTF8); 1552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCollationResult expectedUTF8Order; 1553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(&prevValid == &prevString && &sValid == &s) { 1554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expectedUTF8Order = expectedOrder; 1555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expectedUTF8Order = coll->compare(prevValid, sValid, errorCode); 1557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->compareUTF8(prevUTF8, sUTF8, errorCode); 1560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != expectedUTF8Order || errorCode.isFailure()) { 1561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compareUTF8(previous, current) wrong order: %d != %d (%s)", 1563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, expectedUTF8Order, errorCode.errorName()); 1564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->compareUTF8(sUTF8, prevUTF8, errorCode); 1571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != -expectedUTF8Order || errorCode.isFailure()) { 1572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compareUTF8(current, previous) wrong order: %d != %d (%s)", 1574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, -expectedUTF8Order, errorCode.errorName()); 1575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Test NUL-termination if the strings do not contain NUL characters. 1582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!containNUL) { 1583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->internalCompareUTF8(prevUTF8.c_str(), -1, sUTF8.c_str(), -1, errorCode); 1584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != expectedUTF8Order || errorCode.isFailure()) { 1585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).internalCompareUTF8(previous-NUL, current-NUL) wrong order: %d != %d (%s)", 1587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, expectedUTF8Order, errorCode.errorName()); 1588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->internalCompareUTF8(sUTF8.c_str(), -1, prevUTF8.c_str(), -1, errorCode); 1595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != -expectedUTF8Order || errorCode.isFailure()) { 1596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).internalCompareUTF8(current-NUL, previous-NUL) wrong order: %d != %d (%s)", 1598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, -expectedUTF8Order, errorCode.errorName()); 1599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif 1607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharIterator leftIter; 1609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharIterator rightIter; 1610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uiter_setString(&leftIter, prevString.getBuffer(), prevString.length()); 1611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uiter_setString(&rightIter, s.getBuffer(), s.length()); 1612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = coll->compare(leftIter, rightIter, errorCode); 1613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != expectedOrder || errorCode.isFailure()) { 1614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).compare(UCharIterator: previous, current) " 1616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius "wrong order: %d != %d (%s)", 1617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); 1618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1623fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1625fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order = prevKey.compareTo(key, errorCode); 1626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != expectedOrder || errorCode.isFailure()) { 1627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).getCollationKey(previous, current).compareTo() wrong order: %d != %d (%s)", 1629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, expectedOrder, errorCode.errorName()); 1630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(order != UCOL_EQUAL && expectedLevel != Collation::NO_LEVEL) { 1637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t prevKeyLength; 1638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *prevBytes = prevKey.getByteArray(prevKeyLength); 1639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t keyLength; 1640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const uint8_t *bytes = key.getByteArray(keyLength); 1641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t level = Collation::PRIMARY_LEVEL; 1642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for(int32_t i = 0;; ++i) { 1643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint8_t b = prevBytes[i]; 1644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(b != bytes[i]) { break; } 1645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(b == Collation::LEVEL_SEPARATOR_BYTE) { 1646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++level; 1647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(level == Collation::CASE_LEVEL && 1648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->getAttribute(UCOL_CASE_LEVEL, errorCode) == UCOL_OFF) { 1649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++level; 1650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1651fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(level != expectedLevel) { 1654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileTestName); 1655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("line %d Collator(%s).getCollationKey(previous, current).compareTo()=%d wrong level: %d != %d", 1656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (int)fileLineNumber, norm, order, level, expectedLevel); 1657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(prevFileLine); 1658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(prevKey)); 1660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(printCollationKey(key)); 1661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 1662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 1665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::checkCompareStrings(UCHARBUF *f, IcuTestErrorCode &errorCode) { 1668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { return; } 1669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString prevFileLine = UNICODE_STRING("(none)", 6); 1670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString prevString, s; 1671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevString.getTerminatedBuffer(); // Ensure NUL-termination. 1672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(readLine(f, errorCode)) { 1673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(fileLine.isEmpty()) { continue; } 1674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isSectionStarter(fileLine[0])) { break; } 1675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::Level relation = parseRelationAndString(s, errorCode); 1676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.isFailure()) { 1677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.reset(); 1678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 1679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCollationResult expectedOrder = (relation == Collation::ZERO_LEVEL) ? UCOL_EQUAL : UCOL_LESS; 1681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Collation::Level expectedLevel = relation; 1682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius s.getTerminatedBuffer(); // Ensure NUL-termination. 1683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool isOk = TRUE; 1684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!needsNormalization(prevString, errorCode) && !needsNormalization(s, errorCode)) { 1685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, errorCode); 1686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius isOk = checkCompareTwo("normalization=on", prevFileLine, prevString, s, 1687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expectedOrder, expectedLevel, errorCode); 1688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isOk) { 1690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, errorCode); 1691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius isOk = checkCompareTwo("normalization=off", prevFileLine, prevString, s, 1692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expectedOrder, expectedLevel, errorCode); 1693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isOk && (!nfd->isNormalized(prevString, errorCode) || !nfd->isNormalized(s, errorCode))) { 1695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString pn = nfd->normalize(prevString, errorCode); 1696fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString n = nfd->normalize(s, errorCode); 1697fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius pn.getTerminatedBuffer(); 1698fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius n.getTerminatedBuffer(); 1699fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.assertSuccess(); 1700fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius isOk = checkCompareTwo("NFD input", prevFileLine, pn, n, 1701fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius expectedOrder, expectedLevel, errorCode); 1702fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1703fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!isOk) { 1704fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode.reset(); // already reported 1705fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1706fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevFileLine = fileLine; 1707fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevString = s; 1708fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius prevString.getTerminatedBuffer(); // Ensure NUL-termination. 1709fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1710fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1711fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1712fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid CollationTest::TestDataDriven() { 1713fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IcuTestErrorCode errorCode(*this, "TestDataDriven"); 1714fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1715fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fcd = Normalizer2Factory::getFCDInstance(errorCode); 1716fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius nfd = Normalizer2Factory::getNFDInstance(errorCode); 1717fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logDataIfFailureAndReset("Normalizer2Factory::getFCDInstance() or getNFDInstance()")) { 1718fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1719fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1720fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1721fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CharString path(getSourceTestData(errorCode), errorCode); 1722fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius path.appendPathPart("collationtest.txt", errorCode); 1723fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const char *codePage = "UTF-8"; 1724fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalUCHARBUFPointer f(ucbuf_open(path.data(), &codePage, TRUE, FALSE, errorCode)); 1725fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode.logIfFailureAndReset("ucbuf_open(collationtest.txt)")) { 1726fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1727fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1728fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius while(errorCode.isSuccess()) { 1729fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Read a new line if necessary. 1730fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Sub-parsers leave the first line set that they do not handle. 1731fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(fileLine.isEmpty()) { 1732fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!readLine(f.getAlias(), errorCode)) { break; } 1733fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius continue; 1734fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1735fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!isSectionStarter(fileLine[0])) { 1736fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("syntax error on line %d", (int)fileLineNumber); 1737fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1738fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1739fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1740fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(fileLine.startsWith(UNICODE_STRING("** test: ", 9))) { 1741fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileTestName = fileLine; 1742fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius logln(fileLine); 1743fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 1744fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(fileLine == UNICODE_STRING("@ root", 6)) { 1745fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setRootCollator(errorCode); 1746fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 1747fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(fileLine.startsWith(UNICODE_STRING("@ locale ", 9))) { 1748fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setLocaleCollator(errorCode); 1749fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fileLine.remove(); 1750fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(fileLine == UNICODE_STRING("@ rules", 7)) { 1751fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius buildTailoring(f.getAlias(), errorCode); 1752fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(fileLine[0] == 0x25 && isSpace(fileLine[1])) { // % 1753fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius parseAndSetAttribute(errorCode); 1754fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(fileLine == UNICODE_STRING("* compare", 9)) { 1755fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkCompareStrings(f.getAlias(), errorCode); 1756fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 1757fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errln("syntax error on line %d", (int)fileLineNumber); 1758fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius infoln(fileLine); 1759fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 1760fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1761fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 1762fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 1763fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1764fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 1765