/*
********************************************************************************
*   Copyright (C) 1999-2012 International Business Machines Corporation and
*   others. All Rights Reserved.
********************************************************************************
*   Date        Name        Description
*   10/20/99    alan        Creation.
*   03/22/2000  Madhu       Added additional tests
********************************************************************************
*/

#include <stdio.h>

#include <string.h>
#include "unicode/utypes.h"
#include "usettest.h"
#include "unicode/ucnv.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/usetiter.h"
#include "unicode/ustring.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uversion.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 306d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_errorName(status));}} 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define TEST_ASSERT(expr) {if (!(expr)) { \ 346d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat); 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return left + UnicodeSetTest::escape(pat); 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CASE(id,test) case id: \ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = 
#test; \ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) { \ 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(#test "---"); \ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(); \ 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test(); \ 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } \ 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) { 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUConverter *UnicodeSetTest::openUTF8Converter() { 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(utf8Cnv==NULL) { 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utf8Cnv=ucnv_open("UTF-8", &errorCode); 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return utf8Cnv; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::~UnicodeSetTest() { 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(utf8Cnv); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::runIndexedTest(int32_t index, UBool exec, 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* &name, char* /*par*/) { 
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (exec) logln((UnicodeString)"TestSuite UnicodeSetTest"); 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(0,TestPatterns); 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(1,TestAddRemove); 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(2,TestCategories); 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(3,TestCloneEqualHash); 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(4,TestMinimalRep); 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(5,TestAPI); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(6,TestScriptSet); 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(7,TestPropertySet); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(8,TestClone); 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(9,TestExhaustive); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(10,TestToPattern); 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(11,TestIndexOf); 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(12,TestStrings); 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(13,Testj2268); 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(14,TestCloseOver); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(15,TestEscapePattern); 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(16,TestInvalidCodePoint); 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(17,TestSymbolTable); 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(18,TestSurrogate); 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(19,TestPosixClasses); 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
CASE(20,TestIteration); 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(21,TestFreezable); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(22,TestSpan); 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(23,TestStringSpan); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; break; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char NOT[] = "%%%%"; 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UVector was improperly copying contents 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This code will crash this is still true 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::Testj2268() { 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(UnicodeString("abc")); 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet test(t); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ustrPat; 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test.toPattern(ustrPat, TRUE); 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test toPattern(). 
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestToPattern() { 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test that toPattern() round trips with syntax characters and 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // whitespace. 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char* OTHER_TOPATTERN_TESTS[] = { 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:latin:]&[:greek:]]", 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:latin:]-[:greek:]]", 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:nonspacing mark:]", 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t j=0; OTHER_TOPATTERN_TESTS[j]!=NULL; ++j) { 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_ZERO_ERROR; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1336d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec))); 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkPat(OTHER_TOPATTERN_TESTS[j], s); 
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (UChar32 i = 0; i <= 0x10FFFF; ++i) { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((i <= 0xFF && !u_isalpha(i)) || u_isspace(i)) { 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // check various combinations to make sure they all work. 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 0 && !toPatternAux(i, i)){ 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!toPatternAux(0, i)){ 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!toPatternAux(i, 0xFFFF)){ 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test pattern behavior of multicharacter strings. 
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_ZERO_ERROR; 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* s = new UnicodeSet("[a-z {aa} {ab}]", ec); 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This loop isn't a loop. It's here to make the compiler happy. 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If you're curious, try removing it and changing the 'break' 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // statements (except for the last) to goto's. 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) break; 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp1[] = {"aa", "ab", NOT, "ac", NULL}; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(*s, "[a-z{aa}{ab}]", exp1); 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add("ac"); 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL}; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2); 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec); 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) break; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp3[] = {"{l", "r}", NOT, "xy", NULL}; 176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3); 
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add("[]"); 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL}; 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4); 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec); 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) break; 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL}; 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // j2189 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->clear(); 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add(UnicodeString("abc", "")); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add(UnicodeString("abc", "")); 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp6[] = {"abc", NOT, "ab", NULL}; 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(*s, "[{abc}]", exp6); 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) errln("FAIL: pattern parse error"); 
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete s; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#3400: For 2 character ranges prefer [ab] to [a-b] 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.add((UChar)97, (UChar)98); // 'a', 'b' 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(s, "[ab]", NULL); 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::toPatternAux(UChar32 start, UChar32 end) { 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use Integer.toString because Utility.hex doesn't handle ints 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat = ""; 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO do these in hex 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //String source = "0x" + Integer.toString(start,16).toUpperCase(); 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase(); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = source + (uint32_t)start; 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start != end) 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = source + ".." 
+ (uint32_t)end; 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet testSet; 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSet.add(start, end); 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return checkPat(source, testSet); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source, 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet& testSet) { 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // What we want to make sure of is that a pattern generated 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // by toPattern(), with or without escaped unprintables, can 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be passed back into the UnicodeSet constructor. 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat0; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSet.toPattern(pat0, TRUE); 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!checkPat(source + " (escaped)", testSet, pat0)) return FALSE; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //String pat1 = unescapeLeniently(pat0); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (!checkPat(source + " (in code)", testSet, pat1)) return false; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat2; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSet.toPattern(pat2, FALSE); 
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!checkPat(source, testSet, pat2)) return FALSE; 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //String pat3 = unescapeLeniently(pat2); 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (!checkPat(source + " (in code)", testSet, pat3)) return false; 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3); 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)source + " => " + pat0 + ", " + pat2); 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source, 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet& testSet, 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& pat) { 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet testSet2(pat, ec); 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testSet2 != testSet) { 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail toPattern: " + source + " => " + pat); 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestPatterns(void) { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[[a-m]&[d-z]&[k-y]]", ""), "km"); 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[[a-z]-[m-y]-[d-r]]", ""), "aczz"); 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[a\\-z]", ""), "--aazz"); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[-az]", ""), "--aazz"); 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[az-]", ""), "--aazz"); 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[[[a-z]-[aeiou]i]]", ""), "bdfnptvz"); 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Throw in a test of complement 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString exp; 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(0x007a+1)).append((UChar)0xFFFF); 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, exp); 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCategories(void) { 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:] 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pat, status); 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 2846d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status))); 2856d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru return; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, pat, "ABC", "abc"); 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 i; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t failures = 0; 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure generation of L doesn't pollute cached Lu set 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First generate L, then Lu 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[:L:]", status); 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<0x200; ++i) { 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool l = u_isalpha((UChar)i); 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (l != set.contains(i)) { 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: L contains " + (unsigned short)i + " = " + 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.contains(i)); 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(++failures == 10) break; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[:Lu:]", status); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<0x200; ++i) { 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool lu = (u_charType((UChar)i) == U_UPPERCASE_LETTER); 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lu != set.contains(i)) { 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: Lu contains " + (unsigned short)i + " = " + 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.contains(i)); 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (++failures == 20) break; 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCloneEqualHash(void) { 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // set1 and set2 used to be built with the obsolete constructor taking 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UCharCategory values; replaced with pattern constructors 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // markus 20030502 322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // 
:Ll: Letter, lowercase 323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)){ 3256d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status))); 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit 329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status); //Number, Decimal digit 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)){ 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: Can't construct set with category->Nd"); 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*set1 != *set1a) { 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: category constructor for Ll broken"); 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*set2 != *set2a) { 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: category constructor for Nd broken"); 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1a; 
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set2a; 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing copy construction"); 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set1copy=new UnicodeSet(*set1); 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*set1 != *set1copy || *set1 == *set2 || 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru getPairs(*set1) != getPairs(*set1copy) || 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set1->hashCode() != set1copy->hashCode()){ 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL : Error in copy construction"); 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing =operator"); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set1equal=*set1; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set2equal=*set2; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(set1equal != *set1 || set1equal != *set1copy || set2equal != *set2 || 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2equal == *set1 || set2equal == *set1copy || set2equal == set1equal){ 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Error in =operator"); 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing clone()"); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set1clone=(UnicodeSet*)set1->clone(); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru UnicodeSet *set2clone=(UnicodeSet*)set2->clone(); 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*set1clone != *set1 || *set1clone != *set1copy || *set1clone != set1equal || 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *set2clone != *set2 || *set2clone == *set1copy || *set2clone != set2equal || 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *set2clone == *set1 || *set2clone == set1equal || *set2clone == *set1clone){ 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Error in clone"); 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing hashcode"); 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(set1->hashCode() != set1equal.hashCode() || set1->hashCode() != set1clone->hashCode() || 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2->hashCode() != set2equal.hashCode() || set2->hashCode() != set2clone->hashCode() || 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set1copy->hashCode() != set1equal.hashCode() || set1copy->hashCode() != set1clone->hashCode() || 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set1->hashCode() == set2->hashCode() || set1copy->hashCode() == set2->hashCode() || 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2->hashCode() == set1clone->hashCode() || set2->hashCode() == set1equal.hashCode() ){ 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Error in hashCode()"); 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1; 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1copy; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru delete set2; 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1clone; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set2clone; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestAddRemove(void) { 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; // Construct empty set 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == TRUE, "set should be empty"); 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0, "size should be 0"); 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0x110000, "size should be 0x110000"); 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061, 0x007a); 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "az"); 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == FALSE, "set should not be empty"); 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() != 0, "size should not be equal to 0"); 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 26, "size should be equal to 26"); 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x006d, 0x0070); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "alqz"); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 22, "size should be equal to 22"); 
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0065, 0x0067); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "adhlqz"); 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 19, "size should be equal to 19"); 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0064, 0x0069); 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "acjlqz"); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 16, "size should be equal to 16"); 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0063, 0x0072); 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "absz"); 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 10, "size should be equal to 10"); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0066, 0x0071); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "abfqsz"); 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 22, "size should be equal to 22"); 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0061, 0x0067); 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "hqsz"); 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0061, 0x007a); 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, ""); 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == TRUE, "set should be empty"); 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0, "size should be 0"); 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061); 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == FALSE, "set should not be empty"); 
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 1, "size should not be equal to 1"); 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0062); 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0063); 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "ac"); 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 3, "size should not be equal to 3"); 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0070); 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0071); 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "acpq"); 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 5, "size should not be equal to 5"); 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, ""); 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == TRUE, "set should be empty"); 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0, "size should be 0"); 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try removing an entire set from another set 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, "[c-x]", "cx"); 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set2; 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz"); 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.removeAll(set2); 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "deluxx"); 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try adding an entire set to another set 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, "[jackiemclean]", "aacceein"); 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort"); 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aacehort"); 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2"); 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try retaining an set of elements contained in another set (intersection) 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set3; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set3, "[a-c]", "ac"); 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set3) == FALSE, "set doesn't contain all the elements in set3"); 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set3.remove(0x0062); 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set3, "aacc"); 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3"); 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retainAll(set3); 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aacc"); 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == set3.size(), "set.size() should be set3.size()"); 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3"); 
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() != set3.size(), "set.size() != set3.size()"); 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test commutativity 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort"); 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set2, "[jackiemclean]", "aacceein"); 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aacehort"); 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2"); 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure minimal representation is maintained. 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestMinimalRep() { 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is pretty thoroughly tested by checkCanonicalRep() 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // run against the exhaustive operation results. 
Use the code 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // here for debugging specific spot problems. 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1 overlap against 2 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set("[h-km-q]", status); 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set2("[i-o]", status); 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "hq"); 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // right 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[a-m]", status); 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2.applyPattern("[e-o]", status); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "ao"); 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // left 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[e-o]", status); 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2.applyPattern("[a-m]", status); 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 
errln("FAIL"); return; } 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "ao"); 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1 overlap against 3 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[a-eg-mo-w]", status); 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2.applyPattern("[d-q]", status); 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aw"); 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestAPI() { 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // default ct 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.isEmpty() || set.getRangeCount() != 0) { 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, set should be empty but isn't: " + 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set); 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // clear(), isEmpty() 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061); 
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.isEmpty()) { 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, set shouldn't be empty but is: " + 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set); 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.isEmpty()) { 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, set should be empty but isn't: " + 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set); 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // size() 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 0) { 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, size should be 0, but is " + set.size() + 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": " + set); 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061); 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 1) { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, size should be 1, but is " + set.size() + 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": " + set); 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0031, 0x0039); 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 10) { 
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, size should be 10, but is " + set.size() + 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": " + set); 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains(first, last) 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[A-Y 1-8 b-d l-y]", status); 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i<set.getRangeCount(); ++i) { 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = set.getRangeStart(i); 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b = set.getRangeEnd(i); 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.contains(a, b)) { 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, should contain " + (unsigned short)a + '-' + (unsigned short)b + 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " but doesn't: " + set); 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains((UChar32)(a-1), b)) { 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, shouldn't contain " + 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned short)(a-1) + '-' + (unsigned short)b + 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " but does: " + set); 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains(a, 
(UChar32)(b+1))) { 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, shouldn't contain " + 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned short)a + '-' + (unsigned short)(b+1) + 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " but does: " + set); 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ported InversionList test. 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet a((UChar32)3,(UChar32)10); 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet b((UChar32)7,(UChar32)15); 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet c; 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"a [3-10]: " + a); 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"b [7-15]: " + b); 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = a; 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.addAll(b); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet exp((UChar32)3,(UChar32)15); 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.set(a).add(b): " + c); 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: c.set(a).add(b) = " + c + ", expect " + exp); 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.complement(); 
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.set((UChar32)0, (UChar32)2); 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.add((UChar32)16, UnicodeSet::MAX_VALUE); 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.complement(): " + c); 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp); 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.complement(); 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.set((UChar32)3, (UChar32)15); 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.complement(): " + c); 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp); 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = a; 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.complementAll(b); 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.set((UChar32)3,(UChar32)6); 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.add((UChar32)11,(UChar32) 15); 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.set(a).exclusiveOr(b): " + c); 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
errln((UnicodeString)"FAIL: c.set(a).exclusiveOr(b) = " + c + ", expect " + exp); 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp = c; 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(setToBits(c), c); 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"bitsToSet(setToBits(c)): " + c); 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp); 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Additional tests for coverage JB#2118 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::complement(class UnicodeString const &) 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::complementAll(class UnicodeString const &) 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsNone(class UnicodeSet const &) 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsNone(long,long) 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsSome(class UnicodeSet const &) 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsSome(long,long) 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::removeAll(class UnicodeString const &) 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::retain(long) 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::retainAll(class 
UnicodeString const &) 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &) 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSetIterator::getString(void) 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement("ab"); 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[{ab}]", status); 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: complement(\"ab\")"); return; } 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator iset(set); 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!iset.next() || !iset.isString()) { 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSetIterator::next/isString"); 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (iset.getString() != "ab") { 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSetIterator::getString"); 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add((UChar32)0x61, (UChar32)0x7A); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complementAll("alan"); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[{ab}b-kmo-z]", status); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: complementAll(\"alan\")"); 
return; } 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[a-z]", status); 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); } 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); } 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[aln]", status); 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); } 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsNone((UChar32)0x61, (UChar32)0x7A)) { 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsNone(UChar32, UChar32)"); 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsSome((UChar32)0x61, (UChar32)0x7A)) { 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsSome(UChar32, UChar32)"); 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsNone((UChar32)0x41, (UChar32)0x5A)) { 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsNone(UChar32, UChar32)"); 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsSome((UChar32)0x41, (UChar32)0x5A)) { 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsSome(UChar32, UChar32)"); 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.removeAll("liu"); 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[{ab}b-hj-kmo-tv-z]", status); 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: removeAll(\"liu\")"); return; } 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retainAll("star"); 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[rst]", status); 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: retainAll(\"star\")"); return; } 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retain((UChar32)0x73); 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[s]", status); 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: retain('s')"); return; } 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t buf[32]; 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t slen = set.serialize(buf, 
sizeof(buf)/sizeof(buf[0]), status); 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL: serialize"); return; } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) { 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: serialize"); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru // Conversions to and from USet 702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet *uniset = &set; 703b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru USet *uset = uniset->toUSet(); 704b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT((void *)uset == (void *)uniset); 705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet *setx = UnicodeSet::fromUSet(uset); 706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT((void *)setx == (void *)uset); 707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeSet *constSet = uniset; 708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const USet *constUSet = constSet->toUSet(); 709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT((void *)constUSet == (void *)constSet); 710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet); 711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru TEST_ASSERT((void *)constSetx == (void *)constUSet); 71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // span(UnicodeString) and spanBack(UnicodeString) convenience methods 71450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString 
longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc"); 71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeSet ac(0x61, 0x63); 71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.remove(0x62).freeze(); 71750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 || 71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 || 71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 || 72050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 || 72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 || 72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 || 72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 || 72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 || 72550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 || 72650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30 72750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("UnicodeSet.span(UnicodeString, ...) 
returns incorrect end indexes"); 72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 || 73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 || 73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 || 73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 || 73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 || 73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 || 73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 || 73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 || 73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 || 73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20 74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("UnicodeSet.spanBack(UnicodeString, ...) 
returns incorrect start indexes"); 74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIteration() { 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i = 0; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int outerLoop; 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 6 code points, 3 ranges, 2 strings, 8 total elements 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2" 752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(ec); 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator it(set); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (outerLoop=0; outerLoop<3; outerLoop++) { 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run the test multiple times, to check that iterator.reset() is working. 
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<10; i++) { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool nextv = it.next(); 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isString = it.isString(); 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t codePoint = it.getCodepoint(); 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t codePointEnd = it.getCodepointEnd(); 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = it.getString(); 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (i) { 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x61); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "a"); 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x62); 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "b"); 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
TEST_ASSERT(codePoint==0x63); 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "c"); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x79); 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "y"); 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x7a); 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "z"); 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x1abcd); 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == UnicodeString((UChar32)0x1abcd)); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == TRUE); 
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "str1"); 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 7: 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == TRUE); 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "str2"); 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 8: 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == FALSE); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 9: 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == FALSE); 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru it.reset(); // prepare to run the iteration again. 
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStrings() { 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* testList[] = { 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet::createFromAll("abc"), 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[a-c]", ec), 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &(UnicodeSet::createFrom("ch")->add('a','z').add("ll")), 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[{ll}{ch}a-z]", ec), 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet::createFrom("ab}c"), 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[{ab\\}c}]", ec), 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')), 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec), 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct test sets"); 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; testList[i] != NULL; i+=2) { 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec)) { 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat0, pat1; 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testList[i]->toPattern(pat0, TRUE); 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testList[i+1]->toPattern(pat1, TRUE); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*testList[i] == *testList[i+1]) { 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: " + pat0 + " == " + pat1); 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"FAIL: " + pat0 + " != " + pat1); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testList[i]; 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testList[i+1]; 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax. 
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestScriptSet() { 869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1")); 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA"); 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Jitterbug 1423 */ 874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA"); 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax. 
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPropertySet() { 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char* const DATA[] = { 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pattern, Chars IN, Chars NOT in 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Latin:]", 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aA", 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0391\\u03B1", 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\p{Greek}]", 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0391\\u03B1", 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aA", 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\P{ GENERAL Category = upper case letter }", 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ABC", 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Combining class: @since ICU 2.2 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check both symbolic and numeric 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{ccc=Nukta}", 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0ABC", 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{Canonical Combining Class = 11}", 
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u05B1", 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u05B2", 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:c c c = iota subscript :]", 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0345", 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyz", 91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Bidi class: @since ICU 2.2 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{bidiclass=lefttoright}", 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0671\\u0672", 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Binary properties: @since ICU 2.2 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{ideographic}", 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u4E0A", 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "x", 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:math=false:]", 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "q)*(", 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // weiv: )(and * were removed from math in Unicode 4.0.1 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //"(*+)", 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "+<>^", 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#1767 \N{}, \p{ASCII} 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"[:Ascii:]", 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc\\u0000\\u007F", 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0080\\u4E00", 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\N{ latin small letter a }[:name= latin small letter z:]]", 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "az", 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "qrs", 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2015 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:any:]", 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\U0010FFFF", 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "", 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:nv=0.5:]", 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00BD\\u0F2A", 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00BC", 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2653: Age 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Age=1.1:]", 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u03D6", // 1.1 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u03D8\\u03D9", // 3.2 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Age=3.1:]", 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1800\\u3400\\U0002f800", 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000", 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2350: Case_Sensitive 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Case Sensitive:]", 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u1FFC\\U00010410", 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ";\\u00B4\\U00010500", 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2832: C99-compatibility props 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:blank:]", 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " \\u0009", 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "1-9A-Z", 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:graph:]", 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "19AZ", 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " \\u0003\\u0007\\u0009\\u000A\\u000D", 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:punct:]", 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "!@#%&*()[]{}-_\\/;:,.?'\"", 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "09azAZ", 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:xdigit:]", 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "09afAF", 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "gG!", 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex compatibility test 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[-b]", // leading '-' is literal 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[^-b]", // leading '-' is literal 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[b-]", // trailing '-' is literal 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[^b-]", // trailing '-' is literal 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a-b-]", // trailing '-' is literal 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ab-", 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "c=", 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[a-q]&[p-z]-]", // trailing '-' is literal 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "pq-", 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "or=", 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\s|\\)|:|$|\\>]", // from regex tests 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "s|):$>", 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\uDC00cd]", // JB#2906: isolated trail at start 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "cd\\uDC00", 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ab\\uD800\\U00010000", 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ab\\uD800]", // JB#2906: isolated trail at start 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ab\\uD800", 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "cd\\uDC00\\U00010000", 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ab\\uD800cd]", // JB#2906: isolated lead in middle 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\uD800", 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ef\\uDC00\\U00010000", 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ab\\uDC00cd]", // JB#2906: isolated trail in middle 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\uDC00", 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ef\\uD800\\U00010000", 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:^lccc=0:]", // Lead canonical class 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0300\\u0301", 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\u00c0\\u00c5", 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:^tccc=0:]", // Trail canonical class 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"\\u0300\\u0301\\u00c0\\u00c5", 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd", 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0300\\u0301\\u00c0\\u00c5", 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd", 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now) 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "", 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\u0300\\u0301\\u00c0\\u00c5", 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. 
Complete canonical class is zero, but both lead and trail are not 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0F73\\u0F75\\u0F81", 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\u0300\\u0301\\u00c0\\u00c5", 104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* !UCONFIG_NO_NORMALIZATION */ 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Assigned:]", 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD", 104727f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u0888\\uFDD3\\uFFFE\\U00050005", 104827f654740f2a26ad62a5c155af9199af9e69b889claireho 104927f654740f2a26ad62a5c155af9199af9e69b889claireho // Script_Extensions, new in Unicode 6.0 105027f654740f2a26ad62a5c155af9199af9e69b889claireho "[:scx=Arab:]", 105127f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3", 105254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius "\\u061D\\uFDEF\\uFDFE", 105327f654740f2a26ad62a5c155af9199af9e69b889claireho 105427f654740f2a26ad62a5c155af9199af9e69b889claireho // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions, 105527f654740f2a26ad62a5c155af9199af9e69b889claireho // so scx-sc is missing U+FDF2. 
105627f654740f2a26ad62a5c155af9199af9e69b889claireho "[[:Script_Extensions=Arabic:]-[:Arab:]]", 105727f654740f2a26ad62a5c155af9199af9e69b889claireho "\\u0640\\u064B\\u0650\\u0655\\uFDFD", 105827f654740f2a26ad62a5c155af9199af9e69b889claireho "\\uFDF2" 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<DATA_LEN; i+=3) { 1064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]), 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharsToUnicodeString(DATA[i+2])); 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test that Posix style character classes [:digit:], etc. 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the Unicode definitions from TR 18. 
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPosixClasses() { 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:alpha:]", status); 1077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status); 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:lower:]", status); 1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status); 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:upper:]", status); 1091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status); 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
TEST_ASSERT(s1==s2); 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:punct:]", status); 1098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status); 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:digit:]", status); 1105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status); 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:xdigit:]", status); 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status); 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:alnum:]", status); 1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status); 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:space:]", status); 1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status); 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:blank:]", status); 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"), 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status); 
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:cntrl:]", status); 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status); 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:graph:]", status); 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status); 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:print:]", 
status); 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status); 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test cloning of UnicodeSet. For C++, we test the copy constructor. 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestClone() { 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s("[abcxyz]", ec); 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t(s); 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(t, "abc", "def"); 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the indexOf() and charAt() methods. 
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIndexOf() { 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set("[a-cx-y3578]", ec); 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet constructor"); 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<set.size(); ++i) { 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = set.charAt(i); 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.indexOf(c) != i) { 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: charAt(%d) = %X => indexOf() => %d", 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, c, set.indexOf(c)); 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = set.charAt(set.size()); 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != -1) { 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: charAt(<out of range>) = %X", c); 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t j = set.indexOf((UChar32)0x71/*'q'*/); 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (j != -1) { 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: indexOf('q') = " + j); 
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test closure API. 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestCloseOver() { 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char CASE[] = {(char)USET_CASE_INSENSITIVE}; 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS}; 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // selector, input, output 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aq\\u00DF{Bc}{bC}{Fi}]", 1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1]", // 'DZ' 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1\\u01F2\\u01F3]", 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u1FB4]", 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"[\\u1FB4{\\u03AC\\u03B9}]", 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[{F\\uFB01}]", 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\uFB03{ffi}]", 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, // make sure binary search finds limits 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a\\uFF3A]", 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aA\\uFF3A\\uFF5A]", 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a-z]","[A-Za-z\\u017F\\u212A]", 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[abc]","[A-Ca-c]", 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ABC]","[A-Ca-c]", 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[i]", "[iI]", 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u0130]", "[\\u0130{i\\u0307}]", // dotted I 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{i\\u0307}]", "[\\u0130{i\\u0307}]", // i with dot 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u0131]", "[\\u0131]", // dotless i 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u0390]", 
"[\\u0390\\u1FD3{\\u03B9\\u0308\\u0301}]", 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u03c2]", "[\\u03a3\\u03c2\\u03c3]", // sigmas 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u03f2]", "[\\u03f2\\u03f9]", // lunate sigmas 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u03f7]", "[\\u03f7\\u03f8]", 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u1fe3]", "[\\u03b0\\u1fe3{\\u03c5\\u0308\\u0301}]", 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\ufb05]", "[\\ufb05\\ufb06{st}]", 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{st}]", "[\\ufb05\\ufb06{st}]", 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\U0001044F]", "[\\U00010427\\U0001044F]", 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{a\\u02BE}]", "[\\u1E9A{a\\u02BE}]", // first in sorted table 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE_MAPPINGS, 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aq\\u00DF{Bc}{bC}{Fi}]", 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]", 126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE_MAPPINGS, 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1]", // 'DZ' 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1\\u01F2\\u01F3]", 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE_MAPPINGS, 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a-z]", 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[A-Za-z]", 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s; 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t; 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString buf; 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; DATA[i]!=NULL; i+=3) { 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t selector = DATA[i][0]; 1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pat(DATA[i+1], -1, US_INV); 1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString exp(DATA[i+2], -1, US_INV); 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.applyPattern(pat, ec); 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.closeOver(selector); 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.applyPattern(exp, ec); 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern failed"); 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s == t) { 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp); 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 12986d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " + 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.toPattern(buf, TRUE) + ", expected " + exp); 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unused test code. 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This was used to compare the old implementation (using USET_CASE) 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the new one (using 0x100 temporarily) 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * while transitioning from hardcoded case closure tables in uniset.cpp 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu. 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and using ucase.c functions for closure. 
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note: The old and new implementation never fully matched because 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the old implementation turned out to not map U+0130 and U+0131 correctly 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (dotted I and dotless i) and because the old implementation's data tables 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * were outdated compared to Unicode 4.0.1 at the time of the change to the 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * new implementation. (So sigmas and some other characters were not handled 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * according to the newer Unicode version.) 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2; 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator si(sens); 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str, buf2; 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *pStr; 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(si.next()) { 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!si.isString()) { 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=si.getCodepoint(); 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.clear(); 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.add(c); 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.setTo(c); 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.foldCase(); 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sens2.add(str); 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=s; 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.closeOver(USET_CASE); 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.closeOver(0x100); 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s!=t) { 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: closeOver(U+%04x) differs: ", c); 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE)); 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove all code points 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // should contain all full case folding mapping strings 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sens2.remove(0, 0x10ffff); 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru si.reset(sens2); 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(si.next()) { 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(si.isString()) { 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStr=&si.getString(); 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.clear(); 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.add(*pStr); 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=s2=s; 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru s.closeOver(USET_CASE); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.closeOver(0x100); 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s!=t) { 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, TRUE)+") differs: "); 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE)); 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test the pattern API 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec); 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern failed"); 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(s, "abcABC", "defDEF"); 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, NULL, ec); 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: constructor failed"); 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(v, "defDEF", "abcABC"); 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet cm("[abck]", USET_ADD_CASE_MAPPINGS, NULL, ec); 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: construct w/case mappings failed"); 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(cm, "abckABCK", CharsToUnicodeString("defDEF\\u212A")); 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestEscapePattern() { 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char pattern[] = 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\uFEFF \\u200A-\\u200E \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]"; 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char exp[] = 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]"; 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We test this with two passes; in the second pass we 1391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // pre-unescape the pattern. Since U+200E is Pattern_White_Space, 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this fails -- which is what we expect. 
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t pass=1; pass<=2; ++pass) { 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pat(pattern, -1, US_INV); 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pass==2) { 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = pat.unescape(); 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pattern is only good for pass 1 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isPatternValid = (pass==1); 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pat, ec); 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec) != isPatternValid){ 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: applyPattern(" + 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(pat) + ") => " + 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_errorName(ec)); 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains((UChar)0x0644)){ 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + escape(pat) + " contains(U+0664)"); 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString newpat; 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(newpat, TRUE); 1418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (newpat == UnicodeString(exp, -1, US_INV)) { 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(escape(pat) + " => " + newpat); 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat); 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<set.getRangeCount(); ++i) { 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str("Range "); 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar)(0x30 + i)) 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append(": ") 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append((UChar32)set.getRangeStart(i)) 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append(" - ") 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append((UChar32)set.getRangeEnd(i)); 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str = str + " (" + set.getRangeStart(i) + " - " + 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.getRangeEnd(i) + ")"; 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.getRangeStart(i) < 0) { 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + escape(str)); 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(escape(str)); 
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectRange(const UnicodeString& label, 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet& set, 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start, UChar32 end) { 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet exp(start, end); 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set == exp) { 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(label + " => " + set.toPattern(pat, TRUE)); 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString xpat; 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + label + " => " + 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat, TRUE) + 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected " + exp.toPattern(xpat, TRUE)); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestInvalidCodePoint() { 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 DATA[] = { 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 
Test range Expected range 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, 0x10FFFF, 0, 0x10FFFF, 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar32)-1, 8, 0, 8, 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8, 0x110000, 8, 0x10FFFF 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]); 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<DATA_LENGTH; i+=4) { 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = DATA[i]; 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = DATA[i+1]; 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 xstart = DATA[i+2]; 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 xend = DATA[i+3]; 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try various API using the test code points 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(start, end); 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"ct(" + start + "," + end + ")", 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.set(start, end); 
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"set(" + start + "," + end + ")", 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool b = set.contains(start); 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.contains(start, end); 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsNone(start, end); 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsSome(start, end); 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*int32_t index = set.indexOf(start);*/ 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(start); 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(start, end); 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"add(" + start + "," + end + ")", 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.set(0, 0x10FFFF); 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retain(start, end); 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"retain(" + start + "," + end + ")", 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retain(start); 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru set.set(0, 0x10FFFF); 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(start); 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(start, end); 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"!remove(" + start + "," + end + ")", 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.set(0, 0x10FFFF); 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(start, end); 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"!complement(" + start + "," + end + ")", 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(start); 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 DATA2[] = { 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x10FFFF, 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar32)-1, 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x110000 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]); 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<DATA2_LENGTH; ++i) { 
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = DATA2[i], end = 0x10FFFF; 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool valid = (c >= 0 && c <= 0x10FFFF); 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(0, 0x10FFFF); 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For single-codepoint contains, invalid codepoints are NOT contained 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool b = set.contains(c); 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b == valid) { 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"[\\u0000-\\U0010FFFF].contains(" + c + 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + b); 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].contains(" + c + 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + b); 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For codepoint range contains, containsNone, and containsSome, 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invalid or empty (start > end) ranges have UNDEFINED behavior. 
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.contains(c, end); 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"* [\\u0000-\\U0010FFFF].contains(" + c + 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "," + end + ") = " + b); 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsNone(c, end); 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsNone(" + c + 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "," + end + ") = " + b); 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsSome(c, end); 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsSome(" + c + 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "," + end + ") = " + b); 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t index = set.indexOf(c); 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((index >= 0) == valid) { 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"[\\u0000-\\U0010FFFF].indexOf(" + c + 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + index); 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].indexOf(" + c + 1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + index); 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used by TestSymbolTable 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass TokenSymbolTable : public SymbolTable { 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Hashtable contents; 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) { 1576103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius contents.setValueDeleter(uprv_deleteUObject); 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~TokenSymbolTable() {} 1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (Non-SymbolTable API) Add the given variable and value to 1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the table. Variable should NOT contain leading '$'. 
1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void add(const UnicodeString& var, const UnicodeString& value, 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& ec) { 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec)) { 1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contents.put(var, new UnicodeString(value), ec); 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SymbolTable API 1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual const UnicodeString* lookup(const UnicodeString& s) const { 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (const UnicodeString*) contents.get(s); 1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SymbolTable API 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual const UnicodeFunctor* lookupMatcher(UChar32 /*ch*/) const { 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SymbolTable API 
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString parseReference(const UnicodeString& text, 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition& pos, int32_t limit) const { 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start = pos.getIndex(); 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = start; 1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (i < limit) { 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = text.charAt(i); 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++i; 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i == start) { // No valid name chars 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; // Indicate failure with empty string 1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos.setIndex(i); 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.extractBetween(start, i, result); 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSymbolTable() { 
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Multiple test cases can be set up here. Each test case 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is terminated by null: 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // var, value, var, value,..., input pat., exp. output pat., null 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "us", "a-z", "[0-1$us]", "[0-1a-z]", NULL, 1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", NULL, 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", NULL, 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; DATA[i]!=NULL; ++i) { 1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TokenSymbolTable sym(ec); 1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct TokenSymbolTable"); 1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set up variables 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (DATA[i+2] != NULL) { 1651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sym.add(UnicodeString(DATA[i], -1, US_INV), UnicodeString(DATA[i+1], -1, US_INV), ec); 
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't add to TokenSymbolTable"); 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 2; 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Input pattern and expected output pattern 1660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString inpat = UnicodeString(DATA[i], -1, US_INV), exppat = UnicodeString(DATA[i+1], -1, US_INV); 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 2; 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition pos(0); 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet us(inpat, pos, USET_IGNORE_SPACE, &sym, ec); 1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct UnicodeSet"); 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // results 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos.getIndex() != inpat.length()) { 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Failed to read to end of string \"" 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + inpat + "\": read to " 
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + pos.getIndex() + ", length is " 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + inpat.length()); 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet us2(exppat, ec); 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct expected UnicodeSet"); 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString a, b; 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (us != us2) { 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Failed, got " + us.toPattern(a, TRUE) + 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected " + us2.toPattern(b, TRUE)); 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok, got " + us.toPattern(a, TRUE)); 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSurrogate() { 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // These should all behave identically 
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[abc\\uD800\\uDC00]", 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "[abc\uD800\uDC00]", // Can't do this on C -- only Java 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[abc\\U00010000]", 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i=0; DATA[i] != 0; ++i) { 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV)); 1705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeString str = UnicodeString(DATA[i], -1, US_INV); 1706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UnicodeSet set(str, ec); 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet constructor"); 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharsToUnicodeString("abc\\U00010000"), 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 4) { 1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " + 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.size() + ", expected 4"); 
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestExhaustive() { 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // exhaustive tests. Simulate UnicodeSets with integers. 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // That gives us very solid tests (except for large memory tests). 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit = 128; 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet x, y, z, aa; 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < limit; ++i) { 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(i, x); 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Testing " + i + ", " + x); 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testComplement(i, x, y); 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // AS LONG AS WE ARE HERE, check roundtrip 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkRoundTrip(bitsToSet(i, aa)); 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t j = 0; j < limit; ++j) { 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testAdd(i,j, x,y,z); 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testXor(i,j, x,y,z); 
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testRetain(i,j, x,y,z); 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testRemove(i,j, x,y,z); 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testComplement(int32_t a, UnicodeSet& x, UnicodeSet& z) { 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.complement(); 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (~a)) { 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: ~" + x + " != " + z); 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: ~" + a + " != " + c); 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"complement " + a); 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testAdd(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru z.addAll(y); 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a | b)) { 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: " + x + " | " + y + " != " + z); 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: " + a + " | " + b + " != " + c); 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"add " + a + "," + b); 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRetain(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.retainAll(y); 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a & b)) { 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: retain: " + x + " & " + y + " != " + z); 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: retain: " + a + " & " + b + " != " + c); 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"retain " + a + "," + b); 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRemove(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.removeAll(y); 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a &~ b)) { 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: remove: " + x + " &~ " + y + " != " + z); 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: remove: " + a + " &~ " + b + " != " + c); 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"remove " + a + "," + b); 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testXor(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.complementAll(y); 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a ^ b)) { 
1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: complement: " + x + " ^ " + y + " != " + z); 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: complement: " + a + " ^ " + b + " != " + c); 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"complement " + a + "," + b); 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Check that ranges are monotonically increasing and non- 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * overlapping. 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkCanonicalRep(const UnicodeSet& set, const UnicodeString& msg) { 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = set.getRangeCount(); 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (n < 0) { 1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL result of " + msg + 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": range count should be >= 0 but is " + 1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n /*+ " for " + set.toPattern())*/); 1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 last = 0; 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<n; ++i) { 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = set.getRangeStart(i); 
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = set.getRangeEnd(i); 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start > end) { 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL result of " + msg + 1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": range " + (i+1) + 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " start > end: " + (int)start + ", " + (int)end + 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " for " + set); 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0 && start <= last) { 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL result of " + msg + 1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": range " + (i+1) + 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " overlaps previous range: " + (int)start + ", " + (int)end + 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " for " + set); 1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last = end; 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a bitmask to a UnicodeSet. 
1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) { 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.clear(); 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (UChar32 i = 0; i < 32; ++i) { 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((a & (1<<i)) != 0) { 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.add(i); 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a UnicodeSet to a bitmask. Only the characters 1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U+0000 to U+0020 are represented in the bitmask. 
1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSetTest::setToBits(const UnicodeSet& x) { 1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = 0; 1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < 32; ++i) { 1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (x.contains((UChar32)i)) { 1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result |= (1<<i); 1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the representation of an inversion list based UnicodeSet 1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as a pairs list. Ranges are listed in ascending Unicode order. 1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For example, the set [a-zA-M3] is represented as "33AMaz". 
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) { 1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pairs; 1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<set.getRangeCount(); ++i) { 1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = set.getRangeStart(i); 1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = set.getRangeEnd(i); 1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (end > 0xFFFF) { 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru end = 0xFFFF; 1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i = set.getRangeCount(); // Should be unnecessary 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pairs.append((UChar)start).append((UChar)end); 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return pairs; 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic consistency check for a few items. 
1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * That the iterator works, and that we can create a pattern and 1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * get the same thing back 1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) { 1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t(s); 1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "copy ct"); 1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = s; 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "operator="); 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copyWithIterator(t, s, FALSE); 1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "iterator roundtrip"); 1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copyWithIterator(t, s, TRUE); // try range 1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "iterator roundtrip"); 1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; s.toPattern(pat, FALSE); 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.applyPattern(pat, ec); 1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern"); 
1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "toPattern(false)"); 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.toPattern(pat, TRUE); 1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.applyPattern(pat, ec); 1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern"); 1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "toPattern(true)"); 1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) { 1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.clear(); 1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator it(s); 1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (withRange) { 1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (it.nextRange()) { 1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (it.isString()) { 1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getString()); 1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getCodepoint(), 
it.getCodepointEnd()); 1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (it.next()) { 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (it.isString()) { 1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getString()); 1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getCodepoint()); 1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) { 1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source; s.toPattern(source, TRUE); 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; t.toPattern(result, TRUE); 1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s != t) { 1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + message 1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; source = " + source 1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; result = " + result 1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ); 1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru logln((UnicodeString)"Ok: " + message 1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; source = " + source 1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; result = " + result 1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ); 1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeString& pat, 1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsIn, 1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsOut) { 1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pat, ec); 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 19746d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln((UnicodeString)"FAIL: pattern \"" + 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat + "\" => " + u_errorName(ec)); 1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, pat, charsIn, charsOut); 1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set, 
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsIn, 1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsOut) { 1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat); 1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, pat, charsIn, charsOut); 1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set, 1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& setName, 1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsIn, 1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsOut) { 1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString bad; 1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<charsIn.length(); i+=U16_LENGTH(c)) { 2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = charsIn.char32At(i); 2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.contains(c)) { 2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bad.append(c); 2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bad.length() > 0) { 
2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail: set " + setName + " does not contain " + prettify(bad) + 2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected containment of " + prettify(charsIn)); 2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: set " + setName + " contains " + prettify(charsIn)); 2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bad.truncate(0); 2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<charsOut.length(); i+=U16_LENGTH(c)) { 2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = charsOut.char32At(i); 2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains(c)) { 2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bad.append(c); 2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bad.length() > 0) { 2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail: set " + setName + " contains " + prettify(bad) + 2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected non-containment of " + prettify(charsOut)); 2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: set " + setName + " does not contain " + prettify(charsOut)); 2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPattern(UnicodeSet& set, 2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& pattern, 2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& expectedPairs){ 2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern(pattern, status); 2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\"") + pattern + 2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\") failed"); 2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (getPairs(set) != expectedPairs ) { 2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\"") + pattern + 2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\") => pairs \"" + 2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\", expected \"" + 2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expectedPairs) + "\""); 2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(UnicodeString("Ok: applyPattern(\"") + pattern + 2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\") => pairs \"" + 2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\""); 2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the result of calling set.toPattern(), which is the string representation of 2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this set(set), is passed to a UnicodeSet constructor, and tested that it 2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will produce another set that is equal to this one. 2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString temppattern; 2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(temppattern); 2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *tempset=new UnicodeSet(temppattern, status); 2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => invalid pattern")); 2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*tempset != set || getPairs(*tempset) != getPairs(set)){ 2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \""+ escape(getPairs(*tempset)) + "\", expected pairs \"" + 2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\"")); 2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else{ 2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(UnicodeString("Ok: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \"" + escape(getPairs(*tempset)) + "\"")); 2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tempset; 2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPairs(const UnicodeSet& set, const UnicodeString& expectedPairs) { 2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (getPairs(set) != expectedPairs) { 2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: Expected pair list \"") + 2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expectedPairs) + "\", got \"" + 2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\""); 2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectToPattern(const UnicodeSet& set, 2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& expPat, 2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char** expStrings) { 2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat, TRUE); 2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat == expPat) { 2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: toPattern() => \"" + pat + "\""); 2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
errln((UnicodeString)"FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\""); 2088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expStrings == NULL) { 2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool in = TRUE; 2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; expStrings[i] != NULL; ++i) { 2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expStrings[i] == NOT) { // sic; pointer comparison 2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in = FALSE; 2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = CharsToUnicodeString(expStrings[i]); 2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool contained = set.contains(s); 2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (contained == in) { 2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: " + expPat + 2103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (contained ? " contains {" : " does not contain {") + 2104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expStrings[i]) + "}"); 2105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + expPat + 2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (contained ? 
" contains {" : " does not contain {") + 2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expStrings[i]) + "}"); 2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); } 2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::doAssert(UBool condition, const char *message) 2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!condition) { 2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("ERROR : ") + message); 2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString 2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::escape(const UnicodeString& s) { 2125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString buf; 2126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<s.length(); ) 2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = s.char32At(i); 2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (0x0020 <= c && c <= 0x007F) { 2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += c; 
2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c <= 0xFFFF) { 2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += (UChar)0x5c; buf += (UChar)0x75; 2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += (UChar)0x5c; buf += (UChar)0x55; 2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0xF0000000) >> 28); 2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x0F000000) >> 24); 2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x00F00000) >> 20); 2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x000F0000) >> 16); 2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0xF000) >> 12); 2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x0F00) >> 8); 2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x00F0) >> 4); 2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString(c & 0x000F); 2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += U16_LENGTH(c); 2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return buf; 2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestFreezable() { 2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 
2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15); 2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet idSet(idPattern, errorCode); 2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 21566d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode)); 2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15); 2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet wsSet(wsPattern, errorCode); 2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 21636d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode)); 2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru idSet.add(idPattern); 2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet frozen(idSet); 2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.freeze(); 2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(idSet.isFrozen() || !frozen.isFrozen()) { 2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: isFrozen() is wrong"); 2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: a copy-constructed frozen set differs from its original"); 2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen=wsSet; 2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: a frozen set was modified by operator="); 2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet frozen2(frozen); 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen2!=frozen || frozen2!=idSet) { 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: a copied frozen set differs from its frozen original"); 2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!frozen2.isFrozen()) { 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: copy-constructing a frozen set results in a thawed one"); 2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet frozen3(5, 55); // Set to some values to really test assignment below, not copy construction. 
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen3.contains(0, 4) || !frozen3.contains(5, 55) || frozen3.contains(56, 0x10ffff)) { 2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(5, 55) failed"); 2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen3=frozen; 2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!frozen3.isFrozen()) { 2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: copying a frozen set results in a thawed one"); 2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *cloned=(UnicodeSet *)frozen.clone(); 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!cloned->isFrozen() || *cloned!=frozen || cloned->containsSome(0xd802, 0xd805)) { 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: clone() failed"); 2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cloned->add(0xd802, 0xd805); 2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(cloned->containsSome(0xd802, 0xd805)) { 2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unable to modify clone"); 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete cloned; 2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *thawed=(UnicodeSet *)frozen.cloneAsThawed(); 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(thawed->isFrozen() || *thawed!=frozen || thawed->containsSome(0xd802, 0xd805)) { 
2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: cloneAsThawed() failed"); 2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru thawed->add(0xd802, 0xd805); 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!thawed->contains(0xd802, 0xd805)) { 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unable to modify thawed clone"); 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete thawed; 2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.set(5, 55); 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::set() modified a frozen set"); 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.clear(); 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::clear() modified a frozen set"); 2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.closeOver(USET_CASE_INSENSITIVE); 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::closeOver() modified a frozen set"); 2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.compact(); 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::compact() modified a frozen set"); 2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition pos; 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPattern(wsPattern, errorCode). 2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode). 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode). 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode). 2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode); 2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set"); 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(0xd800). 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(0xd802, 0xd805). 2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(wsPattern). 
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addAll(idPattern). 2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addAll(wsSet); 2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::addXYZ() modified a frozen set"); 2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(0x62). 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(0x64, 0x69). 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retainAll(wsPattern). 2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retainAll(wsSet); 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::retainXYZ() modified a frozen set"); 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove(0x62). 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove(0x64, 0x69). 2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove(idPattern). 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru removeAll(idPattern). 
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru removeAll(idSet); 2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::removeXYZ() modified a frozen set"); 2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(). 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(0x62). 2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(0x64, 0x69). 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(idPattern). 2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complementAll(idPattern). 2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complementAll(idSet); 2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::complementXYZ() modified a frozen set"); 2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test span() etc. -------------------------------------------------------- *** 2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Append the UTF-8 version of the string to t and return the appended UTF-8 length. 
2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendUTF8(const UChar *s, int32_t length, char *t, int32_t capacity) { 2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8=0; 2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_strToUTF8(t, capacity, &length8, s, length, &errorCode); 2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length8; 2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The string contains an unpaired surrogate. 2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ignore this string. 2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator; 2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Make the strings in a UnicodeSet easily accessible. 
2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStrings { 2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStrings(const UnicodeSet &normalSet) : 2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set(normalSet), stringsLength(0), hasSurrogates(FALSE) { 2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t size=set.size(); 2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(size>0 && set.charAt(size-1)<0) { 2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If a set's last element is not a code point, then it must contain strings. 2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate over the set, skip all code point ranges, and cache the strings. 2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert them to UTF-8 for spanUTF8(). 2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator iter(set); 2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *s; 2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *s8=utf8; 2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8, utf8Count=0; 2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(iter.nextRange() && stringsLength<LENGTHOF(strings)) { 2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iter.isString()) { 2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store the pointer to the set's string element 2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which we happen to know is a stable pointer. 
2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings[stringsLength]=s=&iter.getString(); 2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utf8Count+= 2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utf8Lengths[stringsLength]=length8= 2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendUTF8(s->getBuffer(), s->length(), 2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s8, (int32_t)(sizeof(utf8)-utf8Count)); 2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8==0) { 2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hasSurrogates=TRUE; // Contains unpaired surrogates. 2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s8+=length8; 2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++stringsLength; 2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &getSet() const { 2344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return set; 2345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool hasStrings() const { 2348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(stringsLength>0); 2349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool hasStringsWithSurrogates() const { 
2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return hasSurrogates; 2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class UnicodeSetWithStringsIterator; 2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &set; 2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *strings[20]; 2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stringsLength; 2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool hasSurrogates; 2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char utf8[1024]; 2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t utf8Lengths[20]; 2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextStringIndex; 2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextUTF8Start; 2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator { 2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator(const UnicodeSetWithStrings &set) : 2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSet(set), nextStringIndex(0), nextUTF8Start(0) { 2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void reset() { 2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextStringIndex=nextUTF8Start=0; 2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *nextString() { 2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(nextStringIndex<fSet.stringsLength) { 2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fSet.strings[nextStringIndex++]; 2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do not mix with calls to nextString(). 
2390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *nextUTF8(int32_t &length) { 2391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(nextStringIndex<fSet.stringsLength) { 2392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8=fSet.utf8+nextUTF8Start; 2393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextUTF8Start+=length=fSet.utf8Lengths[nextStringIndex++]; 2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s8; 2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSetWithStrings &fSet; 2403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextStringIndex; 2404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextUTF8Start; 2405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Compare 16-bit Unicode strings (which may be malformed UTF-16) 2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// at code point boundaries. 2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// That is, each edge of a match must not be in the middle of a surrogate pair. 
// Returns TRUE if t compares equal to the t.length() code units at s+start and
// neither end of that match lies between a lead and a trail surrogate.
//   s      - start of the text; s[0..limit) is the whole text.
//   start  - index in s where the candidate match begins.
//   limit  - length of the whole text (used to decide whether a code unit
//            follows the match).
//   t      - the string to match.
// Callers in this file check that t.length() fits between start and the end of
// the text before calling.
static inline UBool
matches16CPB(const UChar *s, int32_t start, int32_t limit, const UnicodeString &t) {
    // From here on s points at the candidate match and limit is relative to it.
    s+=start;
    limit-=start;
    int32_t length=t.length();
    return 0==t.compare(s, length) &&
           // Reject a match that starts right after a lead surrogate with a trail surrogate.
           !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&
           // Reject a match that ends on a lead surrogate followed by a trail surrogate.
           !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));
}

// Implement span() with contains() for comparison.
// Returns the index in s[0..length) at which the span starting at index 0 ends
// for the given spanCondition, computed only with UnicodeSet::contains() and
// matches16CPB() against the strings cached in `set`.
static int32_t containsSpanUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
                                 USetSpanCondition spanCondition) {
    const UnicodeSet &realSet(set.getSet());
    if(!set.hasStrings()) {
        // No strings: only single code points matter.
        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
        }

        UChar32 c;
        int32_t start=0, prev;
        // prev trails start by one code point so that the index *before* the
        // first non-conforming code point is returned.
        while((prev=start)<length) {
            U16_NEXT(s, start, length, c);
            if(realSet.contains(c)!=spanCondition) {
                break;
            }
        }
        return prev;
    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
        // Stop at the first position where either the code point is in the set
        // or one of the set's strings matches.
        UnicodeSetWithStringsIterator iter(set);
        UChar32 c;
        int32_t start, next;
        for(start=next=0; start<length;) {
            U16_NEXT(s, next, length, c);
            if(realSet.contains(c)) {
                break;
            }
            const UnicodeString *str;
            iter.reset();
            while((str=iter.nextString())!=NULL) {
                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
                    // spanNeedsStrings=TRUE;
                    return start;
                }
            }
            start=next;
        }
        return start;
    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
        UnicodeSetWithStringsIterator iter(set);
        UChar32 c;
        // maxSpanLimit: furthest span end found so far through recursive calls.
        int32_t start, next, maxSpanLimit=0;
        for(start=next=0; start<length;) {
            U16_NEXT(s, next, length, c);
            if(!realSet.contains(c)) {
                next=start;  // Do not span this single, not-contained code point.
            }
            const UnicodeString *str;
            iter.reset();
            while((str=iter.nextString())!=NULL) {
                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
                    // spanNeedsStrings=TRUE;
                    int32_t matchLimit=start+str->length();
                    if(matchLimit==length) {
                        // The match reaches the end of the text: nothing can span further.
                        return length;
                    }
                    if(spanCondition==USET_SPAN_CONTAINED) {
                        // Iterate for the shortest match at each position.
                        // Recurse for each but the shortest match.
                        if(next==start) {
                            next=matchLimit;  // First match from start.
                        } else {
                            if(matchLimit<next) {
                                // Remember shortest match from start for iteration.
                                // After the swap, matchLimit holds the longer of the two.
                                int32_t temp=next;
                                next=matchLimit;
                                matchLimit=temp;
                            }
                            // Recurse for non-shortest match from start.
                            int32_t spanLength=containsSpanUTF16(set, s+matchLimit, length-matchLimit,
                                                                 USET_SPAN_CONTAINED);
                            if((matchLimit+spanLength)>maxSpanLimit) {
                                maxSpanLimit=matchLimit+spanLength;
                                if(maxSpanLimit==length) {
                                    return length;
                                }
                            }
                        }
                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
                        if(matchLimit>next) {
                            // Remember longest match from start.
                            next=matchLimit;
                        }
                    }
                }
            }
            if(next==start) {
                break;  // No match from start.
            }
            start=next;
        }
        // Report whichever reached further: the iterated span or a recursive one.
        if(start>maxSpanLimit) {
            return start;
        } else {
            return maxSpanLimit;
        }
    }
}

static int32_t containsSpanBackUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
                                     USetSpanCondition spanCondition) {
    if(length==0) {
        return 0;
    }
    const UnicodeSet &realSet(set.getSet());
    if(!set.hasStrings()) {
2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV(s, 0, length, c); 2534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)!=spanCondition) { 2535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(spanCondition==USET_SPAN_NOT_CONTAINED) { 2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length, length0=length; 2543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV(s, 0, length, c); 2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)) { 2546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str; 
2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((str=iter.nextString())!=NULL) { 2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) { 2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ { 2559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length, minSpanStart=length, length0=length; 2562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV(s, 0, length, c); 2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!realSet.contains(c)) { 2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=prev; // Do not span this single, not-contained code point. 
2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str; 2568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((str=iter.nextString())!=NULL) { 2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) { 2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchStart=prev-str->length(); 2573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart==0) { 2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition==USET_SPAN_CONTAINED) { 2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate for the shortest match at each position. 2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for each but the shortest match. 2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==prev) { 2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; // First match from prev. 2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart>length) { 2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember shortest match from prev for iteration. 
2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t temp=length; 2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; 2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchStart=temp; 2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for non-shortest match from prev. 2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t spanStart=containsSpanBackUTF16(set, s, matchStart, 2590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USET_SPAN_CONTAINED); 2591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanStart<minSpanStart) { 2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minSpanStart=spanStart; 2593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(minSpanStart==0) { 2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* spanCondition==USET_SPAN_SIMPLE */ { 2599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart<length) { 2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember longest match from prev. 2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; 2602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==prev) { 2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // No match from prev. 
2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev<minSpanStart) { 2611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return minSpanStart; 2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length, 2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USetSpanCondition spanCondition) { 2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &realSet(set.getSet()); 2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!set.hasStrings()) { 2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start=0, prev; 2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((prev=start)<length) { 26298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, start, length, c); 2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)!=spanCondition) { 2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(spanCondition==USET_SPAN_NOT_CONTAINED) { 2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, next; 2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(start=next=0; start<length;) { 26408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, next, length, c); 2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)) { 2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
while((s8=iter.nextUTF8(length8))!=NULL) { 2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) { 2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return start; 2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=next; 2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return start; 2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ { 2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, next, maxSpanLimit=0; 2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(start=next=0; start<length;) { 26618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, next, length, c); 2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!realSet.contains(c)) { 2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=start; // Do not span this single, not-contained code point. 
2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) { 2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchLimit=start+length8; 2672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchLimit==length) { 2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition==USET_SPAN_CONTAINED) { 2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate for the shortest match at each position. 2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for each but the shortest match. 2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(next==start) { 2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=matchLimit; // First match from start. 2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchLimit<next) { 2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember shortest match from start for iteration. 
2683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t temp=next; 2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=matchLimit; 2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchLimit=temp; 2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for non-shortest match from start. 2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t spanLength=containsSpanUTF8(set, s+matchLimit, length-matchLimit, 2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USET_SPAN_CONTAINED); 2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((matchLimit+spanLength)>maxSpanLimit) { 2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxSpanLimit=matchLimit+spanLength; 2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxSpanLimit==length) { 2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 2694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* spanCondition==USET_SPAN_SIMPLE */ { 2698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchLimit>next) { 2699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember longest match from start. 
2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=matchLimit; 2701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(next==start) { 2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // No match from start. 2707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=next; 2709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start>maxSpanLimit) { 2711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return start; 2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxSpanLimit; 2714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length, 2719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USetSpanCondition spanCondition) { 2720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &realSet(set.getSet()); 2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!set.hasStrings()) { 
2725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 27328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD(s, 0, length, c); 2733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)!=spanCondition) { 2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(spanCondition==USET_SPAN_NOT_CONTAINED) { 2739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 27438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD(s, 0, length, c); 2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)) { 2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 
2748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) { 2752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ { 2759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length, minSpanStart=length; 2762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 27638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD(s, 0, length, c); 2764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!realSet.contains(c)) { 2765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=prev; // Do not span this single, not-contained code point. 
2766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) { 2772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchStart=prev-length8; 2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart==0) { 2775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition==USET_SPAN_CONTAINED) { 2778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate for the shortest match at each position. 2779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for each but the shortest match. 2780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==prev) { 2781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; // First match from prev. 2782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart>length) { 2784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember shortest match from prev for iteration. 
2785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t temp=length; 2786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; 2787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchStart=temp; 2788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for non-shortest match from prev. 2790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t spanStart=containsSpanBackUTF8(set, s, matchStart, 2791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USET_SPAN_CONTAINED); 2792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanStart<minSpanStart) { 2793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minSpanStart=spanStart; 2794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(minSpanStart==0) { 2795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* spanCondition==USET_SPAN_SIMPLE */ { 2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart<length) { 2801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember longest match from prev. 2802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; 2803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==prev) { 2808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // No match from prev. 
2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev<minSpanStart) { 2812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return minSpanStart; 2815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// spans to be performed and compared 2820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 2821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF16 =1, 2822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF8 =2, 2823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTFS =3, 2824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_SET =4, 2826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_COMPLEMENT =8, 2827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_POLARITY =0xc, 2828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_FWD =0x10, 2830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_BACK =0x20, 2831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_DIRS =0x30, 2832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_CONTAINED =0x100, 2834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_SIMPLE =0x200, 
2835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_CONDITION =0x300, 2836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_ALL =0x33f 2838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline USetSpanCondition invertSpanCondition(USetSpanCondition spanCondition, USetSpanCondition contained) { 2841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return spanCondition == USET_SPAN_NOT_CONTAINED ? contained : USET_SPAN_NOT_CONTAINED; 2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t slen(const void *s, UBool isUTF16) { 2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return isUTF16 ? u_strlen((const UChar *)s) : strlen((const char *)s); 2846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Count spans on a string with the method according to type and set the span limits. 2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set may be the complement of the original. 2851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * When using spanBack() and comparing with span(), use a span condition for the first spanBack() 2852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * according to the expected number of spans. 2853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Sets typeName to an empty string if there is no such type. 
2854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns -1 if the span option is filtered out. 2855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t getSpans(const UnicodeSetWithStrings &set, UBool isComplement, 2857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *s, int32_t length, UBool isUTF16, 2858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 2859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int type, const char *&typeName, 2860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limits[], int32_t limitsCapacity, 2861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectCount) { 2862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &realSet(set.getSet()); 2863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, count; 2864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USetSpanCondition spanCondition, firstSpanCondition, contained; 2865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isForward; 2866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(type<0 || 7<type) { 2868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru typeName=""; 2869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const typeNames16[]={ 2873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "contains", "contains(LM)", 2874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "span", "span(LM)", 2875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "containsBack", 
"containsBack(LM)", 2876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "spanBack", "spanBack(LM)" 2877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 2878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const typeNames8[]={ 2880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "containsUTF8", "containsUTF8(LM)", 2881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "spanUTF8", "spanUTF8(LM)", 2882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "containsBackUTF8", "containsBackUTF8(LM)", // not implemented 2883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "spanBackUTF8", "spanBackUTF8(LM)" 2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 2885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru typeName= isUTF16 ? typeNames16[type] : typeNames8[type]; 2887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // filter span options 2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(type<=3) { 2890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span forward 2891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_FWD)==0) { 2892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isForward=TRUE; 2895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span backward 2897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_BACK)==0) { 2898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 
2899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isForward=FALSE; 2901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((type&1)==0) { 2903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use USET_SPAN_CONTAINED 2904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_CONTAINED)==0) { 2905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contained=USET_SPAN_CONTAINED; 2908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use USET_SPAN_SIMPLE 2910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_SIMPLE)==0) { 2911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contained=USET_SPAN_SIMPLE; 2914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Default first span condition for going forward with an uncomplemented set. 
2917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_NOT_CONTAINED; 2918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isComplement) { 2919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First span condition for span(), used to terminate the spanBack() iteration. 2923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstSpanCondition=spanCondition; 2924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanBack(): Its initial span condition is span()'s last span condition, 2926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which is the opposite of span()'s first span condition 2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we expect an even number of spans. 2928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (The loop inverts spanCondition (expectCount-1) times 2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // before the expectCount'th span() call.) 2930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we do not compare forward and backward directions, then we do not have an 2931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // expectCount and just start with firstSpanCondition. 
2932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isForward && (whichSpans&SPAN_FWD)!=0 && (expectCount&1)==0) { 2933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=0; 2937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(type) { 2938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 2939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 2940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=0; 2941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=slen(s, isUTF16); 2943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+= isUTF16 ? 
containsSpanUTF16(set, (const UChar *)s+start, length-start, spanCondition) : 2946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru containsSpanUTF8(set, (const char *)s+start, length-start, spanCondition); 2947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[count]=start; 2949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start>=length) { 2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 2958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 2959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=0; 2960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+= isUTF16 ? realSet.span((const UChar *)s+start, length>=0 ? length-start : length, spanCondition) : 2962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru realSet.spanUTF8((const char *)s+start, length>=0 ? 
length-start : length, spanCondition); 2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[count]=start; 2965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>=0 ? start>=length : 2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isUTF16 ? ((const UChar *)s)[start]==0 : 2969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((const char *)s)[start]==0 2970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 2971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 2977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 2978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=slen(s, isUTF16); 2980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<=limitsCapacity) { 2984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[limitsCapacity-count]=length; 2985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length= isUTF16 ? 
containsSpanBackUTF16(set, (const UChar *)s, length, spanCondition) : 2987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru containsSpanBackUTF8(set, (const char *)s, length, spanCondition); 2988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0 && spanCondition==firstSpanCondition) { 2989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(limits, limits+(limitsCapacity-count), count*4); 2995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: 2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 7: 2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 3000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 3001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<=limitsCapacity) { 3002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[limitsCapacity-count]= length >=0 ? length : slen(s, isUTF16); 3003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: Length<0 is tested only for the first spanBack(). 3005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we wanted to keep length<0 for all spanBack()s, we would have to 3006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // temporarily modify the string by placing a NUL where the previous spanBack() stopped. 
3007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length= isUTF16 ? realSet.spanBack((const UChar *)s, length, spanCondition) : 3008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru realSet.spanBackUTF8((const char *)s, length, spanCondition); 3009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0 && spanCondition==firstSpanCondition) { 3010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 3015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(limits, limits+(limitsCapacity-count), count*4); 3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru typeName=""; 3020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 3021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return count; 3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// sets to be tested; odd index=isComplement 3027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 3028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SLOW, 3029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SLOW_NOT, 
3030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAST, 3031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAST_NOT, 3032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SET_COUNT 3033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 3034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char *const setNames[SET_COUNT]={ 3036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "slow", 3037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "slow.not", 3038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "fast", 3039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "fast.not" 3040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 3041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 3043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that we get the same results whether we look at text with contains(), 3044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * span() or spanBack(), using unfrozen or frozen versions of the set, 3045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and using the set or its complement (switching the spanConditions accordingly). 3046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The latter verifies that 3047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set.span(spanCondition) == set.complement().span(!spanCondition). 3048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 3049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The expectLimits[] are either provided by the caller (with expectCount>=0) 3050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or returned to the caller (with an input expectCount<0). 
3051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 3052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4], 3053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *s, int32_t length, UBool isUTF16, 3054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectLimits[], int32_t &expectCount, 3056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testName, int32_t index) { 3057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limits[500]; 3058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limitsCount; 3059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i, j; 3060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *typeName; 3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int type; 3063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<SET_COUNT; ++i) { 3065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((i&1)==0) { 3066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Even-numbered sets are original, uncomplemented sets. 3067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_SET)==0) { 3068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Odd-numbered sets are complemented. 
3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_COMPLEMENT)==0) { 3073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(type=0;; ++type) { 3077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limitsCount=getSpans(*sets[i], (UBool)(i&1), 3078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s, length, isUTF16, 3079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans, 3080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru type, typeName, 3081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits, LENGTHOF(limits), expectCount); 3082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(typeName[0]==0) { 3083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // All types tried. 3084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(limitsCount<0) { 3086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; // Span option filtered out. 
3087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(expectCount<0) { 3089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectCount=limitsCount; 3090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(limitsCount>LENGTHOF(limits)) { 3091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans", 3092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits)); 3093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memcpy(expectLimits, limits, limitsCount*4); 3096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(limitsCount!=expectCount) { 3097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld", 3098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)expectCount); 3099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<limitsCount; ++j) { 3101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(limits[j]!=expectLimits[j]) { 3102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%ld != %ld", 3103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[i], typeName, (long)limitsCount, 3104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j, (long)limits[j], (long)expectLimits[j]); 3105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
3106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compare span() with containsAll()/containsNone(), 3113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // but only if we have expectLimits[] from the uncomplemented set. 3114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isUTF16 && (whichSpans&SPAN_SET)!=0) { 3115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16=(const UChar *)s; 3116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString string; 3117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=0, limit, length; 3118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<expectCount; ++i) { 3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit=expectLimits[i]; 3120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=limit-prev; 3121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0) { 3122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string.setTo(FALSE, s16+prev, length); // read-only alias 3123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i&1) { 3124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[SLOW]->getSet().containsAll(string)) { 3125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()", 3126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[SLOW], (long)prev, (long)limit); 
3127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[FAST]->getSet().containsAll(string)) { 3130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()", 3131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[FAST], (long)prev, (long)limit); 3132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[SLOW]->getSet().containsNone(string)) { 3136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()", 3137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[SLOW], (long)prev, (long)limit); 3138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[FAST]->getSet().containsNone(string)) { 3141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()", 3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[FAST], (long)prev, (long)limit); 3143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=limit; 
3148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specifically test either UTF-16 or UTF-8. 3153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4], 3154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *s, int32_t length, UBool isUTF16, 3155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testName, int32_t index) { 3157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectLimits[500]; 3158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectCount=-1; 3159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, length, isUTF16, whichSpans, expectLimits, expectCount, testName, index); 3160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool stringContainsUnpairedSurrogate(const UChar *s, int32_t length) { 3163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c, c2; 3164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>=0) { 3166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(length>0) { 3167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=*s++; 3168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 3169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0xd800<=c && c<0xe000) { 3170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru if(c>=0xdc00 || length==0 || !U16_IS_TRAIL(c2=*s++)) { 3171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 3174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((c=*s++)!=0) { 3178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0xd800<=c && c<0xe000) { 3179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>=0xdc00 || !U16_IS_TRAIL(c2=*s++)) { 3180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 3181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 3186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text, 3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// unless either UTF is turned off in whichSpans. 3190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Testing UTF-16 and UTF-8 together requires that surrogate code points 3191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// have the same contains(c) value as U+FFFD. 
3192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanBothUTFs(const UnicodeSetWithStrings *sets[4], 3193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16, int32_t length16, 3194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 3195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testName, int32_t index) { 3196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectLimits[500]; 3197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectCount; 3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectCount=-1; // Get expectLimits[] from testSpan(). 3200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF16)!=0) { 3202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s16, length16, TRUE, whichSpans, expectLimits, expectCount, testName, index); 3203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF8)==0) { 3205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert s16[] and expectLimits[] to UTF-8. 
3209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t s8[3000]; 3210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t offsets[3000]; 3211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16Limit=s16+length16; 3213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *t=(char *)s8; 3214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *tLimit=t+sizeof(s8); 3215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *o=offsets; 3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 3217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert with substitution: Turn unpaired surrogates into U+FFFD. 3219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(openUTF8Converter(), &t, tLimit, &s16, s16Limit, o, TRUE, &errorCode); 3220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx] ucnv_fromUnicode(to UTF-8) fails with %s", 3222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, u_errorName(errorCode)); 3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_resetFromUnicode(utf8Cnv); 3224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8=(int32_t)(t-(char *)s8); 3227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert expectLimits[]. 
3229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, j, expect; 3230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=j=0; i<expectCount; ++i) { 3231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect=expectLimits[i]; 3232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(expect==length16) { 3233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectLimits[i]=length8; 3234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(offsets[j]<expect) { 3236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++j; 3237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectLimits[i]=j; 3239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s8, length8, FALSE, whichSpans, expectLimits, expectCount, testName, index); 3243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 nextCodePoint(UChar32 c) { 3246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Skip some large and boring ranges. 
3247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(c) { 3248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x3441: 3249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x4d7f; 3250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x5100: 3251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x9f00; 3252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0xb040: 3253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xd780; 3254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0xe041: 3255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xf8fe; 3256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x10100: 3257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x20000; 3258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x20041: 3259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xe0000; 3260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0xe0101: 3261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x10fffd; 3262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 3263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c+1; 3264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Verify that all implementations represent the same set. 
3268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { 3269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains(U+FFFD) is inconsistent with contains(some surrogates), 3270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or the set contains strings with unpaired surrogates which don't translate to valid UTF-8: 3271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Skip the UTF-8 part of the test - if the string contains surrogates - 3272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because it is likely to produce a different result. 3273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool inconsistentSurrogates= 3274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (!(sets[0]->getSet().contains(0xfffd) ? 3275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[0]->getSet().contains(0xd800, 0xdfff) : 3276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[0]->getSet().containsNone(0xd800, 0xdfff)) || 3277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[0]->hasStringsWithSurrogates()); 3278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar s[1000]; 3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length=0; 3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t localWhichSpans; 3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, first; 3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(first=c=0;; c=nextCodePoint(c)) { 3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) { 3286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru localWhichSpans=whichSpans; 3287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) { 3288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localWhichSpans&=~SPAN_UTF8; 3289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first); 3291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>0x10ffff) { 3292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 3295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru first=c; 3296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(s, length, c); 3298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test with a particular, interesting string. 3302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specify length and try NUL-termination. 
3303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { 3304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar s[]={ 3305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x61, 0x62, 0x20, // Latin, space 3306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x3b1, 0x3b2, 0x3b3, // Greek 3307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd900, // lead surrogate 3308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x3000, 0x30ab, 0x30ad, // wide space, Katakana 3309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xdc05, // trail surrogate 3310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xa0, 0xac00, 0xd7a3, // nbsp, Hangul 3311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd900, 0xdc05, // unassigned supplementary 3312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary 3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd7a4, 0xdc05, 0xd900, 0x2028, // unassigned, surrogates in wrong order, LS 3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0 // NUL 3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF16)==0) { 3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); 3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1); 3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 3323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { 3325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char s[]={ 3326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc" // Latin 3327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " " // space 3332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated multi-byte sequences */ 3334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xd0" 3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0" 3336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe1" 3337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed" 3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xee" 3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0" 3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf1" 3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4" 3342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8" 3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc" 3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xCE\xB1\xCE\xB2\xCE\xB3" // Greek 3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 
3348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0\x80" 3351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0\xa0" 3352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe1\x80" 3353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed\x80" 3354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed\xa0" 3355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xee\x80" 3356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x80" 3357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x90" 3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf1\x80" 3359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x80" 3360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x90" 3361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80" 3362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80" 3363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xE3\x80\x80\xE3\x82\xAB\xE3\x82\xAD" // wide space, Katakana 3365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x80\x80" 3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x90\x80" 3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf1\x80\x80" 3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x80\x80" 3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x90\x80" 
3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80\x80" 3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80" 3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xC2\xA0\xEA\xB0\x80\xED\x9E\xA3" // nbsp, Hangul 3378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80\x80\x80" 3383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80\x80" 3384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xF1\x90\x80\x85" // unassigned supplementary 3386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80\x80\x80" 3391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xF0\xA0\x8F\xBF\xF0\xA8\x8F\xBE" // Han supplementary 3393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* complete sequences but non-shortest forms or out of range etc. 
*/ 3398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xc0\x80" 3399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0\x80\x80" 3400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed\xa0\x80" 3401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x80\x80\x80" 3402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x90\x80\x80" 3403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80\x80\x80\x80" 3404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80\x80\x80\x80" 3405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfe" 3406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xff" 3407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xED\x9E\xA4\xE2\x80\xA8" // unassigned, LS, NUL-terminated 3412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF8)==0) { 3415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); 3418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1); 3419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Take a set of span 
options and multiply them so that 3422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// each portion only has one of the options a, b and c. 3423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// If b==0, then the set of options is just modified with mask and a. 3424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// If b!=0 and c==0, then the set of options is just modified with mask, a and b. 3425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 3426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddAlternative(uint32_t whichSpans[], int32_t whichSpansCount, 3427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t mask, uint32_t a, uint32_t b, uint32_t c) { 3428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t s; 3429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<whichSpansCount; ++i) { 3432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=whichSpans[i]&mask; 3433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[i]=s|a; 3434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b!=0) { 3435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[whichSpansCount+i]=s|b; 3436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c!=0) { 3437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[2*whichSpansCount+i]=s|c; 3438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return b==0 ? whichSpansCount : c==0 ? 
2*whichSpansCount : 3*whichSpansCount; 3442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _63_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _64_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _63_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 3447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _64_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 3448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSpan() { 3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "[...]" is a UnicodeSet pattern. 3451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "*" performs tests on all Unicode code points and on a selection of 3452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // malformed UTF-8/16 strings. 3453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "-options" limits the scope of testing for the current set. 3454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // By default, the test verifies that equivalent boundaries are found 3455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for UTF-16 and UTF-8, going forward and backward, 3456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // alternating USET_SPAN_NOT_CONTAINED with 3457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // either USET_SPAN_CONTAINED or USET_SPAN_SIMPLE. 
3458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Single-character options: 3459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 8 -- UTF-16 and UTF-8 boundaries may differ. 3460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: contains(U+FFFD) is inconsistent with contains(some surrogates), 3461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or the set contains strings with unpaired surrogates 3462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which do not translate to valid UTF-8. 3463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // c -- set.span() and set.complement().span() boundaries may differ. 3464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: Set strings are not complemented. 3465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // b -- span() and spanBack() boundaries may differ. 3466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: Strings in the set overlap, and spanBack(USET_SPAN_CONTAINED) 3467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and spanBack(USET_SPAN_SIMPLE) are defined to 3468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match with non-overlapping substrings. 3469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For example, with a set containing "ab" and "ba", 3470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span() of "aba" yields boundaries { 0, 2, 3 } 3471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because the initial "ab" matches from 0 to 2, 3472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while spanBack() yields boundaries { 0, 1, 3 } 3473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because the final "ba" matches from 1 to 3. 3474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // l -- USET_SPAN_CONTAINED and USET_SPAN_SIMPLE boundaries may differ. 
3475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: Strings in the set overlap, and a longer match may 3476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // require a sequence including non-longest substrings. 3477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For example, with a set containing "ab", "abc" and "cd", 3478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span(contained) of "abcd" spans the entire string 3479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // but span(longest match) only spans the first 3 characters. 3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Each "-options" first resets all options and then applies the specified options. 3481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A "-" without options resets the options. 3482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The options are also reset for each new set. 3483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Other strings will be spanned. 
3484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const testdata[]={ 3485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:ID_Continue:]", 3486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:White_Space:]", 3488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[]", 3490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u0000-\\U0010FFFF]", 3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u0000\\u0080\\u0800\\U00010000]", 3494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]", 3496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]", 3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]", 3501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Overlapping strings cause overlapping attempts to match. 
3505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[x{xy}{xya}{axy}{ax}]", 3506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // More repetitions of "xya" would take too long with the recursive 3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reference implementation. 3510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // containsAll()=FALSE 3511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x14 3512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" // set.complement().span(longest match) will stop here. 3514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" // set.complement().span(contained) will stop between the two 'x'es. 3515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" 3516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" // span() ends here. 
3518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaa", 3519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // containsAll()=TRUE 3521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x15 3522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" 3524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" 3526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxy", 3528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-bc", 3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x17 3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byayaxya", // span() -> { 4, 7, 8 } spanBack() -> { 5, 8 } 3532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byayaxy", // span() -> { 4, 7 } complement.span() -> { 7 } 3534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byayax", // span() -> { 4, 6 } complement.span() -> { 6 } 3535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-", 3536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byaya", // span() -> { 5 } 3537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byay", // span() -> { 4 } 3538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bya", // span() -> { 3 } 3539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span(longest match) will not span the whole string. 
3541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a{ab}{bc}]", 3542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x21 3544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 3545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a{ab}{abc}{cd}]", 3547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "acdabcdabccd", 3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanBack(longest match) will not span the whole string. 3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[c{ab}{bc}]", 3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 3554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[d{cd}{bcd}{ab}]", 3556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abbcdabcdabd", 3558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test with non-ASCII set strings - test proper handling of surrogate pairs 3560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and UTF-8 trail bytes. 3561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copies of above test sets and strings, but transliterated to have 3562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // different code points with similar trail units. 
3563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Previous: a b c d 3564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unicode: 042B 30AB 200AB 204AB 3565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UTF-16: 042B 30AB D840 DCAB D841 DCAB 3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UTF-8: D0 AB E3 82 AB F0 A0 82 AB F0 A0 92 AB 3567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]", 3568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB", 3570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]", 3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB", 3574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Stress bookkeeping and recursion. 3576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following strings are barely doable with the recursive 3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reference implementation. 3578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The not-contained character at the end prevents an early exit from the span(). 
3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[b{bb}]", 3580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x33 3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bbbbbbbbbbbbbbbbbbbbbbbb-", 3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // On complement sets, span() and spanBack() get different results 3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because b is not in the complement set and there is an odd number of b's 3585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the test string. 3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-bc", 3587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bbbbbbbbbbbbbbbbbbbbbbbbb-", 3588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test with set strings with an initial or final code point span 3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // longer than 254. 
3591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a{" _64_a _64_a _64_a _64_a "b}" 3592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "{a" _64_b _64_b _64_b _64_b "}]", 3593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _64_a _64_a _64_a _63_a "b", 3595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _64_a _64_a _64_a _64_a "b", 3596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _64_a _64_a _64_a _64_a "aaaabbbb", 3597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a" _64_b _64_b _64_b _63_b, 3598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a" _64_b _64_b _64_b _64_b, 3599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaaabbbb" _64_b _64_b _64_b _64_b, 3600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test with strings containing unpaired surrogates. 3602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // They are not representable in UTF-8, and a leading trail surrogate 3603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and a trailing lead surrogate must not match in the middle of a proper surrogate pair. 
3604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // U+20001 == \\uD840\\uDC01 3605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // U+20400 == \\uD841\\uDC00 3606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]", 3607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-8cl", 3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb" 3609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans[96]={ SPAN_ALL }; 3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t whichSpansCount=1; 3612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *sets[SET_COUNT]={ NULL }; 3614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL }; 3615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char testName[1024]; 3617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *testNameLimit=testName; 3618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, j; 3620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<LENGTHOF(testdata); ++i) { 3621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s=testdata[i]; 3622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s[0]=='[') { 3623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create new test sets from this pattern. 
3624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<SET_COUNT; ++j) { 3625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets_with_str[j]; 3626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets[j]; 3627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 3629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode); 3630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 36316d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode)); 3632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]); 3635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[SLOW_NOT]->complement(); 3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Intermediate set: Test cloning of a frozen set. 
3637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fast=new UnicodeSet(*sets[SLOW]); 3638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fast->freeze(); 3639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[FAST]=(UnicodeSet *)fast->clone(); 3640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fast; 3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fastNot=new UnicodeSet(*sets[SLOW_NOT]); 3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fastNot->freeze(); 3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[FAST_NOT]=(UnicodeSet *)fastNot->clone(); 3644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fastNot; 3645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<SET_COUNT; ++j) { 3647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets_with_str[j]=new UnicodeSetWithStrings(*sets[j]); 3648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testName, s); 3651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testNameLimit=strchr(testName, 0); 3652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *testNameLimit++=':'; 3653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *testNameLimit=0; 3654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[0]=SPAN_ALL; 3656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=1; 3657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(s[0]=='-') { 3658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[0]=SPAN_ALL; 3659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
whichSpansCount=1; 3660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*++s!=0) { 3662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(*s) { 3663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'c': 3664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~SPAN_POLARITY, 3666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_SET, 3667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_COMPLEMENT, 3668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0); 3669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'b': 3671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~SPAN_DIRS, 3673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_FWD, 3674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_BACK, 3675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0); 3676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'l': 3678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test USET_SPAN_CONTAINED FWD & BACK, and separately 3679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // USET_SPAN_SIMPLE only FWD, and separately 3680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // USET_SPAN_SIMPLE only BACK 3681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~(SPAN_DIRS|SPAN_CONDITION), 
3683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_DIRS|SPAN_CONTAINED, 3684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_FWD|SPAN_SIMPLE, 3685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_BACK|SPAN_SIMPLE); 3686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case '8': 3688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~SPAN_UTFS, 3690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF16, 3691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF8, 3692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0); 3693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 3695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unrecognized span set option in \"%s\"", testdata[i]); 3696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(0==strcmp(s, "*")) { 3700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testNameLimit, "bad_string"); 3701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<whichSpansCount; ++j) { 3702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(whichSpansCount>1) { 3703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(testNameLimit+10 /* strlen("bad_string") */, 3704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%%0x%3x", 3705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[j]); 
3706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanUTF16String(sets_with_str, whichSpans[j], testName); 3708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanUTF8String(sets_with_str, whichSpans[j], testName); 3709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testNameLimit, "contents"); 3712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<whichSpansCount; ++j) { 3713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(whichSpansCount>1) { 3714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(testNameLimit+8 /* strlen("contents") */, 3715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%%0x%3x", 3716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[j]); 3717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanContents(sets_with_str, whichSpans[j], testName); 3719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString string=UnicodeString(s, -1, US_INV).unescape(); 3722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testNameLimit, "test_string"); 3723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<whichSpansCount; ++j) { 3724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(whichSpansCount>1) { 3725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(testNameLimit+11 /* strlen("test_string") */, 3726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%%0x%3x", 3727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
whichSpans[j]); 3728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanBothUTFs(sets_with_str, string.getBuffer(), string.length(), whichSpans[j], testName, i); 3730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<SET_COUNT; ++j) { 3734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets_with_str[j]; 3735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets[j]; 3736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test select patterns and strings, and test USET_SPAN_SIMPLE. 
3740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStringSpan() { 3741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *pattern="[x{xy}{xya}{axy}{ax}]"; 3742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const string= 3743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya" 3745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya" 3747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy" 3749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaaa"; 3750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 3752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern16=UnicodeString(pattern, -1, US_INV); 3753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pattern16, errorCode); 3754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode)); 3756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString string16=UnicodeString(string, -1, US_INV).unescape(); 
3760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(set.containsAll(string16)) { 3762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).containsAll(%s) should be FALSE", pattern, string); 3763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remove trailing "aaaa". 3766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16.truncate(string16.length()-4); 3767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!set.containsAll(string16)) { 3768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string); 3769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16=UNICODE_STRING_SIMPLE("byayaxya"); 3772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16=string16.getBuffer(); 3773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length16=string16.length(); 3774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 || 3775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 || 3776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 || 3777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 5, USET_SPAN_NOT_CONTAINED)!=5 || 3778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 4, USET_SPAN_NOT_CONTAINED)!=4 || 3779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 3, USET_SPAN_NOT_CONTAINED)!=3 
3780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 3781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).span(while not) returns the wrong value", pattern); 3782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern="[a{ab}{abc}{cd}]"; 3785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern16=UnicodeString(pattern, -1, US_INV); 3786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern(pattern16, errorCode); 3787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode)); 3789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16=UNICODE_STRING_SIMPLE("acdabcdabccd"); 3792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s16=string16.getBuffer(); 3793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length16=string16.length(); 3794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 || 3795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 12, USET_SPAN_SIMPLE)!=6 || 3796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16+7, 5, USET_SPAN_SIMPLE)!=5 3797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 3798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).span(while longest match) returns the wrong value", pattern); 3799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
3801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern="[d{cd}{bcd}{ab}]"; 3802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern16=UnicodeString(pattern, -1, US_INV); 3803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern(pattern16, errorCode).freeze(); 3804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode)); 3806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16=UNICODE_STRING_SIMPLE("abbcdabcdabd"); 3809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s16=string16.getBuffer(); 3810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length16=string16.length(); 3811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 || 3812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 || 3813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0 3814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 3815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern); 3816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3818