usettest.cpp revision c69afcec261fc345fda8daf46f0ea6b4351dc777
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************** 3c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru* Copyright (C) 1999-2008 International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 10/20/99 alan Creation. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/22/2000 Madhu Added additional tests 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************** 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "usettest.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/usetiter.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uversion.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \ 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_errorName(status));}} 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define TEST_ASSERT(expr) {if (!(expr)) { \ 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("fail in file \"%s\", line %d", __FILE__, __LINE__); }} 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) { 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat); 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return left + UnicodeSetTest::escape(pat); 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CASE(id,test) case id: \ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = #test; 
\ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) { \ 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(#test "---"); \ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(); \ 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test(); \ 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } \ 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) { 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUConverter *UnicodeSetTest::openUTF8Converter() { 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(utf8Cnv==NULL) { 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utf8Cnv=ucnv_open("UTF-8", &errorCode); 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return utf8Cnv; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::~UnicodeSetTest() { 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(utf8Cnv); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::runIndexedTest(int32_t index, UBool exec, 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* &name, char* /*par*/) { 
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (exec) logln((UnicodeString)"TestSuite UnicodeSetTest"); 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(0,TestPatterns); 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(1,TestAddRemove); 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(2,TestCategories); 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(3,TestCloneEqualHash); 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(4,TestMinimalRep); 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(5,TestAPI); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(6,TestScriptSet); 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(7,TestPropertySet); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(8,TestClone); 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(9,TestExhaustive); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(10,TestToPattern); 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(11,TestIndexOf); 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(12,TestStrings); 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(13,Testj2268); 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(14,TestCloseOver); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(15,TestEscapePattern); 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(16,TestInvalidCodePoint); 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(17,TestSymbolTable); 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(18,TestSurrogate); 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(19,TestPosixClasses); 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
CASE(20,TestIteration); 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(21,TestFreezable); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(22,TestSpan); 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(23,TestStringSpan); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; break; 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char NOT[] = "%%%%"; 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UVector was improperly copying contents 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This code will crash this is still true 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::Testj2268() { 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(UnicodeString("abc")); 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet test(t); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ustrPat; 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test.toPattern(ustrPat, TRUE); 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test toPattern(). 
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestToPattern() { 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test that toPattern() round trips with syntax characters and 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // whitespace. 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char* OTHER_TOPATTERN_TESTS[] = { 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:latin:]&[:greek:]]", 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:latin:]-[:greek:]]", 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:nonspacing mark:]", 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t j=0; OTHER_TOPATTERN_TESTS[j]!=NULL; ++j) { 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_ZERO_ERROR; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j]); 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkPat(OTHER_TOPATTERN_TESTS[j], s); 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (UChar32 i = 0; i <= 0x10FFFF; ++i) { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((i <= 0xFF && !u_isalpha(i)) || u_isspace(i)) { 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // check various combinations to make sure they all work. 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 0 && !toPatternAux(i, i)){ 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!toPatternAux(0, i)){ 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!toPatternAux(i, 0xFFFF)){ 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test pattern behavior of multicharacter strings. 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_ZERO_ERROR; 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* s = new UnicodeSet("[a-z {aa} {ab}]", ec); 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This loop isn't a loop. It's here to make the compiler happy. 
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If you're curious, try removing it and changing the 'break' 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // statements (except for the last) to goto's. 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) break; 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp1[] = {"aa", "ab", NOT, "ac", NULL}; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(*s, "[a-z{aa}{ab}]", exp1); 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add("ac"); 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL}; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2); 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec); 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) break; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp3[] = {"{l", "r}", NOT, "xy", NULL}; 176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3); 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add("[]"); 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL}; 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4); 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec); 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) break; 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL}; 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // j2189 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->clear(); 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add(UnicodeString("abc", "")); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s->add(UnicodeString("abc", "")); 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* exp6[] = {"abc", NOT, "ab", NULL}; 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(*s, "[{abc}]", exp6); 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) errln("FAIL: pattern parse error"); 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete s; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#3400: For 2 character ranges prefer [ab] to [a-b] 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.add((UChar)97, (UChar)98); // 'a', 'b' 
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectToPattern(s, "[ab]", NULL); 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::toPatternAux(UChar32 start, UChar32 end) { 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use Integer.toString because Utility.hex doesn't handle ints 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat = ""; 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO do these in hex 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //String source = "0x" + Integer.toString(start,16).toUpperCase(); 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase(); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = source + (uint32_t)start; 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start != end) 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = source + ".." 
+ (uint32_t)end; 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet testSet; 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSet.add(start, end); 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return checkPat(source, testSet); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source, 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet& testSet) { 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // What we want to make sure of is that a pattern generated 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // by toPattern(), with or without escaped unprintables, can 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be passed back into the UnicodeSet constructor. 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat0; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSet.toPattern(pat0, TRUE); 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!checkPat(source + " (escaped)", testSet, pat0)) return FALSE; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //String pat1 = unescapeLeniently(pat0); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (!checkPat(source + " (in code)", testSet, pat1)) return false; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat2; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSet.toPattern(pat2, FALSE); 
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!checkPat(source, testSet, pat2)) return FALSE; 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //String pat3 = unescapeLeniently(pat2); 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (!checkPat(source + " (in code)", testSet, pat3)) return false; 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3); 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)source + " => " + pat0 + ", " + pat2); 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source, 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet& testSet, 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& pat) { 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet testSet2(pat, ec); 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (testSet2 != testSet) { 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail toPattern: " + source + " => " + pat); 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestPatterns(void) { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[[a-m]&[d-z]&[k-y]]", ""), "km"); 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[[a-z]-[m-y]-[d-r]]", ""), "aczz"); 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[a\\-z]", ""), "--aazz"); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[-az]", ""), "--aazz"); 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[az-]", ""), "--aazz"); 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, UnicodeString("[[[a-z]-[aeiou]i]]", ""), "bdfnptvz"); 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Throw in a test of complement 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString exp; 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(0x007a+1)).append((UChar)0xFFFF); 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, exp); 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCategories(void) { 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:] 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pat, status); 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail: Can't construct set with " + pat); 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, pat, "ABC", "abc"); 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 i; 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t failures = 0; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure generation of L doesn't pollute cached Lu set 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First generate L, then Lu 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[:L:]", status); 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<0x200; ++i) { 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool l = u_isalpha((UChar)i); 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (l != set.contains(i)) { 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: L contains " + (unsigned short)i + " = " + 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.contains(i)); 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (++failures == 10) break; 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[:Lu:]", status); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<0x200; ++i) { 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool lu = (u_charType((UChar)i) == U_UPPERCASE_LETTER); 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lu != set.contains(i)) { 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: Lu contains " + (unsigned short)i + " = " + 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.contains(i)); 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (++failures == 20) break; 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCloneEqualHash(void) { 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // set1 and set2 used to be built with the obsolete constructor taking 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UCharCategory values; replaced with pattern constructors 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // markus 20030502 321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); // :Ll: Letter, lowercase 322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); // Letter, lowercase 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)){ 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: Can't construst set with category->Ll"); 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status); //Number, Decimal digit 328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status); //Number, Decimal digit 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)){ 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: Can't construct set with category->Nd"); 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*set1 != *set1a) { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: category constructor for Ll broken"); 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*set2 != *set2a) { 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: category constructor for Nd broken"); 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1a; 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set2a; 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing copy construction"); 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set1copy=new UnicodeSet(*set1); 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*set1 != *set1copy || *set1 == *set2 || 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru getPairs(*set1) != getPairs(*set1copy) || 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set1->hashCode() != set1copy->hashCode()){ 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL : Error in copy construction"); 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing =operator"); 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set1equal=*set1; 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set2equal=*set2; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(set1equal != *set1 || set1equal != *set1copy || set2equal != *set2 || 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2equal == *set1 || set2equal == *set1copy || set2equal == set1equal){ 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Error in =operator"); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing clone()"); 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set1clone=(UnicodeSet*)set1->clone(); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set2clone=(UnicodeSet*)set2->clone(); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*set1clone != 
*set1 || *set1clone != *set1copy || *set1clone != set1equal || 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *set2clone != *set2 || *set2clone == *set1copy || *set2clone != set2equal || 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *set2clone == *set1 || *set2clone == set1equal || *set2clone == *set1clone){ 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Error in clone"); 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Testing hashcode"); 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(set1->hashCode() != set1equal.hashCode() || set1->hashCode() != set1clone->hashCode() || 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2->hashCode() != set2equal.hashCode() || set2->hashCode() != set2clone->hashCode() || 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set1copy->hashCode() != set1equal.hashCode() || set1copy->hashCode() != set1clone->hashCode() || 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set1->hashCode() == set2->hashCode() || set1copy->hashCode() == set2->hashCode() || 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2->hashCode() == set1clone->hashCode() || set2->hashCode() == set1equal.hashCode() ){ 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Error in hashCode()"); 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1; 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1copy; 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set2; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set1clone; 
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set2clone; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestAddRemove(void) { 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; // Construct empty set 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == TRUE, "set should be empty"); 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0, "size should be 0"); 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0x110000, "size should be 0x110000"); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061, 0x007a); 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "az"); 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == FALSE, "set should not be empty"); 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() != 0, "size should not be equal to 0"); 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 26, "size should be equal to 26"); 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x006d, 0x0070); 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "alqz"); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 22, "size should be equal to 22"); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0065, 0x0067); 
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "adhlqz"); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 19, "size should be equal to 19"); 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0064, 0x0069); 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "acjlqz"); 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 16, "size should be equal to 16"); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0063, 0x0072); 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "absz"); 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 10, "size should be equal to 10"); 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0066, 0x0071); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "abfqsz"); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 22, "size should be equal to 22"); 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0061, 0x0067); 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "hqsz"); 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(0x0061, 0x007a); 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, ""); 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == TRUE, "set should be empty"); 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0, "size should be 0"); 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061); 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == FALSE, "set should not be empty"); 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 1, "size 
should not be equal to 1"); 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0062); 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0063); 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "ac"); 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 3, "size should not be equal to 3"); 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0070); 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0071); 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "acpq"); 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 5, "size should not be equal to 5"); 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, ""); 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.isEmpty() == TRUE, "set should be empty"); 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == 0, "size should be 0"); 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try removing an entire set from another set 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, "[c-x]", "cx"); 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set2; 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz"); 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.removeAll(set2); 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "deluxx"); 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try adding an entire set to another set 
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, "[jackiemclean]", "aacceein"); 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort"); 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aacehort"); 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2"); 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try retaining an set of elements contained in another set (intersection) 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set3; 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set3, "[a-c]", "ac"); 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set3) == FALSE, "set doesn't contain all the elements in set3"); 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set3.remove(0x0062); 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set3, "aacc"); 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3"); 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retainAll(set3); 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aacc"); 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() == set3.size(), "set.size() should be set3.size()"); 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3"); 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.size() != set3.size(), "set.size() != set3.size()"); 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test commutativity 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort"); 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPattern(set2, "[jackiemclean]", "aacceein"); 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aacehort"); 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2"); 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure minimal representation is maintained. 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestMinimalRep() { 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is pretty thoroughly tested by checkCanonicalRep() 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // run against the exhaustive operation results. 
Use the code 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // here for debugging specific spot problems. 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1 overlap against 2 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set("[h-km-q]", status); 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set2("[i-o]", status); 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "hq"); 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // right 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[a-m]", status); 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2.applyPattern("[e-o]", status); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "ao"); 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // left 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[e-o]", status); 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2.applyPattern("[a-m]", status); 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 
errln("FAIL"); return; } 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "ao"); 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1 overlap against 3 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[a-eg-mo-w]", status); 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set2.applyPattern("[d-q]", status); 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(set2); 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectPairs(set, "aw"); 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestAPI() { 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // default ct 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.isEmpty() || set.getRangeCount() != 0) { 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, set should be empty but isn't: " + 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set); 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // clear(), isEmpty() 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061); 
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.isEmpty()) { 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, set shouldn't be empty but is: " + 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set); 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.isEmpty()) { 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, set should be empty but isn't: " + 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set); 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // size() 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 0) { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, size should be 0, but is " + set.size() + 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": " + set); 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0061); 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 1) { 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, size should be 1, but is " + set.size() + 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": " + set); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(0x0031, 0x0039); 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 10) { 
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, size should be 10, but is " + set.size() + 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": " + set); 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains(first, last) 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern("[A-Y 1-8 b-d l-y]", status); 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i<set.getRangeCount(); ++i) { 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = set.getRangeStart(i); 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b = set.getRangeEnd(i); 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.contains(a, b)) { 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, should contain " + (unsigned short)a + '-' + (unsigned short)b + 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " but doesn't: " + set); 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains((UChar32)(a-1), b)) { 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, shouldn't contain " + 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned short)(a-1) + '-' + (unsigned short)b + 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " but does: " + set); 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains(a, 
(UChar32)(b+1))) { 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL, shouldn't contain " + 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned short)a + '-' + (unsigned short)(b+1) + 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " but does: " + set); 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ported InversionList test. 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet a((UChar32)3,(UChar32)10); 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet b((UChar32)7,(UChar32)15); 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet c; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"a [3-10]: " + a); 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"b [7-15]: " + b); 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = a; 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.addAll(b); 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet exp((UChar32)3,(UChar32)15); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.set(a).add(b): " + c); 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: c.set(a).add(b) = " + c + ", expect " + exp); 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.complement(); 
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.set((UChar32)0, (UChar32)2); 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.add((UChar32)16, UnicodeSet::MAX_VALUE); 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.complement(): " + c); 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp); 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.complement(); 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.set((UChar32)3, (UChar32)15); 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.complement(): " + c); 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp); 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = a; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.complementAll(b); 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.set((UChar32)3,(UChar32)6); 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.add((UChar32)11,(UChar32) 15); 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"c.set(a).exclusiveOr(b): " + c); 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
errln((UnicodeString)"FAIL: c.set(a).exclusiveOr(b) = " + c + ", expect " + exp); 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp = c; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(setToBits(c), c); 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == exp) { 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"bitsToSet(setToBits(c)): " + c); 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp); 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Additional tests for coverage JB#2118 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::complement(class UnicodeString const &) 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::complementAll(class UnicodeString const &) 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsNone(class UnicodeSet const &) 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsNone(long,long) 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsSome(class UnicodeSet const &) 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::containsSome(long,long) 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::removeAll(class UnicodeString const &) 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::retain(long) 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::retainAll(class 
UnicodeString const &) 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &) 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UnicodeSetIterator::getString(void) 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement("ab"); 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[{ab}]", status); 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: complement(\"ab\")"); return; } 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator iset(set); 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!iset.next() || !iset.isString()) { 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSetIterator::next/isString"); 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (iset.getString() != "ab") { 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSetIterator::getString"); 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add((UChar32)0x61, (UChar32)0x7A); 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complementAll("alan"); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[{ab}b-kmo-z]", status); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: complementAll(\"alan\")"); 
return; } 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[a-z]", status); 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); } 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); } 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[aln]", status); 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); } 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsNone((UChar32)0x61, (UChar32)0x7A)) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsNone(UChar32, UChar32)"); 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsSome((UChar32)0x61, (UChar32)0x7A)) { 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsSome(UChar32, UChar32)"); 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.containsNone((UChar32)0x41, (UChar32)0x5A)) { 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsNone(UChar32, UChar32)"); 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.containsSome((UChar32)0x41, (UChar32)0x5A)) { 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: containsSome(UChar32, UChar32)"); 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.removeAll("liu"); 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[{ab}b-hj-kmo-tv-z]", status); 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: removeAll(\"liu\")"); return; } 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retainAll("star"); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[rst]", status); 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: retainAll(\"star\")"); return; } 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retain((UChar32)0x73); 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exp.applyPattern("[s]", status); 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL"); return; } 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set != exp) { errln("FAIL: retain('s')"); return; } 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t buf[32]; 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t slen = set.serialize(buf, 
sizeof(buf)/sizeof(buf[0]), status); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { errln("FAIL: serialize"); return; } 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) { 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: serialize"); 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIteration() { 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i = 0; 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int outerLoop; 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 6 code points, 3 ranges, 2 strings, 8 total elements 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iteration will access them in sorted order - a, b, c, y, z, U0001abcd, "str1", "str2" 708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec); 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(ec); 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator it(set); 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (outerLoop=0; outerLoop<3; outerLoop++) { 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run the test multiple times, to check that iterator.reset() is working. 
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<10; i++) { 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool nextv = it.next(); 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isString = it.isString(); 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t codePoint = it.getCodepoint(); 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t codePointEnd = it.getCodepointEnd(); 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = it.getString(); 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (i) { 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x61); 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "a"); 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x62); 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "b"); 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
TEST_ASSERT(codePoint==0x63); 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "c"); 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x79); 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "y"); 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x7a); 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "z"); 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == FALSE); 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(codePoint==0x1abcd); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == UnicodeString((UChar32)0x1abcd)); 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == TRUE); 
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "str1"); 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 7: 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == TRUE); 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(isString == TRUE); 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s == "str2"); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 8: 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == FALSE); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 9: 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(nextv == FALSE); 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru it.reset(); // prepare to run the iteration again. 
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStrings() { 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* testList[] = { 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet::createFromAll("abc"), 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[a-c]", ec), 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &(UnicodeSet::createFrom("ch")->add('a','z').add("ll")), 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[{ll}{ch}a-z]", ec), 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet::createFrom("ab}c"), 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[{ab\\}c}]", ec), 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')), 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec), 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct test sets"); 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; testList[i] != NULL; i+=2) { 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec)) { 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat0, pat1; 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testList[i]->toPattern(pat0, TRUE); 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testList[i+1]->toPattern(pat1, TRUE); 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*testList[i] == *testList[i+1]) { 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: " + pat0 + " == " + pat1); 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"FAIL: " + pat0 + " != " + pat1); 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testList[i]; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete testList[i+1]; 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax. 
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestScriptSet() { 825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1")); 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA"); 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Jitterbug 1423 */ 830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA"); 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax. 
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPropertySet() { 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char* const DATA[] = { 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pattern, Chars IN, Chars NOT in 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Latin:]", 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aA", 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0391\\u03B1", 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\p{Greek}]", 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0391\\u03B1", 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aA", 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\P{ GENERAL Category = upper case letter }", 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ABC", 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Combining class: @since ICU 2.2 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check both symbolic and numeric 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{ccc=Nukta}", 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0ABC", 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{Canonical Combining Class = 11}", 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u05B1", 
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u05B2", 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:c c c = iota subscript :]", 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0345", 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyz", 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Bidi class: @since ICU 2.2 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{bidiclass=lefttoright}", 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0671\\u0672", 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Binary properties: @since ICU 2.2 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\p{ideographic}", 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u4E0A", 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "x", 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:math=false:]", 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "q)*(", 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // weiv: )(and * were removed from math in Unicode 4.0.1 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //"(*+)", 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "+<>^", 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#1767 \N{}, \p{ASCII} 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Ascii:]", 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc\\u0000\\u007F", 
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0080\\u4E00", 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\N{ latin small letter a }[:name= latin small letter z:]]", 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "az", 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "qrs", 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2015 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:any:]", 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\U0010FFFF", 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "", 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:nv=0.5:]", 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00BD\\u0F2A", 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00BC", 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2653: Age 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Age=1.1:]", 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u03D6", // 1.1 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u03D8\\u03D9", // 3.2 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Age=3.1:]", 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1800\\u3400\\U0002f800", 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000", 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2350: Case_Sensitive 
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Case Sensitive:]", 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u1FFC\\U00010410", 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ";\\u00B4\\U00010500", 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // JB#2832: C99-compatibility props 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:blank:]", 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " \\u0009", 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "1-9A-Z", 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:graph:]", 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "19AZ", 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " \\u0003\\u0007\\u0009\\u000A\\u000D", 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:punct:]", 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "!@#%&*()[]{}-_\\/;:,.?'\"", 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "09azAZ", 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:xdigit:]", 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "09afAF", 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "gG!", 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Regex compatibility test 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[-b]", // leading '-' is literal 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[^-b]", // leading '-' is literal 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[b-]", // trailing '-' is literal 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[^b-]", // trailing '-' is literal 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ac", 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-b", 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a-b-]", // trailing '-' is literal 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ab-", 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "c=", 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[a-q]&[p-z]-]", // trailing '-' is literal 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "pq-", 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "or=", 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\s|\\)|:|$|\\>]", // from regex tests 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "s|):$>", 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\uDC00cd]", // JB#2906: isolated trail at start 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"cd\\uDC00", 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ab\\uD800\\U00010000", 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ab\\uD800]", // JB#2906: isolated trail at start 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ab\\uD800", 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "cd\\uDC00\\U00010000", 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ab\\uD800cd]", // JB#2906: isolated lead in middle 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\uD800", 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ef\\uDC00\\U00010000", 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ab\\uDC00cd]", // JB#2906: isolated trail in middle 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\uDC00", 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ef\\uD800\\U00010000", 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:^lccc=0:]", // Lead canonical class 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0300\\u0301", 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\u00c0\\u00c5", 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:^tccc=0:]", // Trail canonical class 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0300\\u0301\\u00c0\\u00c5", 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd", 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class 
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0300\\u0301\\u00c0\\u00c5", 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd", 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now) 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "", 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\u0300\\u0301\\u00c0\\u00c5", 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0F73\\u0F75\\u0F81", 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abcd\\u0300\\u0301\\u00c0\\u00c5", 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:Assigned:]", 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD", 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0888\\uFDD3\\uFFFE\\U00050005" 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]); 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<DATA_LEN; i+=3) { 1005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]), 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
CharsToUnicodeString(DATA[i+2])); 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test that Posix style character classes [:digit:], etc. 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the Unicode definitions from TR 18. 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPosixClasses() { 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:alpha:]", status); 1018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status); 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:lower:]", status); 1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status); 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:upper:]", status); 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status); 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:punct:]", status); 1039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status); 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:digit:]", status); 1046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status); 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:xdigit:]", status); 1053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status); 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:alnum:]", status); 1060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status); 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:space:]", status); 1067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status); 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:blank:]", status); 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"), 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status); 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:cntrl:]", status); 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status); 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:graph:]", status); 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet 
s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status); 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s1("[:print:]", status); 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status); 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT_SUCCESS(status); 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TEST_ASSERT(s1==s2); 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test cloning of UnicodeSet. For C++, we test the copy constructor. 
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestClone() { 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s("[abcxyz]", ec); 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t(s); 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(t, "abc", "def"); 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the indexOf() and charAt() methods. 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIndexOf() { 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set("[a-cx-y3578]", ec); 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet constructor"); 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<set.size(); ++i) { 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = set.charAt(i); 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.indexOf(c) != i) { 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: charAt(%d) = %X => indexOf() => %d", 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, c, 
set.indexOf(c)); 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = set.charAt(set.size()); 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != -1) { 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: charAt(<out of range>) = %X", c); 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t j = set.indexOf((UChar32)0x71/*'q'*/); 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (j != -1) { 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: indexOf('q') = " + j); 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test closure API. 
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestCloseOver() { 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char CASE[] = {(char)USET_CASE_INSENSITIVE}; 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS}; 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // selector, input, output 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aq\\u00DF{Bc}{bC}{Fi}]", 1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]", // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1]", // 'DZ' 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1\\u01F2\\u01F3]", 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u1FB4]", 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u1FB4{\\u03AC\\u03B9}]", 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[{F\\uFB01}]", 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\uFB03{ffi}]", 
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, // make sure binary search finds limits 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a\\uFF3A]", 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aA\\uFF3A\\uFF5A]", 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a-z]","[A-Za-z\\u017F\\u212A]", 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[abc]","[A-Ca-c]", 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[ABC]","[A-Ca-c]", 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[i]", "[iI]", 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u0130]", "[\\u0130{i\\u0307}]", // dotted I 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{i\\u0307}]", "[\\u0130{i\\u0307}]", // i with dot 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u0131]", "[\\u0131]", // dotless i 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u0390]", "[\\u0390\\u1FD3{\\u03B9\\u0308\\u0301}]", 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u03c2]", "[\\u03a3\\u03c2\\u03c3]", // sigmas 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u03f2]", "[\\u03f2\\u03f9]", // lunate sigmas 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u03f7]", "[\\u03f7\\u03f8]", 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\u1fe3]", "[\\u03b0\\u1fe3{\\u03c5\\u0308\\u0301}]", 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\ufb05]", "[\\ufb05\\ufb06{st}]", 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{st}]", "[\\ufb05\\ufb06{st}]", 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[\\U0001044F]", "[\\U00010427\\U0001044F]", 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{a\\u02BE}]", "[\\u1E9A{a\\u02BE}]", // first in sorted table 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE_MAPPINGS, 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aq\\u00DF{Bc}{bC}{Fi}]", 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]", 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE_MAPPINGS, 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u01F1]", // 'DZ' 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"[\\u01F1\\u01F2\\u01F3]", 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE_MAPPINGS, 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a-z]", 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[A-Za-z]", 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet s; 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t; 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString buf; 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; DATA[i]!=NULL; i+=3) { 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t selector = DATA[i][0]; 1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pat(DATA[i+1], -1, US_INV); 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString exp(DATA[i+2], -1, US_INV); 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.applyPattern(pat, ec); 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.closeOver(selector); 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.applyPattern(exp, ec); 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern failed"); 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s == t) { 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: " + pat 
+ ".closeOver(" + selector + ") => " + exp); 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " + 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.toPattern(buf, TRUE) + ", expected " + exp); 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unused test code. 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This was used to compare the old implementation (using USET_CASE) 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with the new one (using 0x100 temporarily) 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * while transitioning from hardcoded case closure tables in uniset.cpp 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu. 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and using ucase.c functions for closure. 
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note: The old and new implementation never fully matched because 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the old implementation turned out to not map U+0130 and U+0131 correctly 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (dotted I and dotless i) and because the old implementation's data tables 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * were outdated compared to Unicode 4.0.1 at the time of the change to the 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * new implementation. (So sigmas and some other characters were not handled 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * according to the newer Unicode version.) 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2; 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator si(sens); 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str, buf2; 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *pStr; 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(si.next()) { 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!si.isString()) { 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=si.getCodepoint(); 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.clear(); 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.add(c); 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.setTo(c); 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.foldCase(); 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sens2.add(str); 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=s; 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.closeOver(USET_CASE); 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.closeOver(0x100); 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s!=t) { 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: closeOver(U+%04x) differs: ", c); 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE)); 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove all code points 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // should contain all full case folding mapping strings 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sens2.remove(0, 0x10ffff); 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru si.reset(sens2); 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(si.next()) { 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(si.isString()) { 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pStr=&si.getString(); 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.clear(); 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.add(*pStr); 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=s2=s; 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru s.closeOver(USET_CASE); 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.closeOver(0x100); 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s!=t) { 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, TRUE)+") differs: "); 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE)); 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test the pattern API 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec); 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern failed"); 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(s, "abcABC", "defDEF"); 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, NULL, ec); 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: constructor failed"); 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(v, "defDEF", "abcABC"); 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet cm("[abck]", USET_ADD_CASE_MAPPINGS, NULL, ec); 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: construct w/case mappings failed"); 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(cm, "abckABCK", CharsToUnicodeString("defDEF\\u212A")); 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestEscapePattern() { 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char pattern[] = 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\uFEFF \\u200A-\\u200E \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]"; 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char exp[] = 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]"; 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We test this with two passes; in the second pass we 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pre-unescape the pattern. Since U+200E is rule whitespace, 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this fails -- which is what we expect. 
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t pass=1; pass<=2; ++pass) { 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pat(pattern, -1, US_INV); 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pass==2) { 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = pat.unescape(); 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pattern is only good for pass 1 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isPatternValid = (pass==1); 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pat, ec); 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec) != isPatternValid){ 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: applyPattern(" + 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(pat) + ") => " + 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_errorName(ec)); 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains((UChar)0x0644)){ 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + escape(pat) + " contains(U+0664)"); 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString newpat; 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(newpat, TRUE); 1357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (newpat == UnicodeString(exp, -1, US_INV)) { 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(escape(pat) + " => " + newpat); 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat); 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<set.getRangeCount(); ++i) { 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str("Range "); 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar)(0x30 + i)) 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append(": ") 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append((UChar32)set.getRangeStart(i)) 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append(" - ") 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append((UChar32)set.getRangeEnd(i)); 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str = str + " (" + set.getRangeStart(i) + " - " + 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.getRangeEnd(i) + ")"; 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.getRangeStart(i) < 0) { 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + escape(str)); 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(escape(str)); 
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectRange(const UnicodeString& label, 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet& set, 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start, UChar32 end) { 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet exp(start, end); 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set == exp) { 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(label + " => " + set.toPattern(pat, TRUE)); 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString xpat; 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + label + " => " + 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat, TRUE) + 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected " + exp.toPattern(xpat, TRUE)); 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestInvalidCodePoint() { 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 DATA[] = { 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 
Test range Expected range 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, 0x10FFFF, 0, 0x10FFFF, 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar32)-1, 8, 0, 8, 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8, 0x110000, 8, 0x10FFFF 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]); 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<DATA_LENGTH; i+=4) { 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = DATA[i]; 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = DATA[i+1]; 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 xstart = DATA[i+2]; 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 xend = DATA[i+3]; 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try various API using the test code points 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(start, end); 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"ct(" + start + "," + end + ")", 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.set(start, end); 
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"set(" + start + "," + end + ")", 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool b = set.contains(start); 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.contains(start, end); 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsNone(start, end); 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsSome(start, end); 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*int32_t index = set.indexOf(start);*/ 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.clear(); 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(start); 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.add(start, end); 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"add(" + start + "," + end + ")", 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.set(0, 0x10FFFF); 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retain(start, end); 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"retain(" + start + "," + end + ")", 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.retain(start); 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru set.set(0, 0x10FFFF); 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(start); 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.remove(start, end); 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"!remove(" + start + "," + end + ")", 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.set(0, 0x10FFFF); 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(start, end); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(); 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectRange((UnicodeString)"!complement(" + start + "," + end + ")", 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set, xstart, xend); 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.complement(start); 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 DATA2[] = { 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x10FFFF, 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar32)-1, 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x110000 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]); 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<DATA2_LENGTH; ++i) { 
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = DATA2[i], end = 0x10FFFF; 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool valid = (c >= 0 && c <= 0x10FFFF); 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(0, 0x10FFFF); 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For single-codepoint contains, invalid codepoints are NOT contained 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool b = set.contains(c); 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b == valid) { 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"[\\u0000-\\U0010FFFF].contains(" + c + 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + b); 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].contains(" + c + 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + b); 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For codepoint range contains, containsNone, and containsSome, 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invalid or empty (start > end) ranges have UNDEFINED behavior. 
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.contains(c, end); 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"* [\\u0000-\\U0010FFFF].contains(" + c + 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "," + end + ") = " + b); 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsNone(c, end); 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsNone(" + c + 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "," + end + ") = " + b); 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = set.containsSome(c, end); 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsSome(" + c + 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "," + end + ") = " + b); 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t index = set.indexOf(c); 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((index >= 0) == valid) { 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"[\\u0000-\\U0010FFFF].indexOf(" + c + 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + index); 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].indexOf(" + c + 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ") = " + index); 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used by TestSymbolTable 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass TokenSymbolTable : public SymbolTable { 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Hashtable contents; 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) { 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contents.setValueDeleter(uhash_deleteUnicodeString); 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~TokenSymbolTable() {} 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (Non-SymbolTable API) Add the given variable and value to 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the table. Variable should NOT contain leading '$'. 
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void add(const UnicodeString& var, const UnicodeString& value, 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& ec) { 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec)) { 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contents.put(var, new UnicodeString(value), ec); 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SymbolTable API 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual const UnicodeString* lookup(const UnicodeString& s) const { 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (const UnicodeString*) contents.get(s); 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SymbolTable API 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual const UnicodeFunctor* lookupMatcher(UChar32 /*ch*/) const { 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /** 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SymbolTable API 
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru virtual UnicodeString parseReference(const UnicodeString& text, 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition& pos, int32_t limit) const { 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start = pos.getIndex(); 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = start; 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (i < limit) { 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = text.charAt(i); 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++i; 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i == start) { // No valid name chars 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; // Indicate failure with empty string 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos.setIndex(i); 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.extractBetween(start, i, result); 1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSymbolTable() { 
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Multiple test cases can be set up here. Each test case 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // is terminated by null: 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // var, value, var, value,..., input pat., exp. output pat., null 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "us", "a-z", "[0-1$us]", "[0-1a-z]", NULL, 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", NULL, 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", NULL, 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; DATA[i]!=NULL; ++i) { 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TokenSymbolTable sym(ec); 1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct TokenSymbolTable"); 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set up variables 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (DATA[i+2] != NULL) { 1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru sym.add(UnicodeString(DATA[i], -1, US_INV), UnicodeString(DATA[i+1], -1, US_INV), ec); 
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't add to TokenSymbolTable"); 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 2; 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Input pattern and expected output pattern 1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString inpat = UnicodeString(DATA[i], -1, US_INV), exppat = UnicodeString(DATA[i+1], -1, US_INV); 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += 2; 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition pos(0); 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet us(inpat, pos, USET_IGNORE_SPACE, &sym, ec); 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct UnicodeSet"); 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // results 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos.getIndex() != inpat.length()) { 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Failed to read to end of string \"" 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + inpat + "\": read to " 
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + pos.getIndex() + ", length is " 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + inpat.length()); 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet us2(exppat, ec); 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: couldn't construct expected UnicodeSet"); 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString a, b; 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (us != us2) { 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Failed, got " + us.toPattern(a, TRUE) + 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected " + us2.toPattern(b, TRUE)); 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok, got " + us.toPattern(a, TRUE)); 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSurrogate() { 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // These should all behave identically 
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[abc\\uD800\\uDC00]", 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "[abc\uD800\uDC00]", // Can't do this on C -- only Java 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[abc\\U00010000]", 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0 1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i=0; DATA[i] != 0; ++i) { 1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV)); 1644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet set(UnicodeString(DATA[i], -1, US_INV), ec); 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet constructor"); 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharsToUnicodeString("abc\\U00010000"), 1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair 1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.size() != 4) { 1653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " + 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.size() + ", expected 4"); 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestExhaustive() { 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // exhaustive tests. Simulate UnicodeSets with integers. 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // That gives us very solid tests (except for large memory tests). 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit = 128; 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet x, y, z, aa; 1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < limit; ++i) { 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(i, x); 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Testing " + i + ", " + x); 1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testComplement(i, x, y); 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // AS LONG AS WE ARE HERE, check roundtrip 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkRoundTrip(bitsToSet(i, aa)); 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t j = 0; j < limit; ++j) { 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testAdd(i,j, x,y,z); 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testXor(i,j, x,y,z); 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testRetain(i,j, x,y,z); 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru _testRemove(i,j, x,y,z); 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testComplement(int32_t a, UnicodeSet& x, UnicodeSet& z) { 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.complement(); 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (~a)) { 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: ~" + x + " != " + z); 1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: ~" + a + " != " + c); 1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"complement " + a); 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testAdd(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.addAll(y); 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a | b)) { 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: " + x + " | " + y + " != " + z); 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: add: " + a + " | " + b + " != " + c); 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"add " + a + "," + b); 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRetain(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.retainAll(y); 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a & b)) { 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: retain: " + x + " & " + y + " != " + z); 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: retain: " + a + " & " + b + " != " + c); 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"retain " + a + "," + b); 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 
UnicodeSetTest::_testRemove(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.removeAll(y); 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a &~ b)) { 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: remove: " + x + " &~ " + y + " != " + z); 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: remove: " + a + " &~ " + b + " != " + c); 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"remove " + a + "," + b); 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testXor(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) { 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(a, x); 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bitsToSet(b, y); 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z = x; 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z.complementAll(y); 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = setToBits(z); 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != (a ^ b)) { 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: complement: " + x + " ^ " + y + " != " + z); 
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAILED: complement: " + a + " ^ " + b + " != " + c); 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkCanonicalRep(z, (UnicodeString)"complement " + a + "," + b); 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Check that ranges are monotonically increasing and non- 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * overlapping. 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkCanonicalRep(const UnicodeSet& set, const UnicodeString& msg) { 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = set.getRangeCount(); 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (n < 0) { 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL result of " + msg + 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": range count should be >= 0 but is " + 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n /*+ " for " + set.toPattern())*/); 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 last = 0; 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<n; ++i) { 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = set.getRangeStart(i); 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = set.getRangeEnd(i); 
1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start > end) { 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL result of " + msg + 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": range " + (i+1) + 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " start > end: " + (int)start + ", " + (int)end + 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " for " + set); 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0 && start <= last) { 1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL result of " + msg + 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ": range " + (i+1) + 1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " overlaps previous range: " + (int)start + ", " + (int)end + 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " for " + set); 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru last = end; 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a bitmask to a UnicodeSet. 
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) { 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.clear(); 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (UChar32 i = 0; i < 32; ++i) { 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((a & (1<<i)) != 0) { 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.add(i); 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a UnicodeSet to a bitmask. Only the characters 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U+0000 to U+0020 are represented in the bitmask. 
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSetTest::setToBits(const UnicodeSet& x) { 1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = 0; 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < 32; ++i) { 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (x.contains((UChar32)i)) { 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result |= (1<<i); 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the representation of an inversion list based UnicodeSet 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as a pairs list. Ranges are listed in ascending Unicode order. 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For example, the set [a-zA-M3] is represented as "33AMaz". 
1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) { 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pairs; 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<set.getRangeCount(); ++i) { 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = set.getRangeStart(i); 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = set.getRangeEnd(i); 1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (end > 0xFFFF) { 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru end = 0xFFFF; 1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i = set.getRangeCount(); // Should be unnecessary 1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pairs.append((UChar)start).append((UChar)end); 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return pairs; 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic consistency check for a few items. 
1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * That the iterator works, and that we can create a pattern and 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * get the same thing back 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) { 1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet t(s); 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "copy ct"); 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = s; 1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "operator="); 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copyWithIterator(t, s, FALSE); 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "iterator roundtrip"); 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru copyWithIterator(t, s, TRUE); // try range 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "iterator roundtrip"); 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; s.toPattern(pat, FALSE); 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.applyPattern(pat, ec); 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern"); 
1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "toPattern(false)"); 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.toPattern(pat, TRUE); 1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.applyPattern(pat, ec); 1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: applyPattern"); 1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkEqual(s, t, "toPattern(true)"); 1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) { 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.clear(); 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator it(s); 1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (withRange) { 1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (it.nextRange()) { 1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (it.isString()) { 1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getString()); 1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getCodepoint(), 
it.getCodepointEnd()); 1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (it.next()) { 1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (it.isString()) { 1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getString()); 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t.add(it.getCodepoint()); 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) { 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source; s.toPattern(source, TRUE); 1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; t.toPattern(result, TRUE); 1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s != t) { 1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + message 1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; source = " + source 1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; result = " + result 1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ); 1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru logln((UnicodeString)"Ok: " + message 1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; source = " + source 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + "; result = " + result 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ); 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeString& pat, 1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsIn, 1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsOut) { 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pat, ec); 1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: pattern \"" + 1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat + "\" => " + u_errorName(ec)); 1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, pat, charsIn, charsOut); 1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set, 
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsIn, 1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsOut) { 1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat); 1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectContainment(set, pat, charsIn, charsOut); 1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set, 1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& setName, 1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsIn, 1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& charsOut) { 1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString bad; 1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<charsIn.length(); i+=U16_LENGTH(c)) { 1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = charsIn.char32At(i); 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!set.contains(c)) { 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bad.append(c); 1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bad.length() > 0) { 
1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail: set " + setName + " does not contain " + prettify(bad) + 1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected containment of " + prettify(charsIn)); 1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: set " + setName + " contains " + prettify(charsIn)); 1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bad.truncate(0); 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<charsOut.length(); i+=U16_LENGTH(c)) { 1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = charsOut.char32At(i); 1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (set.contains(c)) { 1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bad.append(c); 1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bad.length() > 0) { 1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Fail: set " + setName + " contains " + prettify(bad) + 1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ", expected non-containment of " + prettify(charsOut)); 1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: set " + setName + " does not contain " + prettify(charsOut)); 1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPattern(UnicodeSet& set, 1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& pattern, 1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& expectedPairs){ 1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern(pattern, status); 1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\"") + pattern + 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\") failed"); 1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (getPairs(set) != expectedPairs ) { 1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\"") + pattern + 1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\") => pairs \"" + 1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\", expected \"" + 1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expectedPairs) + "\""); 1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(UnicodeString("Ok: applyPattern(\"") + pattern + 1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\") => pairs \"" + 1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\""); 1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the result of calling set.toPattern(), which is the string representation of 1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this set(set), is passed to a UnicodeSet constructor, and tested that it 1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will produce another set that is equal to this one. 1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString temppattern; 1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(temppattern); 1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *tempset=new UnicodeSet(temppattern, status); 1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => invalid pattern")); 1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*tempset != set || getPairs(*tempset) != getPairs(set)){ 1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \""+ escape(getPairs(*tempset)) + "\", expected pairs \"" + 1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\"")); 2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else{ 2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(UnicodeString("Ok: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \"" + escape(getPairs(*tempset)) + "\"")); 2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete tempset; 2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPairs(const UnicodeSet& set, const UnicodeString& expectedPairs) { 2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (getPairs(set) != expectedPairs) { 2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("FAIL: Expected pair list \"") + 2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expectedPairs) + "\", got \"" + 2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(getPairs(set)) + "\""); 2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectToPattern(const UnicodeSet& set, 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& expPat, 2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char** expStrings) { 2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pat; 2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.toPattern(pat, TRUE); 2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat == expPat) { 2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: toPattern() => \"" + pat + "\""); 2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
errln((UnicodeString)"FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\""); 2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expStrings == NULL) { 2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool in = TRUE; 2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; expStrings[i] != NULL; ++i) { 2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expStrings[i] == NOT) { // sic; pointer comparison 2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in = FALSE; 2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s = CharsToUnicodeString(expStrings[i]); 2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool contained = set.contains(s); 2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (contained == in) { 2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Ok: " + expPat + 2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (contained ? " contains {" : " does not contain {") + 2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expStrings[i]) + "}"); 2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"FAIL: " + expPat + 2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (contained ? 
" contains {" : " does not contain {") + 2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escape(expStrings[i]) + "}"); 2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); } 2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::doAssert(UBool condition, const char *message) 2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!condition) { 2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("ERROR : ") + message); 2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString 2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::escape(const UnicodeString& s) { 2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString buf; 2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<s.length(); ) 2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = s.char32At(i); 2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (0x0020 <= c && c <= 0x007F) { 2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += c; 
2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c <= 0xFFFF) { 2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += (UChar)0x5c; buf += (UChar)0x75; 2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += (UChar)0x5c; buf += (UChar)0x55; 2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0xF0000000) >> 28); 2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x0F000000) >> 24); 2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x00F00000) >> 20); 2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x000F0000) >> 16); 2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0xF000) >> 12); 2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x0F00) >> 8); 2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString((c & 0x00F0) >> 4); 2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf += toHexString(c & 0x000F); 2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += U16_LENGTH(c); 2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return buf; 2087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestFreezable() { 2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 
2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15); 2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet idSet(idPattern, errorCode); 2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode)); 2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15); 2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet wsSet(wsPattern, errorCode); 2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode)); 2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 2103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru idSet.add(idPattern); 2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet frozen(idSet); 2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.freeze(); 2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(idSet.isFrozen() || !frozen.isFrozen()) { 2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: isFrozen() is wrong"); 2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: a copy-constructed frozen set differs from its original"); 2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen=wsSet; 2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: a frozen set was modified by operator="); 2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet frozen2(frozen); 2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen2!=frozen || frozen2!=idSet) { 2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: a copied frozen set differs from its frozen original"); 2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!frozen2.isFrozen()) { 2126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: copy-constructing a frozen set results in a thawed one"); 2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet frozen3(5, 55); // Set to some values to really test assignment below, not copy construction. 
2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen3.contains(0, 4) || !frozen3.contains(5, 55) || frozen3.contains(56, 0x10ffff)) { 2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(5, 55) failed"); 2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen3=frozen; 2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!frozen3.isFrozen()) { 2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: copying a frozen set results in a thawed one"); 2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *cloned=(UnicodeSet *)frozen.clone(); 2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!cloned->isFrozen() || *cloned!=frozen || cloned->containsSome(0xd802, 0xd805)) { 2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: clone() failed"); 2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cloned->add(0xd802, 0xd805); 2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(cloned->containsSome(0xd802, 0xd805)) { 2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unable to modify clone"); 2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete cloned; 2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *thawed=(UnicodeSet *)frozen.cloneAsThawed(); 2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(thawed->isFrozen() || *thawed!=frozen || thawed->containsSome(0xd802, 0xd805)) { 
2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: cloneAsThawed() failed"); 2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru thawed->add(0xd802, 0xd805); 2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!thawed->contains(0xd802, 0xd805)) { 2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unable to modify thawed clone"); 2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete thawed; 2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.set(5, 55); 2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::set() modified a frozen set"); 2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.clear(); 2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::clear() modified a frozen set"); 2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.closeOver(USET_CASE_INSENSITIVE); 2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::closeOver() modified a frozen set"); 2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen.compact(); 2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::compact() modified a frozen set"); 2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition pos; 2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPattern(wsPattern, errorCode). 2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode). 2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode). 2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode). 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode); 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set"); 2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(0xd800). 2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(0xd802, 0xd805). 2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(wsPattern). 
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addAll(idPattern). 2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru addAll(wsSet); 2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::addXYZ() modified a frozen set"); 2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(0x62). 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(0x64, 0x69). 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retainAll(wsPattern). 2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retainAll(wsSet); 2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::retainXYZ() modified a frozen set"); 2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove(0x62). 2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove(0x64, 0x69). 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove(idPattern). 2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru removeAll(idPattern). 
2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru removeAll(idSet); 2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::removeXYZ() modified a frozen set"); 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru frozen. 2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(). 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(0x62). 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(0x64, 0x69). 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(idPattern). 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complementAll(idPattern). 2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complementAll(idSet); 2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(frozen!=idSet || !(frozen==idSet)) { 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet::complementXYZ() modified a frozen set"); 2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test span() etc. -------------------------------------------------------- *** 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Append the UTF-8 version of the string to t and return the appended UTF-8 length. 
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendUTF8(const UChar *s, int32_t length, char *t, int32_t capacity) { 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8=0; 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_strToUTF8(t, capacity, &length8, s, length, &errorCode); 2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length8; 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The string contains an unpaired surrogate. 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Ignore this string. 2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator; 2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Make the strings in a UnicodeSet easily accessible. 
2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStrings { 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStrings(const UnicodeSet &normalSet) : 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set(normalSet), stringsLength(0), hasSurrogates(FALSE) { 2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t size=set.size(); 2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(size>0 && set.charAt(size-1)<0) { 2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If a set's last element is not a code point, then it must contain strings. 2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate over the set, skip all code point ranges, and cache the strings. 2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert them to UTF-8 for spanUTF8(). 2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator iter(set); 2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *s; 2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *s8=utf8; 2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8, utf8Count=0; 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(iter.nextRange() && stringsLength<LENGTHOF(strings)) { 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iter.isString()) { 2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Store the pointer to the set's string element 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which we happen to know is a stable pointer. 
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings[stringsLength]=s=&iter.getString(); 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utf8Count+= 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utf8Lengths[stringsLength]=length8= 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendUTF8(s->getBuffer(), s->length(), 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s8, (int32_t)(sizeof(utf8)-utf8Count)); 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8==0) { 2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hasSurrogates=TRUE; // Contains unpaired surrogates. 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s8+=length8; 2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++stringsLength; 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &getSet() const { 2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return set; 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool hasStrings() const { 2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(stringsLength>0); 2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool hasStringsWithSurrogates() const { 
2290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return hasSurrogates; 2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend class UnicodeSetWithStringsIterator; 2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &set; 2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *strings[20]; 2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t stringsLength; 2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool hasSurrogates; 2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char utf8[1024]; 2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t utf8Lengths[20]; 2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextStringIndex; 2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextUTF8Start; 2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator { 2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator(const UnicodeSetWithStrings &set) : 2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fSet(set), nextStringIndex(0), nextUTF8Start(0) { 2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void reset() { 2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextStringIndex=nextUTF8Start=0; 2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *nextString() { 2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(nextStringIndex<fSet.stringsLength) { 2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fSet.strings[nextStringIndex++]; 2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do not mix with calls to nextString(). 
2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *nextUTF8(int32_t &length) { 2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(nextStringIndex<fSet.stringsLength) { 2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8=fSet.utf8+nextUTF8Start; 2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextUTF8Start+=length=fSet.utf8Lengths[nextStringIndex++]; 2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s8; 2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSetWithStrings &fSet; 2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextStringIndex; 2342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nextUTF8Start; 2343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Compare 16-bit Unicode strings (which may be malformed UTF-16) 2346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// at code point boundaries. 2347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// That is, each edge of a match must not be in the middle of a surrogate pair. 
// Parameters:
//   s     - base pointer of the text
//   start - index in s where the candidate match begins
//   limit - end index of the text, in the same coordinates as start
//   t     - the string to look for at start
// Returns TRUE if s[start..start+t.length()) equals t and neither edge of that
// range falls between a lead surrogate and the trail surrogate that follows it.
// The callers in this file check that t.length() code units are available
// from start before they call this function; it is not checked here.
static inline UBool
matches16CPB(const UChar *s, int32_t start, int32_t limit, const UnicodeString &t) {
    // From here on s points at the candidate match and limit is relative to it.
    s+=start;
    limit-=start;
    int32_t length=t.length();
    return 0==t.compare(s, length) &&
           // Left edge: only inspected when there is text before the match.
           !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&
           // Right edge: only inspected when there is text after the match.
           !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));
}

// Implement span() with contains() for comparison.
// Scans s[0..length) forward from index 0 and returns the index at which the span ends.
// Three cases are handled separately:
// - the set has no cached strings: plain per-code point test
// - USET_SPAN_NOT_CONTAINED: stop at the first code point in the set or
//   at the first position where one of the set's strings matches
// - USET_SPAN_CONTAINED or USET_SPAN_SIMPLE: span code points and string matches
static int32_t containsSpanUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
                                 USetSpanCondition spanCondition) {
    const UnicodeSet &realSet(set.getSet());
    if(!set.hasStrings()) {
        // The condition is compared directly with the result of contains() below.
        // NOTE(review): this presumes USET_SPAN_NOT_CONTAINED==0 and USET_SPAN_CONTAINED==1 - confirm in uset.h.
        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
        }

        UChar32 c;
        int32_t start=0, prev;
        // prev is the index before the code point that is read in each iteration.
        while((prev=start)<length) {
            U16_NEXT(s, start, length, c);
            if(realSet.contains(c)!=spanCondition) {
                break;
            }
        }
        return prev;
    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
        UnicodeSetWithStringsIterator iter(set);
        UChar32 c;
        int32_t start, next;
        // start is the current position, next is the index after the code point at start.
        for(start=next=0; start<length;) {
            U16_NEXT(s, next, length, c);
            if(realSet.contains(c)) {
                break;
            }
            const UnicodeString *str;
            iter.reset();
            while((str=iter.nextString())!=NULL) {
                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
                    // spanNeedsStrings=TRUE;
                    return start;
                }
            }
            start=next;
        }
        return start;
    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
        UnicodeSetWithStringsIterator iter(set);
        UChar32 c;
        // maxSpanLimit is the farthest limit reached by any of the recursive calls below.
        int32_t start, next, maxSpanLimit=0;
        for(start=next=0; start<length;) {
            U16_NEXT(s, next, length, c);
            if(!realSet.contains(c)) {
                next=start;  // Do not span this single, not-contained code point.
            }
            // From here on next==start means "nothing matched at start so far".
            const UnicodeString *str;
            iter.reset();
            while((str=iter.nextString())!=NULL) {
                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
                    // spanNeedsStrings=TRUE;
                    int32_t matchLimit=start+str->length();
                    if(matchLimit==length) {
                        return length;
                    }
                    if(spanCondition==USET_SPAN_CONTAINED) {
                        // Iterate for the shortest match at each position.
                        // Recurse for each but the shortest match.
                        if(next==start) {
                            next=matchLimit;  // First match from start.
                        } else {
                            if(matchLimit<next) {
                                // Remember shortest match from start for iteration.
                                // After the swap matchLimit holds the longer of the two limits.
                                int32_t temp=next;
                                next=matchLimit;
                                matchLimit=temp;
                            }
                            // Recurse for non-shortest match from start.
                            int32_t spanLength=containsSpanUTF16(set, s+matchLimit, length-matchLimit,
                                                                 USET_SPAN_CONTAINED);
                            if((matchLimit+spanLength)>maxSpanLimit) {
                                maxSpanLimit=matchLimit+spanLength;
                                if(maxSpanLimit==length) {
                                    return length;
                                }
                            }
                        }
                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
                        if(matchLimit>next) {
                            // Remember longest match from start.
                            next=matchLimit;
                        }
                    }
                }
            }
            if(next==start) {
                break;  // No match from start.
            }
            start=next;
        }
        // Return the larger of the iterated limit and the best recursive limit.
        if(start>maxSpanLimit) {
            return start;
        } else {
            return maxSpanLimit;
        }
    }
}

// Backward counterpart of containsSpanUTF16():
// scans s[0..length) from its end toward index 0 and returns the index at which the span starts.
// The parameter length is reused as the moving read index (U16_PREV decrements it);
// where the original text length is still needed it is kept in length0.
static int32_t containsSpanBackUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
                                     USetSpanCondition spanCondition) {
    // The do-while loops below read one code point before testing their condition,
    // so empty input is handled up front.
    if(length==0) {
        return 0;
    }
    const UnicodeSet &realSet(set.getSet());
    if(!set.hasStrings()) {
        // The condition is compared directly with the result of contains() below.
        // NOTE(review): this presumes USET_SPAN_NOT_CONTAINED==0 and USET_SPAN_CONTAINED==1 - confirm in uset.h.
        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
        }

        UChar32 c;
        // prev is the index after the code point that is read in each iteration.
        int32_t prev=length;
        do {
            U16_PREV(s, 0, length, c);
            if(realSet.contains(c)!=spanCondition) {
                break;
            }
        } while((prev=length)>0);
        return prev;
    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
        UnicodeSetWithStringsIterator iter(set);
        UChar32 c;
        int32_t prev=length, length0=length;
        do {
            U16_PREV(s, 0, length, c);
            if(realSet.contains(c)) {
                break;
            }
            const UnicodeString *str;
            iter.reset();
            while((str=iter.nextString())!=NULL) {
                // Test for a string that ends exactly at prev.
                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
                    // spanNeedsStrings=TRUE;
                    return prev;
                }
            }
        } while((prev=length)>0);
        return prev;
    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
        UnicodeSetWithStringsIterator iter(set);
        UChar32 c;
        // minSpanStart is the smallest start reached by any of the recursive calls below.
        int32_t prev=length, minSpanStart=length, length0=length;
        do {
            U16_PREV(s, 0, length, c);
            if(!realSet.contains(c)) {
                length=prev;  // Do not span this single, not-contained code point.
            }
            // From here on length==prev means "nothing matched before prev so far".
            const UnicodeString *str;
            iter.reset();
            while((str=iter.nextString())!=NULL) {
                // Test for a string that ends exactly at prev.
                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
                    // spanNeedsStrings=TRUE;
                    int32_t matchStart=prev-str->length();
                    if(matchStart==0) {
                        return 0;
                    }
                    if(spanCondition==USET_SPAN_CONTAINED) {
                        // Iterate for the shortest match at each position.
                        // Recurse for each but the shortest match.
                        if(length==prev) {
                            length=matchStart;  // First match from prev.
                        } else {
                            if(matchStart>length) {
                                // Remember shortest match from prev for iteration.
                                // After the swap matchStart holds the smaller of the two starts.
                                int32_t temp=length;
                                length=matchStart;
                                matchStart=temp;
                            }
                            // Recurse for non-shortest match from prev.
                            int32_t spanStart=containsSpanBackUTF16(set, s, matchStart,
                                                                    USET_SPAN_CONTAINED);
                            if(spanStart<minSpanStart) {
                                minSpanStart=spanStart;
                                if(minSpanStart==0) {
                                    return 0;
                                }
                            }
                        }
                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
                        if(matchStart<length) {
                            // Remember longest match from prev.
                            length=matchStart;
                        }
                    }
                }
            }
            if(length==prev) {
                break;  // No match from prev.
            }
        } while((prev=length)>0);
        // Return the smaller of the iterated start and the best recursive start.
        if(prev<minSpanStart) {
            return prev;
        } else {
            return minSpanStart;
        }
    }
}

static int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
                                USetSpanCondition spanCondition) {
    const UnicodeSet &realSet(set.getSet());
    if(!set.hasStrings()) {
        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start=0, prev; 2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((prev=start)<length) { 2567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT(s, start, length, c); 2568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)!=spanCondition) { 2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(spanCondition==USET_SPAN_NOT_CONTAINED) { 2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, next; 2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(start=next=0; start<length;) { 2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT(s, next, length, c); 2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)) { 
2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) { 2593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return start; 2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=next; 2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return start; 2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ { 2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, next, maxSpanLimit=0; 2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(start=next=0; start<length;) { 2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT(s, next, length, c); 2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!realSet.contains(c)) { 2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=start; // Do not span this single, not-contained code point. 2611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) { 2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchLimit=start+length8; 2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchLimit==length) { 2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition==USET_SPAN_CONTAINED) { 2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate for the shortest match at each position. 2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for each but the shortest match. 2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(next==start) { 2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=matchLimit; // First match from start. 2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchLimit<next) { 2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember shortest match from start for iteration. 
2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t temp=next; 2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=matchLimit; 2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchLimit=temp; 2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for non-shortest match from start. 2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t spanLength=containsSpanUTF8(set, s+matchLimit, length-matchLimit, 2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USET_SPAN_CONTAINED); 2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((matchLimit+spanLength)>maxSpanLimit) { 2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxSpanLimit=matchLimit+spanLength; 2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxSpanLimit==length) { 2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* spanCondition==USET_SPAN_SIMPLE */ { 2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchLimit>next) { 2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember longest match from start. 
2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru next=matchLimit; 2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(next==start) { 2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // No match from start. 2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=next; 2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start>maxSpanLimit) { 2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return start; 2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxSpanLimit; 2661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length, 2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USetSpanCondition spanCondition) { 2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &realSet(set.getSet()); 2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!set.hasStrings()) { 
2672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_PREV(s, 0, length, c); 2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)!=spanCondition) { 2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(spanCondition==USET_SPAN_NOT_CONTAINED) { 2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_PREV(s, 0, length, c); 2694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 
2696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(realSet.contains(c)) { 2698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) { 2705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ { 2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetWithStringsIterator iter(set); 2713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length, minSpanStart=length; 2715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_PREV(s, 0, length, c); 2717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 
2719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!realSet.contains(c)) { 2721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=prev; // Do not span this single, not-contained code point. 2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s8; 2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8; 2725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.reset(); 2726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((s8=iter.nextUTF8(length8))!=NULL) { 2727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) { 2728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanNeedsStrings=TRUE; 2729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchStart=prev-length8; 2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart==0) { 2731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition==USET_SPAN_CONTAINED) { 2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Iterate for the shortest match at each position. 2735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for each but the shortest match. 2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==prev) { 2737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; // First match from prev. 
2738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart>length) { 2740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember shortest match from prev for iteration. 2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t temp=length; 2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; 2743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru matchStart=temp; 2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Recurse for non-shortest match from prev. 2746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t spanStart=containsSpanBackUTF8(set, s, matchStart, 2747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USET_SPAN_CONTAINED); 2748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanStart<minSpanStart) { 2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru minSpanStart=spanStart; 2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(minSpanStart==0) { 2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* spanCondition==USET_SPAN_SIMPLE */ { 2756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(matchStart<length) { 2757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remember longest match from prev. 
2758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=matchStart; 2759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==prev) { 2764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // No match from prev. 2765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prev<minSpanStart) { 2768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return minSpanStart; 2771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// spans to be performed and compared 2776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 2777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF16 =1, 2778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF8 =2, 2779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTFS =3, 2780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_SET =4, 2782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_COMPLEMENT =8, 2783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_POLARITY =0xc, 
2784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_FWD =0x10, 2786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_BACK =0x20, 2787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_DIRS =0x30, 2788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_CONTAINED =0x100, 2790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_SIMPLE =0x200, 2791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_CONDITION =0x300, 2792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_ALL =0x33f 2794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline USetSpanCondition invertSpanCondition(USetSpanCondition spanCondition, USetSpanCondition contained) { 2797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return spanCondition == USET_SPAN_NOT_CONTAINED ? contained : USET_SPAN_NOT_CONTAINED; 2798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t slen(const void *s, UBool isUTF16) { 2801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return isUTF16 ? u_strlen((const UChar *)s) : strlen((const char *)s); 2802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Count spans on a string with the method according to type and set the span limits. 
2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set may be the complement of the original. 2807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * When using spanBack() and comparing with span(), use a span condition for the first spanBack() 2808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * according to the expected number of spans. 2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Sets typeName to an empty string if there is no such type. 2810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns -1 if the span option is filtered out. 2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 2812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t getSpans(const UnicodeSetWithStrings &set, UBool isComplement, 2813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *s, int32_t length, UBool isUTF16, 2814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 2815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int type, const char *&typeName, 2816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limits[], int32_t limitsCapacity, 2817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectCount) { 2818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSet &realSet(set.getSet()); 2819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, count; 2820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USetSpanCondition spanCondition, firstSpanCondition, contained; 2821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isForward; 2822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(type<0 || 7<type) { 2824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru typeName=""; 2825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru return 0; 2826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const typeNames16[]={ 2829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "contains", "contains(LM)", 2830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "span", "span(LM)", 2831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "containsBack", "containsBack(LM)", 2832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "spanBack", "spanBack(LM)" 2833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 2834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const typeNames8[]={ 2836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "containsUTF8", "containsUTF8(LM)", 2837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "spanUTF8", "spanUTF8(LM)", 2838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "containsBackUTF8", "containsBackUTF8(LM)", // not implemented 2839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "spanBackUTF8", "spanBackUTF8(LM)" 2840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 2841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru typeName= isUTF16 ? 
typeNames16[type] : typeNames8[type]; 2843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // filter span options 2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(type<=3) { 2846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span forward 2847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_FWD)==0) { 2848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isForward=TRUE; 2851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span backward 2853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_BACK)==0) { 2854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isForward=FALSE; 2857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((type&1)==0) { 2859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use USET_SPAN_CONTAINED 2860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_CONTAINED)==0) { 2861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contained=USET_SPAN_CONTAINED; 2864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use USET_SPAN_SIMPLE 2866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_SIMPLE)==0) { 
2867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contained=USET_SPAN_SIMPLE; 2870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Default first span condition for going forward with an uncomplemented set. 2873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_NOT_CONTAINED; 2874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isComplement) { 2875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First span condition for span(), used to terminate the spanBack() iteration. 2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstSpanCondition=spanCondition; 2880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanBack(): Its initial span condition is span()'s last span condition, 2882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which is the opposite of span()'s first span condition 2883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we expect an even number of spans. 2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (The loop inverts spanCondition (expectCount-1) times 2885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // before the expectCount'th span() call.) 
2886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we do not compare forward and backward directions, then we do not have an 2887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // expectCount and just start with firstSpanCondition. 2888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isForward && (whichSpans&SPAN_FWD)!=0 && (expectCount&1)==0) { 2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count=0; 2893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(type) { 2894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 2895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 2896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=0; 2897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=slen(s, isUTF16); 2899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+= isUTF16 ? 
containsSpanUTF16(set, (const UChar *)s+start, length-start, spanCondition) : 2902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru containsSpanUTF8(set, (const char *)s+start, length-start, spanCondition); 2903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[count]=start; 2905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start>=length) { 2908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 2914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 2915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=0; 2916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+= isUTF16 ? realSet.span((const UChar *)s+start, length>=0 ? length-start : length, spanCondition) : 2918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru realSet.spanUTF8((const char *)s+start, length>=0 ? 
length-start : length, spanCondition); 2919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[count]=start; 2921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>=0 ? start>=length : 2924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isUTF16 ? ((const UChar *)s)[start]==0 : 2925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((const char *)s)[start]==0 2926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 2933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 2934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=slen(s, isUTF16); 2936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<=limitsCapacity) { 2940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[limitsCapacity-count]=length; 2941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length= isUTF16 ? 
containsSpanBackUTF16(set, (const UChar *)s, length, spanCondition) : 2943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru containsSpanBackUTF8(set, (const char *)s, length, spanCondition); 2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0 && spanCondition==firstSpanCondition) { 2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(limits, limits+(limitsCapacity-count), count*4); 2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 6: 2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 7: 2955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++count; 2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<=limitsCapacity) { 2958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits[limitsCapacity-count]= length >=0 ? length : slen(s, isUTF16); 2959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note: Length<0 is tested only for the first spanBack(). 2961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we wanted to keep length<0 for all spanBack()s, we would have to 2962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // temporarily modify the string by placing a NUL where the previous spanBack() stopped. 
2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length= isUTF16 ? realSet.spanBack((const UChar *)s, length, spanCondition) : 2964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru realSet.spanBackUTF8((const char *)s, length, spanCondition); 2965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0 && spanCondition==firstSpanCondition) { 2966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=invertSpanCondition(spanCondition, contained); 2969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count<limitsCapacity) { 2971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(limits, limits+(limitsCapacity-count), count*4); 2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 2975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru typeName=""; 2976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 2977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return count; 2980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// sets to be tested; odd index=isComplement 2983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 2984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SLOW, 2985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SLOW_NOT, 
2986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAST, 2987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAST_NOT, 2988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SET_COUNT 2989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char *const setNames[SET_COUNT]={ 2992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "slow", 2993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "slow.not", 2994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "fast", 2995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "fast.not" 2996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that we get the same results whether we look at text with contains(), 3000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * span() or spanBack(), using unfrozen or frozen versions of the set, 3001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and using the set or its complement (switching the spanConditions accordingly). 3002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The latter verifies that 3003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set.span(spanCondition) == set.complement().span(!spanCondition). 3004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 3005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The expectLimits[] are either provided by the caller (with expectCount>=0) 3006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or returned to the caller (with an input expectCount<0). 
3007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 3008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4], 3009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *s, int32_t length, UBool isUTF16, 3010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 3011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectLimits[], int32_t &expectCount, 3012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testName, int32_t index) { 3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limits[500]; 3014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limitsCount; 3015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i, j; 3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *typeName; 3018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int type; 3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<SET_COUNT; ++i) { 3021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((i&1)==0) { 3022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Even-numbered sets are original, uncomplemented sets. 3023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_SET)==0) { 3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Odd-numbered sets are complemented. 
3028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_COMPLEMENT)==0) { 3029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 3030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(type=0;; ++type) { 3033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limitsCount=getSpans(*sets[i], (UBool)(i&1), 3034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s, length, isUTF16, 3035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans, 3036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru type, typeName, 3037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limits, LENGTHOF(limits), expectCount); 3038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(typeName[0]==0) { 3039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // All types tried. 3040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(limitsCount<0) { 3042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; // Span option filtered out. 
3043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(expectCount<0) { 3045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectCount=limitsCount; 3046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(limitsCount>LENGTHOF(limits)) { 3047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans", 3048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits)); 3049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memcpy(expectLimits, limits, limitsCount*4); 3052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(limitsCount!=expectCount) { 3053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld", 3054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)expectCount); 3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<limitsCount; ++j) { 3057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(limits[j]!=expectLimits[j]) { 3058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%ld != %ld", 3059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[i], typeName, (long)limitsCount, 3060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j, (long)limits[j], (long)expectLimits[j]); 3061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Compare span() with containsAll()/containsNone(), 3069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // but only if we have expectLimits[] from the uncomplemented set. 3070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isUTF16 && (whichSpans&SPAN_SET)!=0) { 3071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16=(const UChar *)s; 3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString string; 3073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=0, limit, length; 3074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<expectCount; ++i) { 3075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit=expectLimits[i]; 3076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=limit-prev; 3077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0) { 3078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string.setTo(FALSE, s16+prev, length); // read-only alias 3079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i&1) { 3080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[SLOW]->getSet().containsAll(string)) { 3081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()", 3082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[SLOW], (long)prev, (long)limit); 
3083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[FAST]->getSet().containsAll(string)) { 3086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()", 3087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[FAST], (long)prev, (long)limit); 3088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[SLOW]->getSet().containsNone(string)) { 3092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()", 3093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[SLOW], (long)prev, (long)limit); 3094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!sets[FAST]->getSet().containsNone(string)) { 3097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()", 3098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, setNames[FAST], (long)prev, (long)limit); 3099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=limit; 
3104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specifically test either UTF-16 or UTF-8. 3109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4], 3110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *s, int32_t length, UBool isUTF16, 3111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 3112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testName, int32_t index) { 3113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectLimits[500]; 3114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectCount=-1; 3115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, length, isUTF16, whichSpans, expectLimits, expectCount, testName, index); 3116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool stringContainsUnpairedSurrogate(const UChar *s, int32_t length) { 3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c, c2; 3120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>=0) { 3122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(length>0) { 3123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=*s++; 3124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 3125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0xd800<=c && c<0xe000) { 3126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru if(c>=0xdc00 || length==0 || !U16_IS_TRAIL(c2=*s++)) { 3127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 3128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 3130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((c=*s++)!=0) { 3134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0xd800<=c && c<0xe000) { 3135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>=0xdc00 || !U16_IS_TRAIL(c2=*s++)) { 3136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 3137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text, 3145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// unless either UTF is turned off in whichSpans. 3146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Testing UTF-16 and UTF-8 together requires that surrogate code points 3147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// have the same contains(c) value as U+FFFD. 
3148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanBothUTFs(const UnicodeSetWithStrings *sets[4], 3149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16, int32_t length16, 3150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans, 3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *testName, int32_t index) { 3152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectLimits[500]; 3153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t expectCount; 3154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectCount=-1; // Get expectLimits[] from testSpan(). 3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF16)!=0) { 3158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s16, length16, TRUE, whichSpans, expectLimits, expectCount, testName, index); 3159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF8)==0) { 3161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert s16[] and expectLimits[] to UTF-8. 
3165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t s8[3000]; 3166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t offsets[3000]; 3167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16Limit=s16+length16; 3169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *t=(char *)s8; 3170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *tLimit=t+sizeof(s8); 3171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *o=offsets; 3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 3173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert with substitution: Turn unpaired surrogates into U+FFFD. 3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(openUTF8Converter(), &t, tLimit, &s16, s16Limit, o, TRUE, &errorCode); 3176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: %s[0x%lx] ucnv_fromUnicode(to UTF-8) fails with %s", 3178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testName, (long)index, u_errorName(errorCode)); 3179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_resetFromUnicode(utf8Cnv); 3180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length8=(int32_t)(t-(char *)s8); 3183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert expectLimits[]. 
3185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, j, expect; 3186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=j=0; i<expectCount; ++i) { 3187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect=expectLimits[i]; 3188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(expect==length16) { 3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectLimits[i]=length8; 3190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(offsets[j]<expect) { 3192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++j; 3193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectLimits[i]=j; 3195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s8, length8, FALSE, whichSpans, expectLimits, expectCount, testName, index); 3199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 nextCodePoint(UChar32 c) { 3202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Skip some large and boring ranges. 
3203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(c) { 3204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x3441: 3205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x4d7f; 3206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x5100: 3207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x9f00; 3208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0xb040: 3209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xd780; 3210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0xe041: 3211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xf8fe; 3212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x10100: 3213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x20000; 3214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x20041: 3215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xe0000; 3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0xe0101: 3217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0x10fffd; 3218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 3219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c+1; 3220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Verify that all implementations represent the same set. 
3224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { 3225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains(U+FFFD) is inconsistent with contains(some surrogates), 3226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or the set contains strings with unpaired surrogates which don't translate to valid UTF-8: 3227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Skip the UTF-8 part of the test - if the string contains surrogates - 3228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because it is likely to produce a different result. 3229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool inconsistentSurrogates= 3230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (!(sets[0]->getSet().contains(0xfffd) ? 3231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[0]->getSet().contains(0xd800, 0xdfff) : 3232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[0]->getSet().containsNone(0xd800, 0xdfff)) || 3233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[0]->hasStringsWithSurrogates()); 3234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar s[1000]; 3236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length=0; 3237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t localWhichSpans; 3238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, first; 3240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(first=c=0;; c=nextCodePoint(c)) { 3241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) { 3242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru localWhichSpans=whichSpans; 3243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) { 3244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru localWhichSpans&=~SPAN_UTF8; 3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first); 3247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>0x10ffff) { 3248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=0; 3251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru first=c; 3252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(s, length, c); 3254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test with a particular, interesting string. 3258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specify length and try NUL-termination. 
3259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { 3260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar s[]={ 3261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x61, 0x62, 0x20, // Latin, space 3262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x3b1, 0x3b2, 0x3b3, // Greek 3263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd900, // lead surrogate 3264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x3000, 0x30ab, 0x30ad, // wide space, Katakana 3265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xdc05, // trail surrogate 3266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xa0, 0xac00, 0xd7a3, // nbsp, Hangul 3267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd900, 0xdc05, // unassigned supplementary 3268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd840, 0xdfff, 0xd860, 0xdffe, // Han supplementary 3269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd7a4, 0xdc05, 0xd900, 0x2028, // unassigned, surrogates in wrong order, LS 3270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0 // NUL 3271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF16)==0) { 3274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0); 3277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1); 3278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) { 3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char s[]={ 3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc" // Latin 3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru " " // space 3288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated multi-byte sequences */ 3290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xd0" 3291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0" 3292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe1" 3293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed" 3294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xee" 3295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0" 3296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf1" 3297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4" 3298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8" 3299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc" 3300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xCE\xB1\xCE\xB2\xCE\xB3" // Greek 3302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 
3304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0\x80" 3307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0\xa0" 3308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe1\x80" 3309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed\x80" 3310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed\xa0" 3311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xee\x80" 3312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x80" 3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x90" 3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf1\x80" 3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x80" 3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x90" 3317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80" 3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80" 3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xE3\x80\x80\xE3\x82\xAB\xE3\x82\xAD" // wide space, Katakana 3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x80\x80" 3326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x90\x80" 3327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf1\x80\x80" 3328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x80\x80" 3329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x90\x80" 
3330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80\x80" 3331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80" 3332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xC2\xA0\xEA\xB0\x80\xED\x9E\xA3" // nbsp, Hangul 3334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80\x80\x80" 3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80\x80" 3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xF1\x90\x80\x85" // unassigned supplementary 3342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80\x80\x80" 3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xF0\xA0\x8F\xBF\xF0\xA8\x8F\xBE" // Han supplementary 3349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* complete sequences but non-shortest forms or out of range etc. 
*/ 3354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xc0\x80" 3355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xe0\x80\x80" 3356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xed\xa0\x80" 3357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf0\x80\x80\x80" 3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf4\x90\x80\x80" 3359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xf8\x80\x80\x80\x80" 3360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfc\x80\x80\x80\x80\x80" 3361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xfe" 3362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xff" 3363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* trail byte in lead position */ 3365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\x80" 3366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\xED\x9E\xA4\xE2\x80\xA8" // unassigned, LS, NUL-terminated 3368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((whichSpans&SPAN_UTF8)==0) { 3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0); 3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1); 3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Take a set of span 
options and multiply them so that 3378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// each portion only has one of the options a, b and c. 3379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// If b==0, then the set of options is just modified with mask and a. 3380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// If b!=0 and c==0, then the set of options is just modified with mask, a and b. 3381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 3382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddAlternative(uint32_t whichSpans[], int32_t whichSpansCount, 3383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t mask, uint32_t a, uint32_t b, uint32_t c) { 3384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t s; 3385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 3386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<whichSpansCount; ++i) { 3388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=whichSpans[i]&mask; 3389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[i]=s|a; 3390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b!=0) { 3391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[whichSpansCount+i]=s|b; 3392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c!=0) { 3393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[2*whichSpansCount+i]=s|c; 3394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return b==0 ? whichSpansCount : c==0 ? 
2*whichSpansCount : 3*whichSpansCount; 3398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _63_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _64_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _63_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 3403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _64_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 3404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSpan() { 3406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "[...]" is a UnicodeSet pattern. 3407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "*" performs tests on all Unicode code points and on a selection of 3408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // malformed UTF-8/16 strings. 3409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "-options" limits the scope of testing for the current set. 3410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // By default, the test verifies that equivalent boundaries are found 3411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for UTF-16 and UTF-8, going forward and backward, 3412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // alternating USET_SPAN_NOT_CONTAINED with 3413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // either USET_SPAN_CONTAINED or USET_SPAN_SIMPLE. 
3414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Single-character options: 3415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 8 -- UTF-16 and UTF-8 boundaries may differ. 3416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: contains(U+FFFD) is inconsistent with contains(some surrogates), 3417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or the set contains strings with unpaired surrogates 3418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // which do not translate to valid UTF-8. 3419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // c -- set.span() and set.complement().span() boundaries may differ. 3420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: Set strings are not complemented. 3421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // b -- span() and spanBack() boundaries may differ. 3422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: Strings in the set overlap, and spanBack(USET_SPAN_CONTAINED) 3423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and spanBack(USET_SPAN_SIMPLE) are defined to 3424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // match with non-overlapping substrings. 3425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For example, with a set containing "ab" and "ba", 3426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span() of "aba" yields boundaries { 0, 2, 3 } 3427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because the initial "ab" matches from 0 to 2, 3428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while spanBack() yields boundaries { 0, 1, 3 } 3429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because the final "ba" matches from 1 to 3. 3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // l -- USET_SPAN_CONTAINED and USET_SPAN_SIMPLE boundaries may differ. 
3431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cause: Strings in the set overlap, and a longer match may 3432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // require a sequence including non-longest substrings. 3433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For example, with a set containing "ab", "abc" and "cd", 3434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span(contained) of "abcd" spans the entire string 3435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // but span(longest match) only spans the first 3 characters. 3436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Each "-options" first resets all options and then applies the specified options. 3437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A "-" without options resets the options. 3438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The options are also reset for each new set. 3439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Other strings will be spanned. 
3440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const testdata[]={ 3441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:ID_Continue:]", 3442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[:White_Space:]", 3444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[]", 3446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u0000-\\U0010FFFF]", 3448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u0000\\u0080\\u0800\\U00010000]", 3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]", 3452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]", 3454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]", 3457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "*", 3459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Overlapping strings cause overlapping attempts to match. 
3461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[x{xy}{xya}{axy}{ax}]", 3462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // More repetitions of "xya" would take too long with the recursive 3465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reference implementation. 3466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // containsAll()=FALSE 3467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x14 3468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" // set.complement().span(longest match) will stop here. 3470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" // set.complement().span(contained) will stop between the two 'x'es. 3471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" 3472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" // span() ends here. 
3474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaa", 3475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // containsAll()=TRUE 3477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x15 3478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" 3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxya" 3482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxy", 3484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-bc", 3486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x17 3487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byayaxya", // span() -> { 4, 7, 8 } spanBack() -> { 5, 8 } 3488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byayaxy", // span() -> { 4, 7 } complement.span() -> { 7 } 3490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byayax", // span() -> { 4, 6 } complement.span() -> { 6 } 3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-", 3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byaya", // span() -> { 5 } 3493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "byay", // span() -> { 4 } 3494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bya", // span() -> { 3 } 3495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // span(longest match) will not span the whole string. 
3497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a{ab}{bc}]", 3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x21 3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 3501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a{ab}{abc}{cd}]", 3503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "acdabcdabccd", 3505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // spanBack(longest match) will not span the whole string. 3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[c{ab}{bc}]", 3508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abc", 3510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[d{cd}{bcd}{ab}]", 3512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "abbcdabcdabd", 3514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test with non-ASCII set strings - test proper handling of surrogate pairs 3516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and UTF-8 trail bytes. 3517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copies of above test sets and strings, but transliterated to have 3518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // different code points with similar trail units. 
3519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Previous: a b c d 3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unicode: 042B 30AB 200AB 204AB 3521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UTF-16: 042B 30AB D840 DCAB D841 DCAB 3522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UTF-8: D0 AB E3 82 AB F0 A0 82 AB F0 A0 92 AB 3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]", 3524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB", 3526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]", 3528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-cl", 3529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB", 3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Stress bookkeeping and recursion. 3532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The following strings are barely doable with the recursive 3533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // reference implementation. 3534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The not-contained character at the end prevents an early exit from the span(). 
3535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[b{bb}]", 3536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test_string 0x33 3538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bbbbbbbbbbbbbbbbbbbbbbbb-", 3539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // On complement sets, span() and spanBack() get different results 3540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because b is not in the complement set and there is an odd number of b's 3541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the test string. 3542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-bc", 3543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bbbbbbbbbbbbbbbbbbbbbbbbb-", 3544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test with set strings with an initial or final code point span 3546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // longer than 254. 
3547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a{" _64_a _64_a _64_a _64_a "b}" 3548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "{a" _64_b _64_b _64_b _64_b "}]", 3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-c", 3550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _64_a _64_a _64_a _63_a "b", 3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _64_a _64_a _64_a _64_a "b", 3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _64_a _64_a _64_a _64_a "aaaabbbb", 3553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a" _64_b _64_b _64_b _63_b, 3554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a" _64_b _64_b _64_b _64_b, 3555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaaabbbb" _64_b _64_b _64_b _64_b, 3556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Test with strings containing unpaired surrogates. 3558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // They are not representable in UTF-8, and a leading trail surrogate 3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and a trailing lead surrogate must not match in the middle of a proper surrogate pair. 
3560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // U+20001 == \\uD840\\uDC01 3561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // U+20400 == \\uD841\\uDC00 3562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]", 3563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "-8cl", 3564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb" 3565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t whichSpans[96]={ SPAN_ALL }; 3567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t whichSpansCount=1; 3568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *sets[SET_COUNT]={ NULL }; 3570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL }; 3571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char testName[1024]; 3573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *testNameLimit=testName; 3574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, j; 3576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<LENGTHOF(testdata); ++i) { 3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *s=testdata[i]; 3578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s[0]=='[') { 3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create new test sets from this pattern. 
3580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<SET_COUNT; ++j) { 3581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets_with_str[j]; 3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets[j]; 3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 3585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode); 3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode)); 3588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]); 3591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[SLOW_NOT]->complement(); 3592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Intermediate set: Test cloning of a frozen set. 
3593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fast=new UnicodeSet(*sets[SLOW]); 3594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fast->freeze(); 3595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[FAST]=(UnicodeSet *)fast->clone(); 3596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fast; 3597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *fastNot=new UnicodeSet(*sets[SLOW_NOT]); 3598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fastNot->freeze(); 3599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets[FAST_NOT]=(UnicodeSet *)fastNot->clone(); 3600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete fastNot; 3601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<SET_COUNT; ++j) { 3603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sets_with_str[j]=new UnicodeSetWithStrings(*sets[j]); 3604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testName, s); 3607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testNameLimit=strchr(testName, 0); 3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *testNameLimit++=':'; 3609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *testNameLimit=0; 3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[0]=SPAN_ALL; 3612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=1; 3613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(s[0]=='-') { 3614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[0]=SPAN_ALL; 3615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
whichSpansCount=1; 3616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*++s!=0) { 3618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(*s) { 3619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'c': 3620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~SPAN_POLARITY, 3622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_SET, 3623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_COMPLEMENT, 3624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0); 3625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'b': 3627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~SPAN_DIRS, 3629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_FWD, 3630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_BACK, 3631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0); 3632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'l': 3634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test USET_SPAN_CONTAINED FWD & BACK, and separately 3635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // USET_SPAN_SIMPLE only FWD, and separately 3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // USET_SPAN_SIMPLE only BACK 3637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~(SPAN_DIRS|SPAN_CONDITION), 
3639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_DIRS|SPAN_CONTAINED, 3640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_FWD|SPAN_SIMPLE, 3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_BACK|SPAN_SIMPLE); 3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case '8': 3644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpansCount=addAlternative(whichSpans, whichSpansCount, 3645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~SPAN_UTFS, 3646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF16, 3647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPAN_UTF8, 3648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0); 3649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 3651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: unrecognized span set option in \"%s\"", testdata[i]); 3652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 3653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(0==strcmp(s, "*")) { 3656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testNameLimit, "bad_string"); 3657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<whichSpansCount; ++j) { 3658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(whichSpansCount>1) { 3659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(testNameLimit+10 /* strlen("bad_string") */, 3660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%%0x%3x", 3661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[j]); 
3662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanUTF16String(sets_with_str, whichSpans[j], testName); 3664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanUTF8String(sets_with_str, whichSpans[j], testName); 3665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testNameLimit, "contents"); 3668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<whichSpansCount; ++j) { 3669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(whichSpansCount>1) { 3670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(testNameLimit+8 /* strlen("contents") */, 3671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%%0x%3x", 3672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru whichSpans[j]); 3673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanContents(sets_with_str, whichSpans[j], testName); 3675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 3677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString string=UnicodeString(s, -1, US_INV).unescape(); 3678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(testNameLimit, "test_string"); 3679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<whichSpansCount; ++j) { 3680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(whichSpansCount>1) { 3681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(testNameLimit+11 /* strlen("test_string") */, 3682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%%0x%3x", 3683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
whichSpans[j]); 3684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru testSpanBothUTFs(sets_with_str, string.getBuffer(), string.length(), whichSpans[j], testName, i); 3686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<SET_COUNT; ++j) { 3690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets_with_str[j]; 3691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete sets[j]; 3692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test select patterns and strings, and test USET_SPAN_SIMPLE. 
3696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStringSpan() { 3697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *pattern="[x{xy}{xya}{axy}{ax}]"; 3698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const string= 3699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya" 3701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya" 3703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xx" 3704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy" 3705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaaa"; 3706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 3708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern16=UnicodeString(pattern, -1, US_INV); 3709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(pattern16, errorCode); 3710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode)); 3712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString string16=UnicodeString(string, -1, US_INV).unescape(); 
3716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(set.containsAll(string16)) { 3718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).containsAll(%s) should be FALSE", pattern, string); 3719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remove trailing "aaaa". 3722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16.truncate(string16.length()-4); 3723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!set.containsAll(string16)) { 3724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string); 3725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16=UNICODE_STRING_SIMPLE("byayaxya"); 3728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s16=string16.getBuffer(); 3729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length16=string16.length(); 3730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 || 3731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 || 3732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 || 3733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 5, USET_SPAN_NOT_CONTAINED)!=5 || 3734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 4, USET_SPAN_NOT_CONTAINED)!=4 || 3735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 3, USET_SPAN_NOT_CONTAINED)!=3 
3736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).span(while not) returns the wrong value", pattern); 3738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern="[a{ab}{abc}{cd}]"; 3741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern16=UnicodeString(pattern, -1, US_INV); 3742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern(pattern16, errorCode); 3743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode)); 3745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16=UNICODE_STRING_SIMPLE("acdabcdabccd"); 3748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s16=string16.getBuffer(); 3749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length16=string16.length(); 3750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 || 3751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16, 12, USET_SPAN_SIMPLE)!=6 || 3752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.span(s16+7, 5, USET_SPAN_SIMPLE)!=5 3753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 3754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).span(while longest match) returns the wrong value", pattern); 3755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
3757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern="[d{cd}{bcd}{ab}]"; 3758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern16=UnicodeString(pattern, -1, US_INV); 3759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.applyPattern(pattern16, errorCode).freeze(); 3760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 3761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode)); 3762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 3763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru string16=UNICODE_STRING_SIMPLE("abbcdabcdabd"); 3765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s16=string16.getBuffer(); 3766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length16=string16.length(); 3767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 || 3768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 || 3769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0 3770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 3771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern); 3772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 3773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 3774