1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************************
354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius*   Copyright (C) 1999-2012 International Business Machines Corporation and
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   others. All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Date        Name        Description
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   10/20/99    alan        Creation.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   03/22/2000  Madhu       Added additional tests
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************************
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "usettest.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/usetiter.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uversion.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Number of elements in a statically sized array, as an int32_t.
// Only meaningful for true arrays (not pointers). The whole expansion is
// parenthesized so it stays a single operand inside larger expressions,
// e.g. as the operand of a unary operator such as sizeof.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Calls dataerrln() with the source file, line and u_errorName(status)
// when `status` is a failure code (per U_FAILURE); otherwise does nothing.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("fail in file \"%s\", line %d: \"%s\"", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Calls dataerrln() with the source file and line when `expr` is false.
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); \
    } \
}
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(pat);
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return left + UnicodeSetTest::escape(pat);
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CASE(id,test) case id:                          \
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          name = #test;                 \
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          if (exec) {                   \
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              logln(#test "---");       \
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              logln();                  \
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              test();                   \
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          }                             \
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          break
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) {
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUConverter *UnicodeSetTest::openUTF8Converter() {
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(utf8Cnv==NULL) {
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode errorCode=U_ZERO_ERROR;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        utf8Cnv=ucnv_open("UTF-8", &errorCode);
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return utf8Cnv;
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::~UnicodeSetTest() {
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(utf8Cnv);
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const char* &name, char* /*par*/) {
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if (exec) logln((UnicodeString)"TestSuite UnicodeSetTest");
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (index) {
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(0,TestPatterns);
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(1,TestAddRemove);
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(2,TestCategories);
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(3,TestCloneEqualHash);
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(4,TestMinimalRep);
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(5,TestAPI);
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(6,TestScriptSet);
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(7,TestPropertySet);
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(8,TestClone);
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(9,TestExhaustive);
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(10,TestToPattern);
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(11,TestIndexOf);
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(12,TestStrings);
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(13,Testj2268);
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(14,TestCloseOver);
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(15,TestEscapePattern);
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(16,TestInvalidCodePoint);
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(17,TestSymbolTable);
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(18,TestSurrogate);
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(19,TestPosixClasses);
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(20,TestIteration);
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(21,TestFreezable);
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(22,TestSpan);
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(23,TestStringSpan);
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        default: name = ""; break;
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Separator entry used inside the expected-string arrays handed to
// expectToPattern() (see TestToPattern). Presumably the strings listed after
// it are expected to be absent from the set -- confirm against
// expectToPattern().
static const char NOT[] = "%%%%";
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UVector was improperly copying contents
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This code will crash this is still true
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::Testj2268() {
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeSet t;
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  t.add(UnicodeString("abc"));
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeSet test(t);
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeString ustrPat;
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  test.toPattern(ustrPat, TRUE);
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test toPattern().
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestToPattern() {
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test that toPattern() round trips with syntax characters and
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // whitespace.
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        static const char* OTHER_TOPATTERN_TESTS[] = {
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "[[:latin:]&[:greek:]]",
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "[[:latin:]-[:greek:]]",
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "[:nonspacing mark:]",
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            NULL
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        };
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t j=0; OTHER_TOPATTERN_TESTS[j]!=NULL; ++j) {
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec = U_ZERO_ERROR;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec);
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
1336d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru                dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec)));
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            checkPat(OTHER_TOPATTERN_TESTS[j], s);
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (UChar32 i = 0; i <= 0x10FFFF; ++i) {
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((i <= 0xFF && !u_isalpha(i)) || u_isspace(i)) {
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // check various combinations to make sure they all work.
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (i != 0 && !toPatternAux(i, i)){
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!toPatternAux(0, i)){
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!toPatternAux(i, 0xFFFF)){
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test pattern behavior of multicharacter strings.
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec = U_ZERO_ERROR;
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet* s = new UnicodeSet("[a-z {aa} {ab}]", ec);
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // This loop isn't a loop.  It's here to make the compiler happy.
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // If you're curious, try removing it and changing the 'break'
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // statements (except for the last) to goto's.
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;) {
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) break;
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp1[] = {"aa", "ab", NOT, "ac", NULL};
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectToPattern(*s, "[a-z{aa}{ab}]", exp1);
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add("ac");
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) break;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add("[]");
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) break;
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // j2189
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->clear();
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add(UnicodeString("abc", ""));
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add(UnicodeString("abc", ""));
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp6[] = {"abc", NOT, "ab", NULL};
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectToPattern(*s, "[{abc}]", exp6);
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) errln("FAIL: pattern parse error");
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete s;
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // JB#3400: For 2 character ranges prefer [ab] to [a-b]
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet s;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.add((UChar)97, (UChar)98); // 'a', 'b'
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectToPattern(s, "[ab]", NULL);
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::toPatternAux(UChar32 start, UChar32 end) {
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // use Integer.toString because Utility.hex doesn't handle ints
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat = "";
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO do these in hex
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //String source = "0x" + Integer.toString(start,16).toUpperCase();
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString source;
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = source + (uint32_t)start;
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (start != end)
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source = source + ".." + (uint32_t)end;
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet testSet;
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSet.add(start, end);
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return checkPat(source, testSet);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source,
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeSet& testSet) {
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // What we want to make sure of is that a pattern generated
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // by toPattern(), with or without escaped unprintables, can
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // be passed back into the UnicodeSet constructor.
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat0;
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSet.toPattern(pat0, TRUE);
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!checkPat(source + " (escaped)", testSet, pat0)) return FALSE;
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //String pat1 = unescapeLeniently(pat0);
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat2;
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSet.toPattern(pat2, FALSE);
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!checkPat(source, testSet, pat2)) return FALSE;
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //String pat3 = unescapeLeniently(pat2);
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if (!checkPat(source + " (in code)", testSet, pat3)) return false;
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln((UnicodeString)source + " => " + pat0 + ", " + pat2);
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source,
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeSet& testSet,
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeString& pat) {
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet testSet2(pat, ec);
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (testSet2 != testSet) {
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail toPattern: " + source + " => " + pat);
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestPatterns(void) {
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[[a-m]&[d-z]&[k-y]]", ""),  "km");
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[[a-z]-[m-y]-[d-r]]", ""),  "aczz");
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[a\\-z]", ""),  "--aazz");
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[-az]", ""),  "--aazz");
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[az-]", ""),  "--aazz");
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[[[a-z]-[aeiou]i]]", ""), "bdfnptvz");
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Throw in a test of complement
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complement();
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString exp;
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(0x007a+1)).append((UChar)0xFFFF);
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, exp);
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCategories(void) {
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:]
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set(pat, status);
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
2846d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status)));
2856d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        return;
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(set, pat, "ABC", "abc");
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 i;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t failures = 0;
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Make sure generation of L doesn't pollute cached Lu set
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First generate L, then Lu
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[:L:]", status);
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<0x200; ++i) {
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool l = u_isalpha((UChar)i);
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (l != set.contains(i)) {
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: L contains " + (unsigned short)i + " = " +
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  set.contains(i));
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (++failures == 10) break;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[:Lu:]", status);
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<0x200; ++i) {
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool lu = (u_charType((UChar)i) == U_UPPERCASE_LETTER);
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lu != set.contains(i)) {
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: Lu contains " + (unsigned short)i + " = " +
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  set.contains(i));
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (++failures == 20) break;
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCloneEqualHash(void) {
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // set1 and set2 used to be built with the obsolete constructor taking
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // UCharCategory values; replaced with pattern constructors
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // markus 20030502
322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); //  :Ll: Letter, lowercase
323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); //  Letter, lowercase
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)){
3256d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status)));
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status);   //Number, Decimal digit
329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status);   //Number, Decimal digit
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)){
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (*set1 != *set1a) {
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: category constructor for Ll broken");
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (*set2 != *set2a) {
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: category constructor for Nd broken");
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1a;
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set2a;
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing copy construction");
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set1copy=new UnicodeSet(*set1);
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*set1 != *set1copy || *set1 == *set2 ||
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        getPairs(*set1) != getPairs(*set1copy) ||
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set1->hashCode() != set1copy->hashCode()){
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL : Error in copy construction");
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing =operator");
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set1equal=*set1;
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set2equal=*set2;
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(set1equal != *set1 || set1equal != *set1copy || set2equal != *set2 ||
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set2equal == *set1 || set2equal == *set1copy || set2equal == set1equal){
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Error in =operator");
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing clone()");
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set1clone=(UnicodeSet*)set1->clone();
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set2clone=(UnicodeSet*)set2->clone();
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*set1clone != *set1 || *set1clone != *set1copy || *set1clone != set1equal ||
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *set2clone != *set2 || *set2clone == *set1copy || *set2clone != set2equal ||
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *set2clone == *set1 || *set2clone == set1equal || *set2clone == *set1clone){
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Error in clone");
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing hashcode");
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(set1->hashCode() != set1equal.hashCode() || set1->hashCode() != set1clone->hashCode() ||
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set2->hashCode() != set2equal.hashCode() || set2->hashCode() != set2clone->hashCode() ||
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set1copy->hashCode() != set1equal.hashCode() || set1copy->hashCode() != set1clone->hashCode() ||
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set1->hashCode() == set2->hashCode()  || set1copy->hashCode() == set2->hashCode() ||
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set2->hashCode() == set1clone->hashCode() || set2->hashCode() == set1equal.hashCode() ){
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Error in hashCode()");
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1;
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1copy;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set2;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1clone;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set2clone;
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestAddRemove(void) {
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set; // Construct empty set
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == TRUE, "set should be empty");
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0, "size should be 0");
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complement();
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0x110000, "size should be 0x110000");
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061, 0x007a);
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "az");
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == FALSE, "set should not be empty");
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() != 0, "size should not be equal to 0");
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 26, "size should be equal to 26");
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x006d, 0x0070);
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "alqz");
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 22, "size should be equal to 22");
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0065, 0x0067);
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "adhlqz");
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 19, "size should be equal to 19");
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0064, 0x0069);
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "acjlqz");
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 16, "size should be equal to 16");
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0063, 0x0072);
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "absz");
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 10, "size should be equal to 10");
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0066, 0x0071);
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "abfqsz");
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 22, "size should be equal to 22");
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0061, 0x0067);
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "hqsz");
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0061, 0x007a);
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "");
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == TRUE, "set should be empty");
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0, "size should be 0");
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061);
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == FALSE, "set should not be empty");
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 1, "size should not be equal to 1");
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0062);
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0063);
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "ac");
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 3, "size should not be equal to 3");
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0070);
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0071);
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "acpq");
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 5, "size should not be equal to 5");
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "");
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == TRUE, "set should be empty");
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0, "size should be 0");
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Try removing an entire set from another set
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, "[c-x]", "cx");
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set2;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.removeAll(set2);
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "deluxx");
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Try adding an entire set to another set
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, "[jackiemclean]", "aacceein");
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aacehort");
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2");
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Try retaining an set of elements contained in another set (intersection)
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set3;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set3, "[a-c]", "ac");
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set3) == FALSE, "set doesn't contain all the elements in set3");
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set3.remove(0x0062);
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set3, "aacc");
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3");
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.retainAll(set3);
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aacc");
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == set3.size(), "set.size() should be set3.size()");
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3");
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() != set3.size(), "set.size() != set3.size()");
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test commutativity
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set2, "[jackiemclean]", "aacceein");
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aacehort");
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2");
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure minimal representation is maintained.
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestMinimalRep() {
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This is pretty thoroughly tested by checkCanonicalRep()
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // run against the exhaustive operation results.  Use the code
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // here for debugging specific spot problems.
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 1 overlap against 2
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set("[h-km-q]", status);
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set2("[i-o]", status);
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "hq");
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // right
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[a-m]", status);
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set2.applyPattern("[e-o]", status);
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "ao");
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // left
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[e-o]", status);
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set2.applyPattern("[a-m]", status);
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "ao");
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 1 overlap against 3
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[a-eg-mo-w]", status);
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set2.applyPattern("[d-q]", status);
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aw");
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestAPI() {
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // default ct
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.isEmpty() || set.getRangeCount() != 0) {
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, set should be empty but isn't: " +
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set);
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // clear(), isEmpty()
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061);
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.isEmpty()) {
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, set shouldn't be empty but is: " +
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set);
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.isEmpty()) {
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, set should be empty but isn't: " +
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set);
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // size()
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.size() != 0) {
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, size should be 0, but is " + set.size() +
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": " + set);
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061);
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.size() != 1) {
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, size should be 1, but is " + set.size() +
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": " + set);
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0031, 0x0039);
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.size() != 10) {
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, size should be 10, but is " + set.size() +
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": " + set);
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // contains(first, last)
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[A-Y 1-8 b-d l-y]", status);
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i<set.getRangeCount(); ++i) {
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 a = set.getRangeStart(i);
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 b = set.getRangeEnd(i);
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!set.contains(a, b)) {
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL, should contain " + (unsigned short)a + '-' + (unsigned short)b +
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " but doesn't: " + set);
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains((UChar32)(a-1), b)) {
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL, shouldn't contain " +
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (unsigned short)(a-1) + '-' + (unsigned short)b +
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " but does: " + set);
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains(a, (UChar32)(b+1))) {
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL, shouldn't contain " +
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (unsigned short)a + '-' + (unsigned short)(b+1) +
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " but does: " + set);
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Ported InversionList test.
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet a((UChar32)3,(UChar32)10);
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet b((UChar32)7,(UChar32)15);
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet c;
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln((UnicodeString)"a [3-10]: " + a);
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln((UnicodeString)"b [7-15]: " + b);
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c = a;
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.addAll(b);
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet exp((UChar32)3,(UChar32)15);
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.set(a).add(b): " + c);
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.complement();
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.set((UChar32)0, (UChar32)2);
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.add((UChar32)16, UnicodeSet::MAX_VALUE);
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.complement(): " + c);
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp);
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.complement();
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.set((UChar32)3, (UChar32)15);
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.complement(): " + c);
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp);
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c = a;
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.complementAll(b);
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.set((UChar32)3,(UChar32)6);
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.add((UChar32)11,(UChar32) 15);
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.set(a).exclusiveOr(b): " + c);
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.set(a).exclusiveOr(b) = " + c + ", expect " + exp);
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp = c;
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(setToBits(c), c);
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"bitsToSet(setToBits(c)): " + c);
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Additional tests for coverage JB#2118
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::complement(class UnicodeString const &)
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::complementAll(class UnicodeString const &)
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsNone(class UnicodeSet const &)
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsNone(long,long)
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsSome(class UnicodeSet const &)
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsSome(long,long)
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::removeAll(class UnicodeString const &)
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::retain(long)
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::retainAll(class UnicodeString const &)
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSetIterator::getString(void)
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complement("ab");
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[{ab}]", status);
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: complement(\"ab\")"); return; }
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator iset(set);
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!iset.next() || !iset.isString()) {
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSetIterator::next/isString");
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (iset.getString() != "ab") {
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSetIterator::getString");
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add((UChar32)0x61, (UChar32)0x7A);
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complementAll("alan");
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[{ab}b-kmo-z]", status);
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: complementAll(\"alan\")"); return; }
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[a-z]", status);
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[aln]", status);
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsNone((UChar32)0x61, (UChar32)0x7A)) {
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsNone(UChar32, UChar32)");
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsSome((UChar32)0x61, (UChar32)0x7A)) {
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsSome(UChar32, UChar32)");
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsNone((UChar32)0x41, (UChar32)0x5A)) {
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsNone(UChar32, UChar32)");
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsSome((UChar32)0x41, (UChar32)0x5A)) {
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsSome(UChar32, UChar32)");
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.removeAll("liu");
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[{ab}b-hj-kmo-tv-z]", status);
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: removeAll(\"liu\")"); return; }
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.retainAll("star");
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[rst]", status);
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: retainAll(\"star\")"); return; }
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.retain((UChar32)0x73);
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[s]", status);
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: retain('s')"); return; }
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t buf[32];
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status);
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: serialize");
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
700b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
701b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    // Conversions to and from USet
702b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeSet *uniset = &set;
703b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    USet *uset = uniset->toUSet();
704b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_ASSERT((void *)uset == (void *)uniset);
705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UnicodeSet *setx = UnicodeSet::fromUSet(uset);
706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_ASSERT((void *)setx == (void *)uset);
707b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UnicodeSet *constSet = uniset;
708b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const USet *constUSet = constSet->toUSet();
709b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_ASSERT((void *)constUSet == (void *)constSet);
710b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet);
711b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    TEST_ASSERT((void *)constSetx == (void *)constUSet);
71250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
71350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // span(UnicodeString) and spanBack(UnicodeString) convenience methods
71450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
71550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeSet ac(0x61, 0x63);
71650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ac.remove(0x62).freeze();
71750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
71850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 ||
71950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 ||
72050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 ||
72150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 ||
72250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 ||
72350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 ||
72450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 ||
72550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 ||
72650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30
72750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
72850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes");
72950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
73050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 ||
73150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 ||
73250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 ||
73350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 ||
73450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 ||
73550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 ||
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 ||
73750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 ||
73850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 ||
73950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20
74050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
74150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes");
74250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIteration() {
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i = 0;
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int outerLoop;
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 6 code points, 3 ranges, 2 strings, 8 total elements
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Iteration will access them in sorted order -  a, b, c, y, z, U0001abcd, "str1", "str2"
752c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    TEST_ASSERT_SUCCESS(ec);
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator it(set);
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (outerLoop=0; outerLoop<3; outerLoop++) {
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Run the test multiple times, to check that iterator.reset() is working.
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<10; i++) {
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UBool         nextv        = it.next();
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UBool         isString     = it.isString();
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t       codePoint    = it.getCodepoint();
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //int32_t       codePointEnd = it.getCodepointEnd();
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString s   = it.getString();
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch (i) {
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 0:
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x61);
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "a");
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 1:
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x62);
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "b");
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 2:
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x63);
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "c");
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 3:
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x79);
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "y");
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 4:
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x7a);
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "z");
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 5:
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x1abcd);
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == UnicodeString((UChar32)0x1abcd));
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 6:
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == TRUE);
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "str1");
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 7:
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == TRUE);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "str2");
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 8:
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == FALSE);
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 9:
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == FALSE);
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        it.reset();  // prepare to run the iteration again.
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStrings() {
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* testList[] = {
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet::createFromAll("abc"),
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[a-c]", ec),
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        &(UnicodeSet::createFrom("ch")->add('a','z').add("ll")),
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[{ll}{ch}a-z]", ec),
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet::createFrom("ab}c"),
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[{ab\\}c}]", ec),
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        &((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')),
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec),
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NULL
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: couldn't construct test sets");
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; testList[i] != NULL; i+=2) {
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(ec)) {
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString pat0, pat1;
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testList[i]->toPattern(pat0, TRUE);
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testList[i+1]->toPattern(pat1, TRUE);
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*testList[i] == *testList[i+1]) {
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                logln((UnicodeString)"Ok: " + pat0 + " == " + pat1);
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                logln((UnicodeString)"FAIL: " + pat0 + " != " + pat1);
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testList[i];
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testList[i+1];
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax.
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestScriptSet() {
869c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
871c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Jitterbug 1423 */
874c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
 * Test property, name and regex-compatible set pattern syntax, using a table of (pattern, chars in, chars not in) entries.
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPropertySet() {
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char* const DATA[] = {
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Pattern, Chars IN, Chars NOT in
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Latin:]",
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aA",
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0391\\u03B1",
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\p{Greek}]",
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0391\\u03B1",
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aA",
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\P{ GENERAL Category = upper case letter }",
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ABC",
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
89750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Combining class: @since ICU 2.2
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Check both symbolic and numeric
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{ccc=Nukta}",
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0ABC",
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{Canonical Combining Class = 11}",
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u05B1",
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u05B2",
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:c c c = iota subscript :]",
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0345",
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyz",
91150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Bidi class: @since ICU 2.2
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{bidiclass=lefttoright}",
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0671\\u0672",
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Binary properties: @since ICU 2.2
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{ideographic}",
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u4E0A",
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "x",
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:math=false:]",
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "q)*(",
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // weiv: )(and * were removed from math in Unicode 4.0.1
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //"(*+)",
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "+<>^",
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#1767 \N{}, \p{ASCII}
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Ascii:]",
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc\\u0000\\u007F",
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0080\\u4E00",
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\N{ latin small letter  a  }[:name= latin small letter z:]]",
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "az",
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "qrs",
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2015
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:any:]",
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "a\\U0010FFFF",
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "",
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:nv=0.5:]",
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u00BD\\u0F2A",
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u00BC",
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2653: Age
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Age=1.1:]",
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u03D6", // 1.1
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u03D8\\u03D9", // 3.2
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Age=3.1:]",
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u1800\\u3400\\U0002f800",
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2350: Case_Sensitive
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Case Sensitive:]",
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "A\\u1FFC\\U00010410",
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ";\\u00B4\\U00010500",
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2832: C99-compatibility props
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:blank:]",
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        " \\u0009",
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "1-9A-Z",
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:graph:]",
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "19AZ",
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        " \\u0003\\u0007\\u0009\\u000A\\u000D",
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:punct:]",
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "!@#%&*()[]{}-_\\/;:,.?'\"",
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "09azAZ",
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:xdigit:]",
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "09afAF",
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "gG!",
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Regex compatibility test
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[-b]", // leading '-' is literal
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[^-b]", // leading '-' is literal
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[b-]", // trailing '-' is literal
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[^b-]", // trailing '-' is literal
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a-b-]", // trailing '-' is literal
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ab-",
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "c=",
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[a-q]&[p-z]-]", // trailing '-' is literal
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "pq-",
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "or=",
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\s|\\)|:|$|\\>]", // from regex tests
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "s|):$>",
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\uDC00cd]", // JB#2906: isolated trail at start
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "cd\\uDC00",
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ab\\uD800\\U00010000",
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ab\\uD800]", // JB#2906: isolated trail at start
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ab\\uD800",
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "cd\\uDC00\\U00010000",
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ab\\uD800cd]", // JB#2906: isolated lead in middle
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\uD800",
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ef\\uDC00\\U00010000",
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ab\\uDC00cd]", // JB#2906: isolated trail in middle
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\uDC00",
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ef\\uD800\\U00010000",
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
102350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:^lccc=0:]", // Lead canonical class
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0300\\u0301",
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\u00c0\\u00c5",
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:^tccc=0:]", // Trail canonical class
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0300\\u0301\\u00c0\\u00c5",
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd",
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0300\\u0301\\u00c0\\u00c5",
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd",
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "",
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\u0300\\u0301\\u00c0\\u00c5",
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0F73\\u0F75\\u0F81",
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\u0300\\u0301\\u00c0\\u00c5",
104350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* !UCONFIG_NO_NORMALIZATION */
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Assigned:]",
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
104727f654740f2a26ad62a5c155af9199af9e69b889claireho        "\\u0888\\uFDD3\\uFFFE\\U00050005",
104827f654740f2a26ad62a5c155af9199af9e69b889claireho
104927f654740f2a26ad62a5c155af9199af9e69b889claireho        // Script_Extensions, new in Unicode 6.0
105027f654740f2a26ad62a5c155af9199af9e69b889claireho        "[:scx=Arab:]",
105127f654740f2a26ad62a5c155af9199af9e69b889claireho        "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
105254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        "\\u061D\\uFDEF\\uFDFE",
105327f654740f2a26ad62a5c155af9199af9e69b889claireho
105427f654740f2a26ad62a5c155af9199af9e69b889claireho        // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
105527f654740f2a26ad62a5c155af9199af9e69b889claireho        // so scx-sc is missing U+FDF2.
105627f654740f2a26ad62a5c155af9199af9e69b889claireho        "[[:Script_Extensions=Arabic:]-[:Arab:]]",
105727f654740f2a26ad62a5c155af9199af9e69b889claireho        "\\u0640\\u064B\\u0650\\u0655\\uFDFD",
105827f654740f2a26ad62a5c155af9199af9e69b889claireho        "\\uFDF2"
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<DATA_LEN; i+=3) {
1064c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          CharsToUnicodeString(DATA[i+2]));
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * Test that Posix style character classes [:digit:], etc.
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  *   have the Unicode definitions from TR 18.
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  */
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPosixClasses() {
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:alpha:]", status);
1077c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:lower:]", status);
1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:upper:]", status);
1091c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:punct:]", status);
1098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:digit:]", status);
1105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:xdigit:]", status);
1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:alnum:]", status);
1119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:space:]", status);
1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:blank:]", status);
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status);
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:cntrl:]", status);
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:graph:]", status);
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:print:]", status);
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test cloning of UnicodeSet.  For C++, we test the copy constructor.
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestClone() {
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet s("[abcxyz]", ec);
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet t(s);
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectContainment(t, "abc", "def");
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the indexOf() and charAt() methods.
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIndexOf() {
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set("[a-cx-y3578]", ec);
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet constructor");
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<set.size(); ++i) {
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = set.charAt(i);
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.indexOf(c) != i) {
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: charAt(%d) = %X => indexOf() => %d",
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                i, c, set.indexOf(c));
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c = set.charAt(set.size());
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != -1) {
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: charAt(<out of range>) = %X", c);
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t j = set.indexOf((UChar32)0x71/*'q'*/);
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (j != -1) {
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: indexOf('q') = " + j);
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test closure API.
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestCloseOver() {
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char CASE[] = {(char)USET_CASE_INSENSITIVE};
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS};
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* DATA[] = {
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // selector, input, output
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aq\\u00DF{Bc}{bC}{Fi}]",
1213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]",  // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1]", // 'DZ'
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1\\u01F2\\u01F3]",
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u1FB4]",
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u1FB4{\\u03AC\\u03B9}]",
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[{F\\uFB01}]",
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\uFB03{ffi}]",
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, // make sure binary search finds limits
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a\\uFF3A]",
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aA\\uFF3A\\uFF5A]",
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a-z]","[A-Za-z\\u017F\\u212A]",
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[abc]","[A-Ca-c]",
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ABC]","[A-Ca-c]",
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[i]", "[iI]",
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u0130]",          "[\\u0130{i\\u0307}]", // dotted I
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{i\\u0307}]",       "[\\u0130{i\\u0307}]", // i with dot
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u0131]",          "[\\u0131]", // dotless i
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u0390]",          "[\\u0390\\u1FD3{\\u03B9\\u0308\\u0301}]",
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u03c2]",          "[\\u03a3\\u03c2\\u03c3]", // sigmas
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u03f2]",          "[\\u03f2\\u03f9]", // lunate sigmas
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u03f7]",          "[\\u03f7\\u03f8]",
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u1fe3]",          "[\\u03b0\\u1fe3{\\u03c5\\u0308\\u0301}]",
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\ufb05]",          "[\\ufb05\\ufb06{st}]",
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{st}]",             "[\\ufb05\\ufb06{st}]",
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\U0001044F]",      "[\\U00010427\\U0001044F]",
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{a\\u02BE}]",       "[\\u1E9A{a\\u02BE}]", // first in sorted table
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE_MAPPINGS,
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aq\\u00DF{Bc}{bC}{Fi}]",
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE_MAPPINGS,
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1]", // 'DZ'
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1\\u01F2\\u01F3]",
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE_MAPPINGS,
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a-z]",
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[A-Za-z]",
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NULL
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet s;
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet t;
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString buf;
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; DATA[i]!=NULL; i+=3) {
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t selector = DATA[i][0];
1286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString pat(DATA[i+1], -1, US_INV);
1287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString exp(DATA[i+2], -1, US_INV);
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s.applyPattern(pat, ec);
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s.closeOver(selector);
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t.applyPattern(exp, ec);
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: applyPattern failed");
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (s == t) {
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
12986d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru            dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  s.toPattern(buf, TRUE) + ", expected " + exp);
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Unused test code.
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * This was used to compare the old implementation (using USET_CASE)
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * with the new one (using 0x100 temporarily)
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * while transitioning from hardcoded case closure tables in uniset.cpp
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu.
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * and using ucase.c functions for closure.
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Note: The old and new implementation never fully matched because
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * the old implementation turned out to not map U+0130 and U+0131 correctly
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * (dotted I and dotless i) and because the old implementation's data tables
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * were outdated compared to Unicode 4.0.1 at the time of the change to the
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * new implementation. (So sigmas and some other characters were not handled
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * according to the newer Unicode version.)
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2;
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator si(sens);
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString str, buf2;
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *pStr;
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(si.next()) {
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(!si.isString()) {
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=si.getCodepoint();
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.clear();
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.add(c);
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.setTo(c);
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.foldCase();
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sens2.add(str);
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t=s;
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.closeOver(USET_CASE);
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t.closeOver(0x100);
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(s!=t) {
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: closeOver(U+%04x) differs: ", c);
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // remove all code points
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // should contain all full case folding mapping strings
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sens2.remove(0, 0x10ffff);
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    si.reset(sens2);
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(si.next()) {
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(si.isString()) {
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pStr=&si.getString();
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.clear();
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.add(*pStr);
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t=s2=s;
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.closeOver(USET_CASE);
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t.closeOver(0x100);
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(s!=t) {
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, TRUE)+") differs: ");
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test the pattern API
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec);
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: applyPattern failed");
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(s, "abcABC", "defDEF");
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, NULL, ec);
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: constructor failed");
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(v, "defDEF", "abcABC");
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet cm("[abck]", USET_ADD_CASE_MAPPINGS, NULL, ec);
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: construct w/case mappings failed");
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(cm, "abckABCK", CharsToUnicodeString("defDEF\\u212A"));
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestEscapePattern() {
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char pattern[] =
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\uFEFF \\u200A-\\u200E \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char exp[] =
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We test this with two passes; in the second pass we
1391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // pre-unescape the pattern.  Since U+200E is Pattern_White_Space,
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this fails -- which is what we expect.
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t pass=1; pass<=2; ++pass) {
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString pat(pattern, -1, US_INV);
1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pass==2) {
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pat = pat.unescape();
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Pattern is only good for pass 1
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool isPatternValid = (pass==1);
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet set(pat, ec);
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(ec) != isPatternValid){
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: applyPattern(" +
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(pat) + ") => " +
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  u_errorName(ec));
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains((UChar)0x0644)){
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + escape(pat) + " contains(U+0664)");
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString newpat;
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.toPattern(newpat, TRUE);
1418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (newpat == UnicodeString(exp, -1, US_INV)) {
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln(escape(pat) + " => " + newpat);
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat);
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i=0; i<set.getRangeCount(); ++i) {
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str("Range ");
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.append((UChar)(0x30 + i))
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(": ")
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append((UChar32)set.getRangeStart(i))
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(" - ")
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append((UChar32)set.getRangeEnd(i));
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str = str + " (" + set.getRangeStart(i) + " - " +
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                set.getRangeEnd(i) + ")";
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (set.getRangeStart(i) < 0) {
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"FAIL: " + escape(str));
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                logln(escape(str));
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectRange(const UnicodeString& label,
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 const UnicodeSet& set,
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 UChar32 start, UChar32 end) {
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet exp(start, end);
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set == exp) {
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln(label + " => " + set.toPattern(pat, TRUE));
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString xpat;
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: " + label + " => " +
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set.toPattern(pat, TRUE) +
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ", expected " + exp.toPattern(xpat, TRUE));
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestInvalidCodePoint() {
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar32 DATA[] = {
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test range             Expected range
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0, 0x10FFFF,              0, 0x10FFFF,
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (UChar32)-1, 8,           0, 8,
1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        8, 0x110000,              8, 0x10FFFF
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]);
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<DATA_LENGTH; i+=4) {
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start  = DATA[i];
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end    = DATA[i+1];
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 xstart = DATA[i+2];
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 xend   = DATA[i+3];
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Try various API using the test code points
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet set(start, end);
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"ct(" + start + "," + end + ")",
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.clear();
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(start, end);
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"set(" + start + "," + end + ")",
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool b = set.contains(start);
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.contains(start, end);
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsNone(start, end);
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsSome(start, end);
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*int32_t index = set.indexOf(start);*/
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.clear();
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.add(start);
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.add(start, end);
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"add(" + start + "," + end + ")",
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(0, 0x10FFFF);
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.retain(start, end);
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"retain(" + start + "," + end + ")",
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.retain(start);
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(0, 0x10FFFF);
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.remove(start);
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.remove(start, end);
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement();
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"!remove(" + start + "," + end + ")",
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(0, 0x10FFFF);
1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement(start, end);
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement();
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"!complement(" + start + "," + end + ")",
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement(start);
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar32 DATA2[] = {
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0,
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x10FFFF,
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (UChar32)-1,
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x110000
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]);
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<DATA2_LENGTH; ++i) {
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = DATA2[i], end = 0x10FFFF;
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool valid = (c >= 0 && c <= 0x10FFFF);
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet set(0, 0x10FFFF);
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // For single-codepoint contains, invalid codepoints are NOT contained
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool b = set.contains(c);
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (b == valid) {
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"[\\u0000-\\U0010FFFF].contains(" + c +
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + b);
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].contains(" + c +
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + b);
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // For codepoint range contains, containsNone, and containsSome,
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // invalid or empty (start > end) ranges have UNDEFINED behavior.
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.contains(c, end);
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].contains(" + c +
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "," + end + ") = " + b);
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsNone(c, end);
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsNone(" + c +
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "," + end + ") = " + b);
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsSome(c, end);
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsSome(" + c +
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "," + end + ") = " + b);
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t index = set.indexOf(c);
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((index >= 0) == valid) {
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"[\\u0000-\\U0010FFFF].indexOf(" + c +
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + index);
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].indexOf(" + c +
1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + index);
1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used by TestSymbolTable
1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass TokenSymbolTable : public SymbolTable {
1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Hashtable contents;
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
1576103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        contents.setValueDeleter(uprv_deleteUObject);
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ~TokenSymbolTable() {}
1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * (Non-SymbolTable API) Add the given variable and value to
1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * the table.  Variable should NOT contain leading '$'.
1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void add(const UnicodeString& var, const UnicodeString& value,
1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             UErrorCode& ec) {
1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(ec)) {
1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            contents.put(var, new UnicodeString(value), ec);
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * SymbolTable API
1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual const UnicodeString* lookup(const UnicodeString& s) const {
1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (const UnicodeString*) contents.get(s);
1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * SymbolTable API
1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual const UnicodeFunctor* lookupMatcher(UChar32 /*ch*/) const {
1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * SymbolTable API
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString parseReference(const UnicodeString& text,
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         ParsePosition& pos, int32_t limit) const {
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start = pos.getIndex();
1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i = start;
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString result;
1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (i < limit) {
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar c = text.charAt(i);
1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++i;
1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i == start) { // No valid name chars
1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return result; // Indicate failure with empty string
1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos.setIndex(i);
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        text.extractBetween(start, i, result);
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return result;
1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSymbolTable() {
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Multiple test cases can be set up here.  Each test case
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // is terminated by null:
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // var, value, var, value,..., input pat., exp. output pat., null
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* DATA[] = {
1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "us", "a-z", "[0-1$us]", "[0-1a-z]", NULL,
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", NULL,
1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", NULL,
1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NULL
1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; DATA[i]!=NULL; ++i) {
1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
1643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TokenSymbolTable sym(ec);
1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: couldn't construct TokenSymbolTable");
1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Set up variables
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (DATA[i+2] != NULL) {
1651c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sym.add(UnicodeString(DATA[i], -1, US_INV), UnicodeString(DATA[i+1], -1, US_INV), ec);
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: couldn't add to TokenSymbolTable");
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            i += 2;
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Input pattern and expected output pattern
1660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString inpat = UnicodeString(DATA[i], -1, US_INV), exppat = UnicodeString(DATA[i+1], -1, US_INV);
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        i += 2;
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ParsePosition pos(0);
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet us(inpat, pos, USET_IGNORE_SPACE, &sym, ec);
1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: couldn't construct UnicodeSet");
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // results
1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pos.getIndex() != inpat.length()) {
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"Failed to read to end of string \""
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  + inpat + "\": read to "
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  + pos.getIndex() + ", length is "
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  + inpat.length());
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet us2(exppat, ec);
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: couldn't construct expected UnicodeSet");
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString a, b;
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (us != us2) {
1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"Failed, got " + us.toPattern(a, TRUE) +
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ", expected " + us2.toPattern(b, TRUE));
1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"Ok, got " + us.toPattern(a, TRUE));
1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSurrogate() {
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* DATA[] = {
1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // These should all behave identically
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[abc\\uD800\\uDC00]",
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // "[abc\uD800\uDC00]", // Can't do this on C -- only Java
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[abc\\U00010000]",
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int i=0; DATA[i] != 0; ++i) {
1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
1704c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV));
1705b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeString str = UnicodeString(DATA[i], -1, US_INV);
1706b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        UnicodeSet set(str, ec);
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: UnicodeSet constructor");
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(set,
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          CharsToUnicodeString("abc\\U00010000"),
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.size() != 4) {
1715c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " +
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  set.size() + ", expected 4");
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestExhaustive() {
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // exhaustive tests. Simulate UnicodeSets with integers.
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // That gives us very solid tests (except for large memory tests).
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t limit = 128;
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet x, y, z, aa;
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < limit; ++i) {
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bitsToSet(i, x);
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Testing " + i + ", " + x);
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _testComplement(i, x, y);
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // AS LONG AS WE ARE HERE, check roundtrip
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        checkRoundTrip(bitsToSet(i, aa));
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t j = 0; j < limit; ++j) {
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testAdd(i,j,  x,y,z);
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testXor(i,j,  x,y,z);
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testRetain(i,j,  x,y,z);
1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testRemove(i,j,  x,y,z);
1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testComplement(int32_t a, UnicodeSet& x, UnicodeSet& z) {
1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.complement();
1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (~a)) {
1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: ~" + x +  " != " + z);
1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: ~" + a + " != " + c);
1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"complement " + a);
1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testAdd(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.addAll(y);
1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a | b)) {
1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: " + x + " | " + y + " != " + z);
1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: " + a + " | " + b + " != " + c);
1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"add " + a + "," + b);
1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRetain(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.retainAll(y);
1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a & b)) {
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: retain: " + x + " & " + y + " != " + z);
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: retain: " + a + " & " + b + " != " + c);
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"retain " + a + "," + b);
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRemove(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.removeAll(y);
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a &~ b)) {
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: remove: " + x + " &~ " + y + " != " + z);
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: remove: " + a + " &~ " + b + " != " + c);
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"remove " + a + "," + b);
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testXor(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.complementAll(y);
1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a ^ b)) {
1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: complement: " + x + " ^ " + y + " != " + z);
1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: complement: " + a + " ^ " + b + " != " + c);
1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"complement " + a + "," + b);
1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Check that ranges are monotonically increasing and non-
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * overlapping.
1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkCanonicalRep(const UnicodeSet& set, const UnicodeString& msg) {
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = set.getRangeCount();
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (n < 0) {
1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL result of " + msg +
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": range count should be >= 0 but is " +
1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              n /*+ " for " + set.toPattern())*/);
1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 last = 0;
1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<n; ++i) {
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = set.getRangeStart(i);
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end = set.getRangeEnd(i);
1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (start > end) {
1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL result of " + msg +
1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ": range " + (i+1) +
1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " start > end: " + (int)start + ", " + (int)end +
1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " for " + set);
1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i > 0 && start <= last) {
1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL result of " + msg +
1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ": range " + (i+1) +
1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " overlaps previous range: " + (int)start + ", " + (int)end +
1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " for " + set);
1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last = end;
1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a bitmask to a UnicodeSet.
1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) {
1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result.clear();
1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (UChar32 i = 0; i < 32; ++i) {
1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((a & (1<<i)) != 0) {
1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result.add(i);
1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a UnicodeSet to a bitmask.  Only the characters
1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U+0000 to U+0020 are represented in the bitmask.
1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSetTest::setToBits(const UnicodeSet& x) {
1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = 0;
1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < 32; ++i) {
1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (x.contains((UChar32)i)) {
1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result |= (1<<i);
1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the representation of an inversion list based UnicodeSet
1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as a pairs list.  Ranges are listed in ascending Unicode order.
1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For example, the set [a-zA-M3] is represented as "33AMaz".
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) {
1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pairs;
1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<set.getRangeCount(); ++i) {
1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = set.getRangeStart(i);
1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end = set.getRangeEnd(i);
1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (end > 0xFFFF) {
1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            end = 0xFFFF;
1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            i = set.getRangeCount(); // Should be unnecessary
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pairs.append((UChar)start).append((UChar)end);
1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pairs;
1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic consistency check for a few items.
1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * That the iterator works, and that we can create a pattern and
1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * get the same thing back
1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet t(s);
1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "copy ct");
1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t = s;
1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "operator=");
1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copyWithIterator(t, s, FALSE);
1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "iterator roundtrip");
1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copyWithIterator(t, s, TRUE); // try range
1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "iterator roundtrip");
1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat; s.toPattern(pat, FALSE);
1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t.applyPattern(pat, ec);
1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: applyPattern");
1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        checkEqual(s, t, "toPattern(false)");
1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.toPattern(pat, TRUE);
1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t.applyPattern(pat, ec);
1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: applyPattern");
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        checkEqual(s, t, "toPattern(true)");
1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t.clear();
1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator it(s);
1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (withRange) {
1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (it.nextRange()) {
1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (it.isString()) {
1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getString());
1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getCodepoint(), it.getCodepointEnd());
1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (it.next()) {
1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (it.isString()) {
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getString());
1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getCodepoint());
1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString source; s.toPattern(source, TRUE);
1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString result; t.toPattern(result, TRUE);
1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s != t) {
1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: " + message
1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; source = " + source
1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; result = " + result
1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              );
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok: " + message
1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; source = " + source
1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; result = " + result
1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              );
1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeString& pat,
1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsIn,
1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsOut) {
1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set(pat, ec);
1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
19746d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln((UnicodeString)"FAIL: pattern \"" +
1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              pat + "\" => " + u_errorName(ec));
1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectContainment(set, pat, charsIn, charsOut);
1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set,
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsIn,
1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsOut) {
1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(pat);
1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectContainment(set, pat, charsIn, charsOut);
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set,
1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& setName,
1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsIn,
1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsOut) {
1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString bad;
1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<charsIn.length(); i+=U16_LENGTH(c)) {
2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = charsIn.char32At(i);
2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!set.contains(c)) {
2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            bad.append(c);
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bad.length() > 0) {
2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail: set " + setName + " does not contain " + prettify(bad) +
2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ", expected containment of " + prettify(charsIn));
2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok: set " + setName + " contains " + prettify(charsIn));
2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bad.truncate(0);
2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<charsOut.length(); i+=U16_LENGTH(c)) {
2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = charsOut.char32At(i);
2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains(c)) {
2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            bad.append(c);
2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bad.length() > 0) {
2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail: set " + setName + " contains " + prettify(bad) +
2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ", expected non-containment of " + prettify(charsOut));
2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok: set " + setName + " does not contain " + prettify(charsOut));
2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPattern(UnicodeSet& set,
2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UnicodeString& pattern,
2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UnicodeString& expectedPairs){
2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern(pattern, status);
2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: applyPattern(\"") + pattern +
2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "\") failed");
2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (getPairs(set) != expectedPairs ) {
2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln(UnicodeString("FAIL: applyPattern(\"") + pattern +
2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  "\") => pairs \"" +
2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(getPairs(set)) + "\", expected \"" +
2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(expectedPairs) + "\"");
2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln(UnicodeString("Ok:   applyPattern(\"") + pattern +
2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  "\") => pairs \"" +
2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(getPairs(set)) + "\"");
2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // the result of calling set.toPattern(), which is the string representation of
2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this set(set), is passed to a  UnicodeSet constructor, and tested that it
2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // will produce another set that is equal to this one.
2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString temppattern;
2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(temppattern);
2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *tempset=new UnicodeSet(temppattern, status);
2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => invalid pattern"));
2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*tempset != set || getPairs(*tempset) != getPairs(set)){
2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \""+ escape(getPairs(*tempset)) + "\", expected pairs \"" +
2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            escape(getPairs(set)) + "\""));
2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else{
2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln(UnicodeString("Ok:   applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \"" + escape(getPairs(*tempset)) + "\""));
2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete tempset;
2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPairs(const UnicodeSet& set, const UnicodeString& expectedPairs) {
2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (getPairs(set) != expectedPairs) {
2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: Expected pair list \"") +
2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              escape(expectedPairs) + "\", got \"" +
2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              escape(getPairs(set)) + "\"");
2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectToPattern(const UnicodeSet& set,
2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& expPat,
2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const char** expStrings) {
2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(pat, TRUE);
2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat == expPat) {
2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok:   toPattern() => \"" + pat + "\"");
2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
2088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (expStrings == NULL) {
2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool in = TRUE;
2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; expStrings[i] != NULL; ++i) {
2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (expStrings[i] == NOT) { // sic; pointer comparison
2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            in = FALSE;
2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s = CharsToUnicodeString(expStrings[i]);
2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool contained = set.contains(s);
2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (contained == in) {
2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"Ok: " + expPat +
2103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (contained ? " contains {" : " does not contain {") +
2104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(expStrings[i]) + "}");
2105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + expPat +
2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (contained ? " contains {" : " does not contain {") +
2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(expStrings[i]) + "}");
2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); }
2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::doAssert(UBool condition, const char *message)
2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!condition) {
2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("ERROR : ") + message);
2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString
2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::escape(const UnicodeString& s) {
2125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString buf;
2126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<s.length(); )
2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = s.char32At(i);
2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (0x0020 <= c && c <= 0x007F) {
2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += c;
2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c <= 0xFFFF) {
2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += (UChar)0x5c; buf += (UChar)0x75;
2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += (UChar)0x5c; buf += (UChar)0x55;
2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0xF0000000) >> 28);
2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0x0F000000) >> 24);
2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0x00F00000) >> 20);
2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0x000F0000) >> 16);
2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString((c & 0xF000) >> 12);
2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString((c & 0x0F00) >> 8);
2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString((c & 0x00F0) >> 4);
2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString(c & 0x000F);
2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        i += U16_LENGTH(c);
2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return buf;
2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestFreezable() {
2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15);
2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet idSet(idPattern, errorCode);
2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
21566d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15);
2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet wsSet(wsPattern, errorCode);
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
21636d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru        dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    idSet.add(idPattern);
2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet frozen(idSet);
2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.freeze();
2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(idSet.isFrozen() || !frozen.isFrozen()) {
2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: isFrozen() is wrong");
2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: a copy-constructed frozen set differs from its original");
2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen=wsSet;
2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: a frozen set was modified by operator=");
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet frozen2(frozen);
2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen2!=frozen || frozen2!=idSet) {
2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: a copied frozen set differs from its frozen original");
2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!frozen2.isFrozen()) {
2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: copy-constructing a frozen set results in a thawed one");
2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet frozen3(5, 55);  // Set to some values to really test assignment below, not copy construction.
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen3.contains(0, 4) || !frozen3.contains(5, 55) || frozen3.contains(56, 0x10ffff)) {
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(5, 55) failed");
2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen3=frozen;
2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!frozen3.isFrozen()) {
2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: copying a frozen set results in a thawed one");
2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *cloned=(UnicodeSet *)frozen.clone();
2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!cloned->isFrozen() || *cloned!=frozen || cloned->containsSome(0xd802, 0xd805)) {
2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: clone() failed");
2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cloned->add(0xd802, 0xd805);
2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(cloned->containsSome(0xd802, 0xd805)) {
2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: unable to modify clone");
2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete cloned;
2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *thawed=(UnicodeSet *)frozen.cloneAsThawed();
2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(thawed->isFrozen() || *thawed!=frozen || thawed->containsSome(0xd802, 0xd805)) {
2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: cloneAsThawed() failed");
2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    thawed->add(0xd802, 0xd805);
2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!thawed->contains(0xd802, 0xd805)) {
2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: unable to modify thawed clone");
2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete thawed;
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.set(5, 55);
2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::set() modified a frozen set");
2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.clear();
2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::clear() modified a frozen set");
2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.closeOver(USET_CASE_INSENSITIVE);
2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::closeOver() modified a frozen set");
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.compact();
2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::compact() modified a frozen set");
2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ParsePosition pos;
2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPattern(wsPattern, errorCode).
2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode).
2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode).
2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode).
2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode);
2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set");
2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(0xd800).
2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(0xd802, 0xd805).
2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(wsPattern).
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        addAll(idPattern).
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        addAll(wsSet);
2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::addXYZ() modified a frozen set");
2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(0x62).
2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(0x64, 0x69).
2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retainAll(wsPattern).
2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retainAll(wsSet);
2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::retainXYZ() modified a frozen set");
2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove(0x62).
2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove(0x64, 0x69).
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove(idPattern).
2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        removeAll(idPattern).
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        removeAll(idSet);
2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::removeXYZ() modified a frozen set");
2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement().
2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(0x62).
2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(0x64, 0x69).
2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(idPattern).
2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complementAll(idPattern).
2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complementAll(idSet);
2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::complementXYZ() modified a frozen set");
2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test span() etc. -------------------------------------------------------- ***
2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Append the UTF-8 version of the string to t and return the appended UTF-8 length.
2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendUTF8(const UChar *s, int32_t length, char *t, int32_t capacity) {
2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length8=0;
2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_strToUTF8(t, capacity, &length8, s, length, &errorCode);
2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return length8;
2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The string contains an unpaired surrogate.
2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Ignore this string.
2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator;
2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Make the strings in a UnicodeSet easily accessible.
2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStrings {
2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetWithStrings(const UnicodeSet &normalSet) :
2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            set(normalSet), stringsLength(0), hasSurrogates(FALSE) {
2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t size=set.size();
2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(size>0 && set.charAt(size-1)<0) {
2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If a set's last element is not a code point, then it must contain strings.
2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Iterate over the set, skip all code point ranges, and cache the strings.
2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Convert them to UTF-8 for spanUTF8().
2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSetIterator iter(set);
2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *s;
2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            char *s8=utf8;
2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8, utf8Count=0;
2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(iter.nextRange() && stringsLength<LENGTHOF(strings)) {
2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(iter.isString()) {
2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Store the pointer to the set's string element
2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // which we happen to know is a stable pointer.
2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    strings[stringsLength]=s=&iter.getString();
2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    utf8Count+=
2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        utf8Lengths[stringsLength]=length8=
2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        appendUTF8(s->getBuffer(), s->length(),
2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   s8, (int32_t)(sizeof(utf8)-utf8Count));
2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(length8==0) {
2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        hasSurrogates=TRUE;  // Contains unpaired surrogates.
2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    s8+=length8;
2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++stringsLength;
2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &getSet() const {  // Returns the wrapped set (the `set` reference member), not a copy.
2344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return set;
2345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool hasStrings() const {  // True when at least one string element was recorded in strings[].
2348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (UBool)(stringsLength>0);
2349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool hasStringsWithSurrogates() const {  // Returns the hasSurrogates flag, set by the setup loop above when a string's UTF-8 length comes back as 0.
2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return hasSurrogates;
2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class UnicodeSetWithStringsIterator;  // The iterator reads strings[], utf8[], utf8Lengths[] and stringsLength directly.
2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &set;  // The wrapped set; held by reference and returned by getSet().
2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *strings[20];  // Pointers to the set's own string elements (not copies).
2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t stringsLength;  // Number of filled entries in strings[] and utf8Lengths[].
2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool hasSurrogates;  // Set when some string converted to 0 UTF-8 bytes (unpaired surrogates).
2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char utf8[1024];  // UTF-8 forms of the strings stored back to back; the iterator steps through them using utf8Lengths[].
2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t utf8Lengths[20];  // Byte length of each string's UTF-8 form, parallel to strings[]; 0 when the conversion produced nothing.
2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextStringIndex;  // NOTE(review): not referenced by any code visible here; UnicodeSetWithStringsIterator keeps its own position -- confirm whether still needed.
2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextUTF8Start;  // NOTE(review): same as nextStringIndex above -- appears unused in the visible code.
2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator {
2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetWithStringsIterator(const UnicodeSetWithStrings &set) :
2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fSet(set), nextStringIndex(0), nextUTF8Start(0) {
2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void reset() {
2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        nextStringIndex=nextUTF8Start=0;
2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *nextString() {
2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(nextStringIndex<fSet.stringsLength) {
2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return fSet.strings[nextStringIndex++];
2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Do not mix with calls to nextString().
2390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *nextUTF8(int32_t &length) {
2391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(nextStringIndex<fSet.stringsLength) {
2392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8=fSet.utf8+nextUTF8Start;
2393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            nextUTF8Start+=length=fSet.utf8Lengths[nextStringIndex++];
2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return s8;
2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=0;
2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSetWithStrings &fSet;
2403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextStringIndex;
2404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextUTF8Start;
2405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
2406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Compare 16-bit Unicode strings (which may be malformed UTF-16)
2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// at code point boundaries.
2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// That is, each edge of a match must not be in the middle of a surrogate pair.
2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querumatches16CPB(const UChar *s, int32_t start, int32_t limit, const UnicodeString &t) {
2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s+=start;
2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    limit-=start;
2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length=t.length();
2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0==t.compare(s, length) &&
2416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&
2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));
2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implement span() with contains() for comparison.
// Walks s from the front one code point at a time and returns the index at
// which the span ends (0..length). Single code points are tested with
// realSet.contains(); the set's strings, if any, are tested at each position
// with matches16CPB().
//   set            the set together with its cached string elements
//   s, length      the UTF-16 text to scan and its length in UChars
//   spanCondition  USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED or USET_SPAN_SIMPLE
2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 USetSpanCondition spanCondition) {
2423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
        // No string elements: only single code points decide where the span ends.
2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values so it can be compared with contains() below.
2427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start=0, prev;
        // prev remembers where the current code point begins; U16_NEXT moves start past it.
2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((prev=start)<length) {
2432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(s, start, length, c);
2433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
        // Start of the code point that ended the span, or the loop's end position if none did.
2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
        // Span text that is not in the set: stop at the first position where a
        // set code point or a set string begins.
2439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next;
2442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
2443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(s, next, length, c);
2444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
                // Only strings that fit into the remaining text are compared.
2450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
2451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return start;
2453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return start;
2458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
        // start: current position; next: end of the match chosen for iteration from start
        // (next==start means "no match yet"); maxSpanLimit: farthest span end
        // reported by the recursive calls below.
2461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next, maxSpanLimit=0;
2462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
2463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(s, next, length, c);
2464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                next=start;  // Do not span this single, not-contained code point.
2466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
2470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
2471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchLimit=start+str->length();
                    // A match that reaches the end of the text cannot be improved on.
2473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchLimit==length) {
2474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return length;
2475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(next==start) {
2480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;  // First match from start.
2481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
                            // After this swap next holds the shorter match end and
                            // matchLimit the longer one.
2482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchLimit<next) {
2483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from start for iteration.
2484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=next;
2485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                next=matchLimit;
2486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchLimit=temp;
2487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from start.
2489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanLength=containsSpanUTF16(set, s+matchLimit, length-matchLimit,
2490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                 USET_SPAN_CONTAINED);
                            // spanLength is relative to s+matchLimit; convert to an index in s.
2491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if((matchLimit+spanLength)>maxSpanLimit) {
2492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                maxSpanLimit=matchLimit+spanLength;
2493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(maxSpanLimit==length) {
2494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return length;
2495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchLimit>next) {
2500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from start.
2501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;
2502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(next==start) {
2507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from start.
2508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
        // The result is the farther of the iterative position and the best recursive one.
2511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(start>maxSpanLimit) {
2512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return start;
2513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return maxSpanLimit;
2515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Backward counterpart of containsSpanUTF16(): walks s from the end toward the
// front one code point at a time and returns the index at which the span
// starts (0..length). Single code points are tested with realSet.contains();
// the set's strings, if any, are tested with matches16CPB() as matches that
// end at the current position.
//   set            the set together with its cached string elements
//   s, length      the UTF-16 text to scan and its length in UChars
//   spanCondition  USET_SPAN_NOT_CONTAINED, USET_SPAN_CONTAINED or USET_SPAN_SIMPLE
2519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanBackUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
2520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     USetSpanCondition spanCondition) {
    // The do-while loops below read a code point before testing their
    // condition, so empty text is handled up front.
2521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
        // No string elements: only single code points decide where the span starts.
2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values so it can be compared with contains() below.
2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
        // prev is the limit of the code point being examined; U16_PREV moves
        // length back to that code point's start.
2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length;
2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_PREV(s, 0, length, c);
2534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
        // Span text that is not in the set: stop at the first position (going
        // backward) where a set code point or a set string ends.
2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
        // length0 keeps the original text length as the limit for matches16CPB(),
        // because length itself is moved backward by U16_PREV.
2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length, length0=length;
2543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_PREV(s, 0, length, c);
2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
                // Only strings that fit before prev are compared; a match ends exactly at prev.
2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return prev;
2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
        // prev: current position; length: start of the match chosen for iteration
        // from prev (length==prev means "no match yet"); minSpanStart: earliest span
        // start reported by the recursive calls below; length0: original text length,
        // used as the limit for matches16CPB().
2561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length, minSpanStart=length, length0=length;
2562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_PREV(s, 0, length, c);
2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length=prev;  // Do not span this single, not-contained code point.
2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchStart=prev-str->length();
                    // A match that reaches the start of the text cannot be improved on.
2573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchStart==0) {
2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return 0;
2575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(length==prev) {
2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;  // First match from prev.
2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
                            // After this swap length holds the start of the shorter
                            // match and matchStart the start of the longer one.
2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchStart>length) {
2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from prev for iteration.
2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=length;
2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                length=matchStart;
2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchStart=temp;
2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from prev.
2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanStart=containsSpanBackUTF16(set, s, matchStart,
2590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                    USET_SPAN_CONTAINED);
2591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(spanStart<minSpanStart) {
2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                minSpanStart=spanStart;
2593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(minSpanStart==0) {
2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return 0;
2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchStart<length) {
2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from prev.
2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;
2602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==prev) {
2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from prev.
2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
        // The result is the earlier of the iterative position and the best recursive one.
2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(prev<minSpanStart) {
2611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return prev;
2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return minSpanStart;
2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                USetSpanCondition spanCondition) {
2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start=0, prev;
2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((prev=start)<length) {
26298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_NEXT_OR_FFFD(s, start, length, c);
2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next;
2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
26408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_NEXT_OR_FFFD(s, next, length, c);
2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return start;
2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return start;
2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next, maxSpanLimit=0;
2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
26618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_NEXT_OR_FFFD(s, next, length, c);
2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                next=start;  // Do not span this single, not-contained code point.
2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchLimit=start+length8;
2672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchLimit==length) {
2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return length;
2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(next==start) {
2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;  // First match from start.
2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchLimit<next) {
2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from start for iteration.
2683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=next;
2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                next=matchLimit;
2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchLimit=temp;
2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from start.
2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanLength=containsSpanUTF8(set, s+matchLimit, length-matchLimit,
2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                USET_SPAN_CONTAINED);
2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if((matchLimit+spanLength)>maxSpanLimit) {
2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                maxSpanLimit=matchLimit+spanLength;
2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(maxSpanLimit==length) {
2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return length;
2694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchLimit>next) {
2699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from start.
2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;
2701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(next==start) {
2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from start.
2707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(start>maxSpanLimit) {
2711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return start;
2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return maxSpanLimit;
2714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
2719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    USetSpanCondition spanCondition) {
2720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
2725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length;
2731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
27328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_PREV_OR_FFFD(s, 0, length, c);
2733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length;
2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
27438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_PREV_OR_FFFD(s, 0, length, c);
2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
2752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return prev;
2754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length, minSpanStart=length;
2762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
27638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            U8_PREV_OR_FFFD(s, 0, length, c);
2764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length=prev;  // Do not span this single, not-contained code point.
2766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
2772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchStart=prev-length8;
2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchStart==0) {
2775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return 0;
2776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(length==prev) {
2781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;  // First match from prev.
2782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
2783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchStart>length) {
2784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from prev for iteration.
2785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=length;
2786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                length=matchStart;
2787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchStart=temp;
2788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from prev.
2790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanStart=containsSpanBackUTF8(set, s, matchStart,
2791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                   USET_SPAN_CONTAINED);
2792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(spanStart<minSpanStart) {
2793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                minSpanStart=spanStart;
2794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(minSpanStart==0) {
2795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return 0;
2796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchStart<length) {
2801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from prev.
2802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;
2803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==prev) {
2808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from prev.
2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(prev<minSpanStart) {
2812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return prev;
2813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return minSpanStart;
2815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Bit flags selecting which spans are to be performed and compared.
// Each group has its individual flags followed by a mask covering the group.
enum {
    // string encodings
    SPAN_UTF16          =1,
    SPAN_UTF8           =2,
    SPAN_UTFS           =SPAN_UTF16|SPAN_UTF8,

    // the set itself and/or its complement
    SPAN_SET            =4,
    SPAN_COMPLEMENT     =8,
    SPAN_POLARITY       =SPAN_SET|SPAN_COMPLEMENT,

    // span directions
    SPAN_FWD            =0x10,
    SPAN_BACK           =0x20,
    SPAN_DIRS           =SPAN_FWD|SPAN_BACK,

    // span conditions
    SPAN_CONTAINED      =0x100,
    SPAN_SIMPLE         =0x200,
    SPAN_CONDITION      =SPAN_CONTAINED|SPAN_SIMPLE,

    // every flag of every group
    SPAN_ALL            =SPAN_UTFS|SPAN_POLARITY|SPAN_DIRS|SPAN_CONDITION
};
2839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline USetSpanCondition invertSpanCondition(USetSpanCondition spanCondition, USetSpanCondition contained) {
2841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return spanCondition == USET_SPAN_NOT_CONTAINED ? contained : USET_SPAN_NOT_CONTAINED;
2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t slen(const void *s, UBool isUTF16) {
2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return isUTF16 ? u_strlen((const UChar *)s) : strlen((const char *)s);
2846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Count spans on a string with the method according to type and set the span limits.
2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set may be the complement of the original.
2851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * When using spanBack() and comparing with span(), use a span condition for the first spanBack()
2852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * according to the expected number of spans.
2853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Sets typeName to an empty string if there is no such type.
2854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns -1 if the span option is filtered out.
2855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
2856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t getSpans(const UnicodeSetWithStrings &set, UBool isComplement,
2857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        const void *s, int32_t length, UBool isUTF16,
2858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        uint32_t whichSpans,
2859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int type, const char *&typeName,
2860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int32_t limits[], int32_t limitsCapacity,
2861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int32_t expectCount) {
2862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start, count;
2864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    USetSpanCondition spanCondition, firstSpanCondition, contained;
2865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool isForward;
2866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(type<0 || 7<type) {
2868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        typeName="";
2869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const typeNames16[]={
2873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "contains", "contains(LM)",
2874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "span", "span(LM)",
2875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "containsBack", "containsBack(LM)",
2876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "spanBack", "spanBack(LM)"
2877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
2878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const typeNames8[]={
2880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "containsUTF8", "containsUTF8(LM)",
2881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "spanUTF8", "spanUTF8(LM)",
2882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "containsBackUTF8", "containsBackUTF8(LM)", // not implemented
2883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "spanBackUTF8", "spanBackUTF8(LM)"
2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
2885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    typeName= isUTF16 ? typeNames16[type] : typeNames8[type];
2887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // filter span options
2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(type<=3) {
2890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // span forward
2891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_FWD)==0) {
2892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        isForward=TRUE;
2895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // span backward
2897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_BACK)==0) {
2898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        isForward=FALSE;
2901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((type&1)==0) {
2903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // use USET_SPAN_CONTAINED
2904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_CONTAINED)==0) {
2905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        contained=USET_SPAN_CONTAINED;
2908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // use USET_SPAN_SIMPLE
2910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_SIMPLE)==0) {
2911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        contained=USET_SPAN_SIMPLE;
2914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Default first span condition for going forward with an uncomplemented set.
2917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    spanCondition=USET_SPAN_NOT_CONTAINED;
2918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(isComplement) {
2919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=invertSpanCondition(spanCondition, contained);
2920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First span condition for span(), used to terminate the spanBack() iteration.
2923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    firstSpanCondition=spanCondition;
2924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // spanBack(): Its initial span condition is span()'s last span condition,
2926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // which is the opposite of span()'s first span condition
2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if we expect an even number of spans.
2928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // (The loop inverts spanCondition (expectCount-1) times
2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // before the expectCount'th span() call.)
2930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If we do not compare forward and backward directions, then we do not have an
2931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // expectCount and just start with firstSpanCondition.
2932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!isForward && (whichSpans&SPAN_FWD)!=0 && (expectCount&1)==0) {
2933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=invertSpanCondition(spanCondition, contained);
2934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    count=0;
2937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(type) {
2938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0:
2939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 1:
2940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start=0;
2941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(length<0) {
2942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=slen(s, isUTF16);
2943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start+= isUTF16 ? containsSpanUTF16(set, (const UChar *)s+start, length-start, spanCondition) :
2946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              containsSpanUTF8(set, (const char *)s+start, length-start, spanCondition);
2947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<limitsCapacity) {
2948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[count]=start;
2949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(start>=length) {
2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 2:
2958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 3:
2959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start=0;
2960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start+= isUTF16 ? realSet.span((const UChar *)s+start, length>=0 ? length-start : length, spanCondition) :
2962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              realSet.spanUTF8((const char *)s+start, length>=0 ? length-start : length, spanCondition);
2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<limitsCapacity) {
2964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[count]=start;
2965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length>=0 ? start>=length :
2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           isUTF16 ? ((const UChar *)s)[start]==0 :
2969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     ((const char *)s)[start]==0
2970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ) {
2971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 4:
2977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 5:
2978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(length<0) {
2979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=slen(s, isUTF16);
2980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<=limitsCapacity) {
2984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[limitsCapacity-count]=length;
2985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length= isUTF16 ? containsSpanBackUTF16(set, (const UChar *)s, length, spanCondition) :
2987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              containsSpanBackUTF8(set, (const char *)s, length, spanCondition);
2988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==0 && spanCondition==firstSpanCondition) {
2989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(count<limitsCapacity) {
2994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            memmove(limits, limits+(limitsCapacity-count), count*4);
2995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 6:
2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 7:
2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
3000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
3001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<=limitsCapacity) {
3002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[limitsCapacity-count]= length >=0 ? length : slen(s, isUTF16);
3003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Note: Length<0 is tested only for the first spanBack().
3005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If we wanted to keep length<0 for all spanBack()s, we would have to
3006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // temporarily modify the string by placing a NUL where the previous spanBack() stopped.
3007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length= isUTF16 ? realSet.spanBack((const UChar *)s, length, spanCondition) :
3008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              realSet.spanBackUTF8((const char *)s, length, spanCondition);
3009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==0 && spanCondition==firstSpanCondition) {
3010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(count<limitsCapacity) {
3015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            memmove(limits, limits+(limitsCapacity-count), count*4);
3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
3018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        typeName="";
3020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
3021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return count;
3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Indexes into the sets[] arrays handed to the span tests.
// Each even index is an original set; the odd index right after it
// is the complement of that set (odd index == isComplement).
enum {
    SLOW=0,         // original set, "slow" variant
    SLOW_NOT=1,     // complement of the "slow" set
    FAST=2,         // original set, "fast" variant
    FAST_NOT=3,     // complement of the "fast" set
    SET_COUNT=4     // number of sets; not a valid index
};

// Names used in failure messages; indexed by the enum constants above.
static const char *const setNames[SET_COUNT]={
    "slow",         // SLOW
    "slow.not",     // SLOW_NOT
    "fast",         // FAST
    "fast.not"      // FAST_NOT
};
3041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
3043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that we get the same results whether we look at text with contains(),
3044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * span() or spanBack(), using unfrozen or frozen versions of the set,
3045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and using the set or its complement (switching the spanConditions accordingly).
3046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The latter verifies that
3047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   set.span(spanCondition) == set.complement().span(!spanCondition).
3048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
3049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The expectLimits[] are either provided by the caller (with expectCount>=0)
3050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or returned to the caller (with an input expectCount<0).
3051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
3052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
3053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const void *s, int32_t length, UBool isUTF16,
3054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint32_t whichSpans,
3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              int32_t expectLimits[], int32_t &expectCount,
3056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const char *testName, int32_t index) {
3057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t limits[500];
3058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t limitsCount;
3059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i, j;
3060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *typeName;
3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int type;
3063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<SET_COUNT; ++i) {
3065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((i&1)==0) {
3066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Even-numbered sets are original, uncomplemented sets.
3067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if((whichSpans&SPAN_SET)==0) {
3068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
3069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
3071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Odd-numbered sets are complemented.
3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if((whichSpans&SPAN_COMPLEMENT)==0) {
3073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
3074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(type=0;; ++type) {
3077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            limitsCount=getSpans(*sets[i], (UBool)(i&1),
3078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 s, length, isUTF16,
3079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 whichSpans,
3080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 type, typeName,
3081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 limits, LENGTHOF(limits), expectCount);
3082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(typeName[0]==0) {
3083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break; // All types tried.
3084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(limitsCount<0) {
3086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue; // Span option filtered out.
3087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(expectCount<0) {
3089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                expectCount=limitsCount;
3090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(limitsCount>LENGTHOF(limits)) {
3091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans",
3092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits));
3093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
3094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                memcpy(expectLimits, limits, limitsCount*4);
3096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(limitsCount!=expectCount) {
3097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld",
3098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)expectCount);
3099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for(j=0; j<limitsCount; ++j) {
3101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(limits[j]!=expectLimits[j]) {
3102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%ld != %ld",
3103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[i], typeName, (long)limitsCount,
3104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              j, (long)limits[j], (long)expectLimits[j]);
3105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
3106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compare span() with containsAll()/containsNone(),
3113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // but only if we have expectLimits[] from the uncomplemented set.
3114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(isUTF16 && (whichSpans&SPAN_SET)!=0) {
3115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar *s16=(const UChar *)s;
3116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString string;
3117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=0, limit, length;
3118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(i=0; i<expectCount; ++i) {
3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            limit=expectLimits[i];
3120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=limit-prev;
3121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length>0) {
3122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                string.setTo(FALSE, s16+prev, length);  // read-only alias
3123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(i&1) {
3124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[SLOW]->getSet().containsAll(string)) {
3125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
3126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
3127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[FAST]->getSet().containsAll(string)) {
3130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
3131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
3132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
3135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[SLOW]->getSet().containsNone(string)) {
3136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
3137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
3138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[FAST]->getSet().containsNone(string)) {
3141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
3143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            prev=limit;
3148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specifically test either UTF-16 or UTF-8.
3153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
3154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const void *s, int32_t length, UBool isUTF16,
3155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint32_t whichSpans,
3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const char *testName, int32_t index) {
3157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectLimits[500];
3158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectCount=-1;
3159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, length, isUTF16, whichSpans, expectLimits, expectCount, testName, index);
3160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool stringContainsUnpairedSurrogate(const UChar *s, int32_t length) {
3163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c, c2;
3164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>=0) {
3166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(length>0) {
3167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=*s++;
3168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            --length;
3169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(0xd800<=c && c<0xe000) {
3170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(c>=0xdc00 || length==0 || !U16_IS_TRAIL(c2=*s++)) {
3171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return TRUE;
3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                --length;
3174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
3177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((c=*s++)!=0) {
3178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(0xd800<=c && c<0xe000) {
3179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(c>=0xdc00 || !U16_IS_TRAIL(c2=*s++)) {
3180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return TRUE;
3181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
3186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text,
3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// unless either UTF is turned off in whichSpans.
3190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Testing UTF-16 and UTF-8 together requires that surrogate code points
3191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// have the same contains(c) value as U+FFFD.
3192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanBothUTFs(const UnicodeSetWithStrings *sets[4],
3193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      const UChar *s16, int32_t length16,
3194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      uint32_t whichSpans,
3195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      const char *testName, int32_t index) {
3196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectLimits[500];
3197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectCount;
3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectCount=-1;  // Get expectLimits[] from testSpan().
3200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF16)!=0) {
3202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        testSpan(sets, s16, length16, TRUE, whichSpans, expectLimits, expectCount, testName, index);
3203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF8)==0) {
3205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert s16[] and expectLimits[] to UTF-8.
3209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t s8[3000];
3210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t offsets[3000];
3211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *s16Limit=s16+length16;
3213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *t=(char *)s8;
3214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *tLimit=t+sizeof(s8);
3215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t *o=offsets;
3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
3217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert with substitution: Turn unpaired surrogates into U+FFFD.
3219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_fromUnicode(openUTF8Converter(), &t, tLimit, &s16, s16Limit, o, TRUE, &errorCode);
3220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: %s[0x%lx] ucnv_fromUnicode(to UTF-8) fails with %s",
3222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              testName, (long)index, u_errorName(errorCode));
3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_resetFromUnicode(utf8Cnv);
3224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length8=(int32_t)(t-(char *)s8);
3227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert expectLimits[].
3229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, j, expect;
3230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=j=0; i<expectCount; ++i) {
3231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expect=expectLimits[i];
3232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(expect==length16) {
3233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectLimits[i]=length8;
3234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
3235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(offsets[j]<expect) {
3236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++j;
3237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectLimits[i]=j;
3239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s8, length8, FALSE, whichSpans, expectLimits, expectCount, testName, index);
3243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 nextCodePoint(UChar32 c) {
3246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Skip some large and boring ranges.
3247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(c) {
3248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x3441:
3249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x4d7f;
3250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x5100:
3251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x9f00;
3252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0xb040:
3253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0xd780;
3254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0xe041:
3255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0xf8fe;
3256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x10100:
3257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x20000;
3258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x20041:
3259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0xe0000;
3260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0xe0101:
3261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x10fffd;
3262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
3263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return c+1;
3264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Verify that all implementations represent the same set.
3268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // contains(U+FFFD) is inconsistent with contains(some surrogates),
3270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // or the set contains strings with unpaired surrogates which don't translate to valid UTF-8:
3271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Skip the UTF-8 part of the test - if the string contains surrogates -
3272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // because it is likely to produce a different result.
3273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool inconsistentSurrogates=
3274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (!(sets[0]->getSet().contains(0xfffd) ?
3275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               sets[0]->getSet().contains(0xd800, 0xdfff) :
3276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               sets[0]->getSet().containsNone(0xd800, 0xdfff)) ||
3277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             sets[0]->hasStringsWithSurrogates());
3278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar s[1000];
3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length=0;
3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t localWhichSpans;
3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c, first;
3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(first=c=0;; c=nextCodePoint(c)) {
3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) {
3286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            localWhichSpans=whichSpans;
3287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) {
3288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                localWhichSpans&=~SPAN_UTF8;
3289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first);
3291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c>0x10ffff) {
3292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=0;
3295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            first=c;
3296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U16_APPEND_UNSAFE(s, length, c);
3298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test with a particular, interesting string.
3302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specify length and try NUL-termination.
3303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const UChar s[]={
3305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x61, 0x62, 0x20,                       // Latin, space
3306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x3b1, 0x3b2, 0x3b3,                    // Greek
3307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd900,                                 // lead surrogate
3308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x3000, 0x30ab, 0x30ad,                 // wide space, Katakana
3309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xdc05,                                 // trail surrogate
3310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xa0, 0xac00, 0xd7a3,                   // nbsp, Hangul
3311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd900, 0xdc05,                         // unassigned supplementary
3312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd840, 0xdfff, 0xd860, 0xdffe,         // Han supplementary
3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd7a4, 0xdc05, 0xd900, 0x2028,         // unassigned, surrogates in wrong order, LS
3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0                                       // NUL
3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF16)==0) {
3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0);
3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char s[]={
3326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc"                                   // Latin
3327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        " "                                     // space
3332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* truncated multi-byte sequences */
3334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xd0"
3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0"
3336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe1"
3337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed"
3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xee"
3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0"
3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf1"
3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4"
3342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8"
3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc"
3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xCE\xB1\xCE\xB2\xCE\xB3"              // Greek
3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0\x80"
3351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0\xa0"
3352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe1\x80"
3353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed\x80"
3354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed\xa0"
3355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xee\x80"
3356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x80"
3357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x90"
3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf1\x80"
3359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x80"
3360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x90"
3361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80"
3362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80"
3363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xE3\x80\x80\xE3\x82\xAB\xE3\x82\xAD"  // wide space, Katakana
3365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x80\x80"
3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x90\x80"
3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf1\x80\x80"
3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x80\x80"
3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x90\x80"
3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80\x80"
3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80"
3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xC2\xA0\xEA\xB0\x80\xED\x9E\xA3"      // nbsp, Hangul
3378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80\x80\x80"
3383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80\x80"
3384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xF1\x90\x80\x85"                      // unassigned supplementary
3386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80\x80\x80"
3391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xF0\xA0\x8F\xBF\xF0\xA8\x8F\xBE"      // Han supplementary
3393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* complete sequences but non-shortest forms or out of range etc. */
3398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xc0\x80"
3399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0\x80\x80"
3400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed\xa0\x80"
3401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x80\x80\x80"
3402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x90\x80\x80"
3403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80\x80\x80\x80"
3404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80\x80\x80\x80"
3405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfe"
3406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xff"
3407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xED\x9E\xA4\xE2\x80\xA8"              // unassigned, LS, NUL-terminated
3412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
3413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF8)==0) {
3415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0);
3418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
3419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Expands a list of span-option words so that each resulting entry carries
// exactly one of the alternative options a, b and c.
//
// For every one of the first whichSpansCount entries, the bits selected by
// mask are kept and a is OR-ed in, in place. If b!=0, a second copy with b
// instead of a is written at offset whichSpansCount; if additionally c!=0,
// a third copy with c is written at offset 2*whichSpansCount.
// c is ignored when b==0.
//
// The caller must provide room for the expanded list; no bounds are checked.
// Returns the new entry count: 1x, 2x or 3x whichSpansCount.
static int32_t
addAlternative(uint32_t whichSpans[], int32_t whichSpansCount,
               uint32_t mask, uint32_t a, uint32_t b, uint32_t c) {
    // Number of alternatives in play; c only counts when b is present.
    const int32_t portions = (b == 0) ? 1 : ((c == 0) ? 2 : 3);

    for (int32_t idx = 0; idx < whichSpansCount; ++idx) {
        const uint32_t kept = whichSpans[idx] & mask;
        whichSpans[idx] = kept | a;
        if (portions >= 2) {
            whichSpans[whichSpansCount + idx] = kept | b;
        }
        if (portions == 3) {
            whichSpans[2 * whichSpansCount + idx] = kept | c;
        }
    }
    return portions * whichSpansCount;
}
3443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// String-literal building blocks for the TestSpan() test data below.
// Each macro expands to a run of a single letter ('a' or 'b'); the number in
// the macro name is the intended run length. They are combined through
// adjacent string-literal concatenation to build the long set strings and
// test strings in TestSpan().
3444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _63_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _64_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _63_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
3447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _64_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
3448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSpan() {
3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // "[...]" is a UnicodeSet pattern.
3451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // "*" performs tests on all Unicode code points and on a selection of
3452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   malformed UTF-8/16 strings.
3453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // "-options" limits the scope of testing for the current set.
3454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   By default, the test verifies that equivalent boundaries are found
3455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   for UTF-16 and UTF-8, going forward and backward,
3456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   alternating USET_SPAN_NOT_CONTAINED with
3457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   either USET_SPAN_CONTAINED or USET_SPAN_SIMPLE.
3458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Single-character options:
3459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     8 -- UTF-16 and UTF-8 boundaries may differ.
3460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: contains(U+FFFD) is inconsistent with contains(some surrogates),
3461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          or the set contains strings with unpaired surrogates
3462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          which do not translate to valid UTF-8.
3463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     c -- set.span() and set.complement().span() boundaries may differ.
3464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: Set strings are not complemented.
3465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     b -- span() and spanBack() boundaries may differ.
3466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: Strings in the set overlap, and spanBack(USET_SPAN_CONTAINED)
3467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          and spanBack(USET_SPAN_SIMPLE) are defined to
3468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          match with non-overlapping substrings.
3469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          For example, with a set containing "ab" and "ba",
3470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          span() of "aba" yields boundaries { 0, 2, 3 }
3471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          because the initial "ab" matches from 0 to 2,
3472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          while spanBack() yields boundaries { 0, 1, 3 }
3473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          because the final "ba" matches from 1 to 3.
3474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     l -- USET_SPAN_CONTAINED and USET_SPAN_SIMPLE boundaries may differ.
3475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: Strings in the set overlap, and a longer match may
3476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          require a sequence including non-longest substrings.
3477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          For example, with a set containing "ab", "abc" and "cd",
3478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          span(contained) of "abcd" spans the entire string
3479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          but span(longest match) only spans the first 3 characters.
3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Each "-options" first resets all options and then applies the specified options.
3481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   A "-" without options resets the options.
3482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   The options are also reset for each new set.
3483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Other strings will be spanned.
3484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const testdata[]={
3485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:ID_Continue:]",
3486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:White_Space:]",
3488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[]",
3490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u0000-\\U0010FFFF]",
3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u0000\\u0080\\u0800\\U00010000]",
3494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]",
3496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]",
3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]",
3501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Overlapping strings cause overlapping attempts to match.
3505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[x{xy}{xya}{axy}{ax}]",
3506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // More repetitions of "xya" would take too long with the recursive
3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // reference implementation.
3510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // containsAll()=FALSE
3511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x14
3512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"  // set.complement().span(longest match) will stop here.
3514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"            // set.complement().span(contained) will stop between the two 'x'es.
3515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"
3516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"  // span() ends here.
3518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaa",
3519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // containsAll()=TRUE
3521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x15
3522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"
3524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"
3526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxy",
3528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-bc",
3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x17
3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byayaxya",  // span() -> { 4, 7, 8 }  spanBack() -> { 5, 8 }
3532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byayaxy",   // span() -> { 4, 7 }     complement.span() -> { 7 }
3534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byayax",    // span() -> { 4, 6 }     complement.span() -> { 6 }
3535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-",
3536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byaya",     // span() -> { 5 }
3537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byay",      // span() -> { 4 }
3538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "bya",       // span() -> { 3 }
3539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // span(longest match) will not span the whole string.
3541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a{ab}{bc}]",
3542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x21
3544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
3545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a{ab}{abc}{cd}]",
3547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "acdabcdabccd",
3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // spanBack(longest match) will not span the whole string.
3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[c{ab}{bc}]",
3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
3554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[d{cd}{bcd}{ab}]",
3556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abbcdabcdabd",
3558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test with non-ASCII set strings - test proper handling of surrogate pairs
3560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // and UTF-8 trail bytes.
3561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Copies of above test sets and strings, but transliterated to have
3562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // different code points with similar trail units.
3563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Previous: a      b         c            d
3564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Unicode:  042B   30AB      200AB        204AB
3565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // UTF-16:   042B   30AB      D840 DCAB    D841 DCAB
3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // UTF-8:    D0 AB  E3 82 AB  F0 A0 82 AB  F0 A0 92 AB
3567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]",
3568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB",
3570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]",
3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB",
3574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Stress bookkeeping and recursion.
3576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The following strings are barely doable with the recursive
3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // reference implementation.
3578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The not-contained character at the end prevents an early exit from the span().
3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[b{bb}]",
3580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x33
3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "bbbbbbbbbbbbbbbbbbbbbbbb-",
3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // On complement sets, span() and spanBack() get different results
3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // because b is not in the complement set and there is an odd number of b's
3585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // in the test string.
3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-bc",
3587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "bbbbbbbbbbbbbbbbbbbbbbbbb-",
3588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test with set strings with an initial or final code point span
3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // longer than 254.
3591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a{" _64_a _64_a _64_a _64_a "b}"
3592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          "{a" _64_b _64_b _64_b _64_b "}]",
3593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _64_a _64_a _64_a _63_a "b",
3595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _64_a _64_a _64_a _64_a "b",
3596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _64_a _64_a _64_a _64_a "aaaabbbb",
3597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "a" _64_b _64_b _64_b _63_b,
3598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "a" _64_b _64_b _64_b _64_b,
3599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaaabbbb" _64_b _64_b _64_b _64_b,
3600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test with strings containing unpaired surrogates.
3602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // They are not representable in UTF-8, and a leading trail surrogate
3603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // and a trailing lead surrogate must not match in the middle of a proper surrogate pair.
3604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // U+20001 == \\uD840\\uDC01
3605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // U+20400 == \\uD841\\uDC00
3606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]",
3607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-8cl",
3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb"
3609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t whichSpans[96]={ SPAN_ALL };
3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t whichSpansCount=1;
3612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *sets[SET_COUNT]={ NULL };
3614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL };
3615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char testName[1024];
3617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *testNameLimit=testName;
3618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, j;
3620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<LENGTHOF(testdata); ++i) {
3621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const char *s=testdata[i];
3622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(s[0]=='[') {
3623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Create new test sets from this pattern.
3624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<SET_COUNT; ++j) {
3625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete sets_with_str[j];
3626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete sets[j];
3627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode errorCode=U_ZERO_ERROR;
3629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode);
3630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(U_FAILURE(errorCode)) {
36316d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru                dataerrln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
3632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]);
3635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[SLOW_NOT]->complement();
3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Intermediate set: Test cloning of a frozen set.
3637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *fast=new UnicodeSet(*sets[SLOW]);
3638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fast->freeze();
3639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[FAST]=(UnicodeSet *)fast->clone();
3640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete fast;
3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *fastNot=new UnicodeSet(*sets[SLOW_NOT]);
3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fastNot->freeze();
3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[FAST_NOT]=(UnicodeSet *)fastNot->clone();
3644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete fastNot;
3645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<SET_COUNT; ++j) {
3647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sets_with_str[j]=new UnicodeSetWithStrings(*sets[j]);
3648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testName, s);
3651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testNameLimit=strchr(testName, 0);
3652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *testNameLimit++=':';
3653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *testNameLimit=0;
3654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpans[0]=SPAN_ALL;
3656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpansCount=1;
3657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(s[0]=='-') {
3658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpans[0]=SPAN_ALL;
3659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpansCount=1;
3660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(*++s!=0) {
3662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                switch(*s) {
3663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'c':
3664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~SPAN_POLARITY,
3666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_SET,
3667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_COMPLEMENT,
3668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   0);
3669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'b':
3671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~SPAN_DIRS,
3673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_FWD,
3674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_BACK,
3675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   0);
3676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'l':
3678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // test USET_SPAN_CONTAINED FWD & BACK, and separately
3679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // USET_SPAN_SIMPLE only FWD, and separately
3680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // USET_SPAN_SIMPLE only BACK
3681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~(SPAN_DIRS|SPAN_CONDITION),
3683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_DIRS|SPAN_CONTAINED,
3684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_FWD|SPAN_SIMPLE,
3685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_BACK|SPAN_SIMPLE);
3686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case '8':
3688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~SPAN_UTFS,
3690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_UTF16,
3691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_UTF8,
3692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   0);
3693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                default:
3695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errln("FAIL: unrecognized span set option in \"%s\"", testdata[i]);
3696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(0==strcmp(s, "*")) {
3700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testNameLimit, "bad_string");
3701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<whichSpansCount; ++j) {
3702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(whichSpansCount>1) {
3703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    sprintf(testNameLimit+10 /* strlen("bad_string") */,
3704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            "%%0x%3x",
3705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            whichSpans[j]);
3706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanUTF16String(sets_with_str, whichSpans[j], testName);
3708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanUTF8String(sets_with_str, whichSpans[j], testName);
3709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testNameLimit, "contents");
3712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<whichSpansCount; ++j) {
3713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(whichSpansCount>1) {
3714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    sprintf(testNameLimit+8 /* strlen("contents") */,
3715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            "%%0x%3x",
3716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            whichSpans[j]);
3717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanContents(sets_with_str, whichSpans[j], testName);
3719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
3721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString string=UnicodeString(s, -1, US_INV).unescape();
3722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testNameLimit, "test_string");
3723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<whichSpansCount; ++j) {
3724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(whichSpansCount>1) {
3725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    sprintf(testNameLimit+11 /* strlen("test_string") */,
3726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            "%%0x%3x",
3727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            whichSpans[j]);
3728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanBothUTFs(sets_with_str, string.getBuffer(), string.length(), whichSpans[j], testName, i);
3730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(j=0; j<SET_COUNT; ++j) {
3734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete sets_with_str[j];
3735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete sets[j];
3736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test select patterns and strings, and test USET_SPAN_SIMPLE.
3740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStringSpan() {
3741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *pattern="[x{xy}{xya}{axy}{ax}]";
3742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const string=
3743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
3745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
3747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy"
3749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaaa";
3750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
3752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pattern16=UnicodeString(pattern, -1, US_INV);
3753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set(pattern16, errorCode);
3754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString string16=UnicodeString(string, -1, US_INV).unescape();
3760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(set.containsAll(string16)) {
3762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).containsAll(%s) should be FALSE", pattern, string);
3763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Remove trailing "aaaa".
3766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16.truncate(string16.length()-4);
3767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.containsAll(string16)) {
3768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string);
3769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16=UNICODE_STRING_SIMPLE("byayaxya");
3772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *s16=string16.getBuffer();
3773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length16=string16.length();
3774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 ||
3775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 ||
3776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 ||
3777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 5, USET_SPAN_NOT_CONTAINED)!=5 ||
3778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 4, USET_SPAN_NOT_CONTAINED)!=4 ||
3779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 3, USET_SPAN_NOT_CONTAINED)!=3
3780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
3781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).span(while not) returns the wrong value", pattern);
3782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern="[a{ab}{abc}{cd}]";
3785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern16=UnicodeString(pattern, -1, US_INV);
3786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern(pattern16, errorCode);
3787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16=UNICODE_STRING_SIMPLE("acdabcdabccd");
3792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s16=string16.getBuffer();
3793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length16=string16.length();
3794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 ||
3795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 12, USET_SPAN_SIMPLE)!=6 ||
3796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16+7, 5, USET_SPAN_SIMPLE)!=5
3797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
3798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).span(while longest match) returns the wrong value", pattern);
3799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern="[d{cd}{bcd}{ab}]";
3802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern16=UnicodeString(pattern, -1, US_INV);
3803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern(pattern16, errorCode).freeze();
3804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16=UNICODE_STRING_SIMPLE("abbcdabcdabd");
3809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s16=string16.getBuffer();
3810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length16=string16.length();
3811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 ||
3812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 ||
3813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0
3814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
3815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
3816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3818