usettest.cpp revision c69afcec261fc345fda8daf46f0ea6b4351dc777
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************************
3c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru*   Copyright (C) 1999-2008 International Business Machines Corporation and
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   others. All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Date        Name        Description
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   10/20/99    alan        Creation.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   03/22/2000  Madhu       Added additional tests
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************************
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "usettest.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/usetiter.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uversion.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The expansion is parenthesized as a whole so that it stays a single
 * operand wherever it is used (e.g. after sizeof or before a postfix
 * operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/**
 * Reports a failure through errln(), with file name, line number and the
 * ICU error name, when status holds a failure code.
 * Expands to a braced block rather than a single statement.
 */
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    errln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
    u_errorName(status));}}

/**
 * Reports a failure through errln(), with file name and line number, when
 * expr evaluates to false.
 * Expands to a braced block rather than a single statement.
 */
#define TEST_ASSERT(expr) {if (!(expr)) { \
    errln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(pat);
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return left + UnicodeSetTest::escape(pat);
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CASE(id,test) case id:                          \
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          name = #test;                 \
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          if (exec) {                   \
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              logln(#test "---");       \
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              logln();                  \
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              test();                   \
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          }                             \
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          break
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) {
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUConverter *UnicodeSetTest::openUTF8Converter() {
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(utf8Cnv==NULL) {
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode errorCode=U_ZERO_ERROR;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        utf8Cnv=ucnv_open("UTF-8", &errorCode);
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return utf8Cnv;
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::~UnicodeSetTest() {
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(utf8Cnv);
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const char* &name, char* /*par*/) {
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if (exec) logln((UnicodeString)"TestSuite UnicodeSetTest");
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (index) {
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(0,TestPatterns);
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(1,TestAddRemove);
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(2,TestCategories);
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(3,TestCloneEqualHash);
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(4,TestMinimalRep);
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(5,TestAPI);
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(6,TestScriptSet);
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(7,TestPropertySet);
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(8,TestClone);
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(9,TestExhaustive);
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(10,TestToPattern);
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(11,TestIndexOf);
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(12,TestStrings);
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(13,Testj2268);
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(14,TestCloseOver);
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(15,TestEscapePattern);
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(16,TestInvalidCodePoint);
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(17,TestSymbolTable);
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(18,TestSurrogate);
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(19,TestPosixClasses);
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(20,TestIteration);
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(21,TestFreezable);
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(22,TestSpan);
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE(23,TestStringSpan);
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        default: name = ""; break;
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Marker placed inside the expected-string arrays that TestToPattern()
// hands to expectToPattern().
// NOTE(review): presumably the strings before the marker must be in the
// set and the strings after it must not be -- confirm against
// expectToPattern().
static const char NOT[] = "%%%%";
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UVector was improperly copying contents
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This code will crash this is still true
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::Testj2268() {
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeSet t;
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  t.add(UnicodeString("abc"));
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeSet test(t);
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeString ustrPat;
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  test.toPattern(ustrPat, TRUE);
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test toPattern().
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestToPattern() {
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test that toPattern() round trips with syntax characters and
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // whitespace.
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        static const char* OTHER_TOPATTERN_TESTS[] = {
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "[[:latin:]&[:greek:]]",
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "[[:latin:]-[:greek:]]",
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "[:nonspacing mark:]",
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            NULL
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        };
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t j=0; OTHER_TOPATTERN_TESTS[j]!=NULL; ++j) {
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec = U_ZERO_ERROR;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec);
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j]);
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            checkPat(OTHER_TOPATTERN_TESTS[j], s);
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (UChar32 i = 0; i <= 0x10FFFF; ++i) {
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((i <= 0xFF && !u_isalpha(i)) || u_isspace(i)) {
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // check various combinations to make sure they all work.
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (i != 0 && !toPatternAux(i, i)){
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!toPatternAux(0, i)){
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!toPatternAux(i, 0xFFFF)){
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test pattern behavior of multicharacter strings.
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec = U_ZERO_ERROR;
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet* s = new UnicodeSet("[a-z {aa} {ab}]", ec);
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // This loop isn't a loop.  It's here to make the compiler happy.
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // If you're curious, try removing it and changing the 'break'
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // statements (except for the last) to goto's.
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (;;) {
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) break;
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp1[] = {"aa", "ab", NOT, "ac", NULL};
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectToPattern(*s, "[a-z{aa}{ab}]", exp1);
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add("ac");
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) break;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add("[]");
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) break;
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // j2189
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->clear();
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add(UnicodeString("abc", ""));
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s->add(UnicodeString("abc", ""));
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char* exp6[] = {"abc", NOT, "ab", NULL};
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectToPattern(*s, "[{abc}]", exp6);
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) errln("FAIL: pattern parse error");
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete s;
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // JB#3400: For 2 character ranges prefer [ab] to [a-b]
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet s;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.add((UChar)97, (UChar)98); // 'a', 'b'
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectToPattern(s, "[ab]", NULL);
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::toPatternAux(UChar32 start, UChar32 end) {
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // use Integer.toString because Utility.hex doesn't handle ints
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat = "";
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // TODO do these in hex
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //String source = "0x" + Integer.toString(start,16).toUpperCase();
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString source;
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = source + (uint32_t)start;
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (start != end)
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        source = source + ".." + (uint32_t)end;
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet testSet;
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSet.add(start, end);
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return checkPat(source, testSet);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source,
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeSet& testSet) {
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // What we want to make sure of is that a pattern generated
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // by toPattern(), with or without escaped unprintables, can
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // be passed back into the UnicodeSet constructor.
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat0;
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSet.toPattern(pat0, TRUE);
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!checkPat(source + " (escaped)", testSet, pat0)) return FALSE;
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //String pat1 = unescapeLeniently(pat0);
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat2;
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSet.toPattern(pat2, FALSE);
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!checkPat(source, testSet, pat2)) return FALSE;
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //String pat3 = unescapeLeniently(pat2);
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if (!checkPat(source + " (in code)", testSet, pat3)) return false;
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln((UnicodeString)source + " => " + pat0 + ", " + pat2);
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkPat(const UnicodeString& source,
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeSet& testSet,
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeString& pat) {
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet testSet2(pat, ec);
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (testSet2 != testSet) {
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail toPattern: " + source + " => " + pat);
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestPatterns(void) {
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[[a-m]&[d-z]&[k-y]]", ""),  "km");
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[[a-z]-[m-y]-[d-r]]", ""),  "aczz");
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[a\\-z]", ""),  "--aazz");
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[-az]", ""),  "--aazz");
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[az-]", ""),  "--aazz");
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, UnicodeString("[[[a-z]-[aeiou]i]]", ""), "bdfnptvz");
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Throw in a test of complement
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complement();
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString exp;
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(0x007a+1)).append((UChar)0xFFFF);
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, exp);
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCategories(void) {
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:]
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set(pat, status);
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail: Can't construct set with " + pat);
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(set, pat, "ABC", "abc");
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 i;
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t failures = 0;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Make sure generation of L doesn't pollute cached Lu set
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First generate L, then Lu
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[:L:]", status);
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<0x200; ++i) {
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool l = u_isalpha((UChar)i);
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (l != set.contains(i)) {
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: L contains " + (unsigned short)i + " = " +
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  set.contains(i));
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (++failures == 10) break;
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[:Lu:]", status);
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<0x200; ++i) {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool lu = (u_charType((UChar)i) == U_UPPERCASE_LETTER);
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lu != set.contains(i)) {
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: Lu contains " + (unsigned short)i + " = " +
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  set.contains(i));
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (++failures == 20) break;
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestCloneEqualHash(void) {
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // set1 and set2 used to be built with the obsolete constructor taking
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // UCharCategory values; replaced with pattern constructors
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // markus 20030502
321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); //  :Ll: Letter, lowercase
322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); //  Letter, lowercase
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)){
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: Can't construst set with category->Ll");
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status);   //Number, Decimal digit
328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status);   //Number, Decimal digit
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)){
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (*set1 != *set1a) {
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: category constructor for Ll broken");
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (*set2 != *set2a) {
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: category constructor for Nd broken");
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1a;
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set2a;
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing copy construction");
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set1copy=new UnicodeSet(*set1);
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*set1 != *set1copy || *set1 == *set2 ||
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        getPairs(*set1) != getPairs(*set1copy) ||
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set1->hashCode() != set1copy->hashCode()){
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL : Error in copy construction");
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing =operator");
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set1equal=*set1;
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set2equal=*set2;
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(set1equal != *set1 || set1equal != *set1copy || set2equal != *set2 ||
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set2equal == *set1 || set2equal == *set1copy || set2equal == set1equal){
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Error in =operator");
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing clone()");
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set1clone=(UnicodeSet*)set1->clone();
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set2clone=(UnicodeSet*)set2->clone();
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*set1clone != *set1 || *set1clone != *set1copy || *set1clone != set1equal ||
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *set2clone != *set2 || *set2clone == *set1copy || *set2clone != set2equal ||
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *set2clone == *set1 || *set2clone == set1equal || *set2clone == *set1clone){
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Error in clone");
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln("Testing hashcode");
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(set1->hashCode() != set1equal.hashCode() || set1->hashCode() != set1clone->hashCode() ||
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set2->hashCode() != set2equal.hashCode() || set2->hashCode() != set2clone->hashCode() ||
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set1copy->hashCode() != set1equal.hashCode() || set1copy->hashCode() != set1clone->hashCode() ||
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set1->hashCode() == set2->hashCode()  || set1copy->hashCode() == set2->hashCode() ||
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set2->hashCode() == set1clone->hashCode() || set2->hashCode() == set1equal.hashCode() ){
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Error in hashCode()");
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1;
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1copy;
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set2;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set1clone;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete set2clone;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::TestAddRemove(void) {
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set; // Construct empty set
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == TRUE, "set should be empty");
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0, "size should be 0");
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complement();
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0x110000, "size should be 0x110000");
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061, 0x007a);
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "az");
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == FALSE, "set should not be empty");
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() != 0, "size should not be equal to 0");
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 26, "size should be equal to 26");
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x006d, 0x0070);
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "alqz");
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 22, "size should be equal to 22");
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0065, 0x0067);
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "adhlqz");
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 19, "size should be equal to 19");
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0064, 0x0069);
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "acjlqz");
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 16, "size should be equal to 16");
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0063, 0x0072);
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "absz");
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 10, "size should be equal to 10");
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0066, 0x0071);
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "abfqsz");
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 22, "size should be equal to 22");
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0061, 0x0067);
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "hqsz");
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.remove(0x0061, 0x007a);
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "");
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == TRUE, "set should be empty");
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0, "size should be 0");
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061);
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == FALSE, "set should not be empty");
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 1, "size should not be equal to 1");
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0062);
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0063);
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "ac");
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 3, "size should not be equal to 3");
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0070);
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0071);
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "acpq");
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 5, "size should not be equal to 5");
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "");
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.isEmpty() == TRUE, "set should be empty");
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == 0, "size should be 0");
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Try removing an entire set from another set
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, "[c-x]", "cx");
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set2;
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.removeAll(set2);
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "deluxx");
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Try adding an entire set to another set
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, "[jackiemclean]", "aacceein");
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aacehort");
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2");
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Try retaining an set of elements contained in another set (intersection)
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set3;
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set3, "[a-c]", "ac");
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set3) == FALSE, "set doesn't contain all the elements in set3");
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set3.remove(0x0062);
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set3, "aacc");
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3");
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.retainAll(set3);
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aacc");
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() == set3.size(), "set.size() should be set3.size()");
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3");
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.size() != set3.size(), "set.size() != set3.size()");
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test commutativity
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPattern(set2, "[jackiemclean]", "aacceein");
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aacehort");
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2");
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure minimal representation is maintained.
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestMinimalRep() {
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This is pretty thoroughly tested by checkCanonicalRep()
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // run against the exhaustive operation results.  Use the code
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // here for debugging specific spot problems.
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 1 overlap against 2
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set("[h-km-q]", status);
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set2("[i-o]", status);
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "hq");
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // right
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[a-m]", status);
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set2.applyPattern("[e-o]", status);
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "ao");
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // left
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[e-o]", status);
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set2.applyPattern("[a-m]", status);
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "ao");
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 1 overlap against 3
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[a-eg-mo-w]", status);
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set2.applyPattern("[d-q]", status);
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(set2);
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectPairs(set, "aw");
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestAPI() {
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // default ct
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.isEmpty() || set.getRangeCount() != 0) {
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, set should be empty but isn't: " +
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set);
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // clear(), isEmpty()
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061);
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.isEmpty()) {
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, set shouldn't be empty but is: " +
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set);
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.isEmpty()) {
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, set should be empty but isn't: " +
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set);
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // size()
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.size() != 0) {
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, size should be 0, but is " + set.size() +
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": " + set);
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0061);
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.size() != 1) {
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, size should be 1, but is " + set.size() +
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": " + set);
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add(0x0031, 0x0039);
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.size() != 10) {
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL, size should be 10, but is " + set.size() +
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": " + set);
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // contains(first, last)
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern("[A-Y 1-8 b-d l-y]", status);
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i<set.getRangeCount(); ++i) {
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 a = set.getRangeStart(i);
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 b = set.getRangeEnd(i);
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!set.contains(a, b)) {
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL, should contain " + (unsigned short)a + '-' + (unsigned short)b +
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " but doesn't: " + set);
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains((UChar32)(a-1), b)) {
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL, shouldn't contain " +
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (unsigned short)(a-1) + '-' + (unsigned short)b +
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " but does: " + set);
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains(a, (UChar32)(b+1))) {
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL, shouldn't contain " +
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (unsigned short)a + '-' + (unsigned short)(b+1) +
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " but does: " + set);
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Ported InversionList test.
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet a((UChar32)3,(UChar32)10);
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet b((UChar32)7,(UChar32)15);
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet c;
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln((UnicodeString)"a [3-10]: " + a);
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    logln((UnicodeString)"b [7-15]: " + b);
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c = a;
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.addAll(b);
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet exp((UChar32)3,(UChar32)15);
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.set(a).add(b): " + c);
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.complement();
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.set((UChar32)0, (UChar32)2);
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.add((UChar32)16, UnicodeSet::MAX_VALUE);
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.complement(): " + c);
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp);
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.complement();
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.set((UChar32)3, (UChar32)15);
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.complement(): " + c);
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c = a;
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    c.complementAll(b);
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.set((UChar32)3,(UChar32)6);
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.add((UChar32)11,(UChar32) 15);
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"c.set(a).exclusiveOr(b): " + c);
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: c.set(a).exclusiveOr(b) = " + c + ", expect " + exp);
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp = c;
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(setToBits(c), c);
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == exp) {
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"bitsToSet(setToBits(c)): " + c);
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Additional tests for coverage JB#2118
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::complement(class UnicodeString const &)
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::complementAll(class UnicodeString const &)
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsNone(class UnicodeSet const &)
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsNone(long,long)
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsSome(class UnicodeSet const &)
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::containsSome(long,long)
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::removeAll(class UnicodeString const &)
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::retain(long)
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::retainAll(class UnicodeString const &)
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //UnicodeSetIterator::getString(void)
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.clear();
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complement("ab");
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[{ab}]", status);
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: complement(\"ab\")"); return; }
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator iset(set);
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!iset.next() || !iset.isString()) {
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSetIterator::next/isString");
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (iset.getString() != "ab") {
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSetIterator::getString");
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.add((UChar32)0x61, (UChar32)0x7A);
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.complementAll("alan");
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[{ab}b-kmo-z]", status);
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: complementAll(\"alan\")"); return; }
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[a-z]", status);
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[aln]", status);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsNone((UChar32)0x61, (UChar32)0x7A)) {
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsNone(UChar32, UChar32)");
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsSome((UChar32)0x61, (UChar32)0x7A)) {
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsSome(UChar32, UChar32)");
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!set.containsNone((UChar32)0x41, (UChar32)0x5A)) {
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsNone(UChar32, UChar32)");
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set.containsSome((UChar32)0x41, (UChar32)0x5A)) {
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: containsSome(UChar32, UChar32)");
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.removeAll("liu");
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[{ab}b-hj-kmo-tv-z]", status);
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: removeAll(\"liu\")"); return; }
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.retainAll("star");
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[rst]", status);
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: retainAll(\"star\")"); return; }
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.retain((UChar32)0x73);
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exp.applyPattern("[s]", status);
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL"); return; }
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set != exp) { errln("FAIL: retain('s')"); return; }
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t buf[32];
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status);
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: serialize");
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIteration() {
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i = 0;
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int outerLoop;
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // 6 code points, 3 ranges, 2 strings, 8 total elements
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Iteration will access them in sorted order -  a, b, c, y, z, U0001abcd, "str1", "str2"
708c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    TEST_ASSERT_SUCCESS(ec);
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator it(set);
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (outerLoop=0; outerLoop<3; outerLoop++) {
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Run the test multiple times, to check that iterator.reset() is working.
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<10; i++) {
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UBool         nextv        = it.next();
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UBool         isString     = it.isString();
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t       codePoint    = it.getCodepoint();
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //int32_t       codePointEnd = it.getCodepointEnd();
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString s   = it.getString();
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch (i) {
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 0:
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x61);
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "a");
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 1:
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x62);
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "b");
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 2:
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x63);
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "c");
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 3:
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x79);
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "y");
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 4:
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x7a);
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "z");
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 5:
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == FALSE);
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(codePoint==0x1abcd);
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == UnicodeString((UChar32)0x1abcd));
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 6:
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == TRUE);
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "str1");
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 7:
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == TRUE);
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(isString == TRUE);
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(s == "str2");
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 8:
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == FALSE);
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 9:
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                TEST_ASSERT(nextv == FALSE);
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        it.reset();  // prepare to run the iteration again.
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStrings() {
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* testList[] = {
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet::createFromAll("abc"),
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[a-c]", ec),
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        &(UnicodeSet::createFrom("ch")->add('a','z').add("ll")),
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[{ll}{ch}a-z]", ec),
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet::createFrom("ab}c"),
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[{ab\\}c}]", ec),
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        &((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')),
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec),
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NULL
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: couldn't construct test sets");
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; testList[i] != NULL; i+=2) {
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(ec)) {
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString pat0, pat1;
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testList[i]->toPattern(pat0, TRUE);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testList[i+1]->toPattern(pat1, TRUE);
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (*testList[i] == *testList[i+1]) {
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                logln((UnicodeString)"Ok: " + pat0 + " == " + pat1);
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                logln((UnicodeString)"FAIL: " + pat0 + " != " + pat1);
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testList[i];
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete testList[i+1];
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax.
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestScriptSet() {
825c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
827c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Jitterbug 1423 */
830c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the [:Latin:] syntax.
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPropertySet() {
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char* const DATA[] = {
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Pattern, Chars IN, Chars NOT in
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Latin:]",
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aA",
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0391\\u03B1",
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\p{Greek}]",
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0391\\u03B1",
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aA",
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\P{ GENERAL Category = upper case letter }",
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ABC",
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Combining class: @since ICU 2.2
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Check both symbolic and numeric
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{ccc=Nukta}",
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0ABC",
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{Canonical Combining Class = 11}",
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u05B1",
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u05B2",
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:c c c = iota subscript :]",
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0345",
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyz",
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Bidi class: @since ICU 2.2
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{bidiclass=lefttoright}",
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0671\\u0672",
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Binary properties: @since ICU 2.2
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\p{ideographic}",
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u4E0A",
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "x",
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:math=false:]",
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "q)*(",
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // weiv: )(and * were removed from math in Unicode 4.0.1
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //"(*+)",
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "+<>^",
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#1767 \N{}, \p{ASCII}
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Ascii:]",
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc\\u0000\\u007F",
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0080\\u4E00",
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\N{ latin small letter  a  }[:name= latin small letter z:]]",
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "az",
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "qrs",
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2015
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:any:]",
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "a\\U0010FFFF",
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "",
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:nv=0.5:]",
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u00BD\\u0F2A",
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u00BC",
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2653: Age
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Age=1.1:]",
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u03D6", // 1.1
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u03D8\\u03D9", // 3.2
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Age=3.1:]",
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u1800\\u3400\\U0002f800",
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2350: Case_Sensitive
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Case Sensitive:]",
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "A\\u1FFC\\U00010410",
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ";\\u00B4\\U00010500",
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // JB#2832: C99-compatibility props
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:blank:]",
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        " \\u0009",
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "1-9A-Z",
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:graph:]",
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "19AZ",
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        " \\u0003\\u0007\\u0009\\u000A\\u000D",
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:punct:]",
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "!@#%&*()[]{}-_\\/;:,.?'\"",
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "09azAZ",
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:xdigit:]",
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "09afAF",
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "gG!",
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Regex compatibility test
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[-b]", // leading '-' is literal
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[^-b]", // leading '-' is literal
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[b-]", // trailing '-' is literal
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[^b-]", // trailing '-' is literal
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ac",
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-b",
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a-b-]", // trailing '-' is literal
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ab-",
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "c=",
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[a-q]&[p-z]-]", // trailing '-' is literal
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "pq-",
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "or=",
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\s|\\)|:|$|\\>]", // from regex tests
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "s|):$>",
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\uDC00cd]", // JB#2906: isolated trail at start
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "cd\\uDC00",
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ab\\uD800\\U00010000",
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ab\\uD800]", // JB#2906: isolated trail at start
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ab\\uD800",
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "cd\\uDC00\\U00010000",
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ab\\uD800cd]", // JB#2906: isolated lead in middle
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\uD800",
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ef\\uDC00\\U00010000",
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ab\\uDC00cd]", // JB#2906: isolated trail in middle
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\uDC00",
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "ef\\uD800\\U00010000",
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:^lccc=0:]", // Lead canonical class
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0300\\u0301",
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\u00c0\\u00c5",
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:^tccc=0:]", // Trail canonical class
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0300\\u0301\\u00c0\\u00c5",
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd",
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0300\\u0301\\u00c0\\u00c5",
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd",
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "",
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\u0300\\u0301\\u00c0\\u00c5",
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0F73\\u0F75\\u0F81",
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abcd\\u0300\\u0301\\u00c0\\u00c5",
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:Assigned:]",
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u0888\\uFDD3\\uFFFE\\U00050005"
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<DATA_LEN; i+=3) {
1005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          CharsToUnicodeString(DATA[i+2]));
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  * Test that Posix style character classes [:digit:], etc.
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  *   have the Unicode definitions from TR 18.
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  */
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestPosixClasses() {
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:alpha:]", status);
1018c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:lower:]", status);
1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:upper:]", status);
1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:punct:]", status);
1039c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:digit:]", status);
1046c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:xdigit:]", status);
1053c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:alnum:]", status);
1060c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:space:]", status);
1067c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:blank:]", status);
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1075c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status);
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:cntrl:]", status);
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1084c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:graph:]", status);
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1092c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet s1("[:print:]", status);
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT_SUCCESS(status);
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TEST_ASSERT(s1==s2);
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test cloning of UnicodeSet.  For C++, we test the copy constructor.
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestClone() {
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet s("[abcxyz]", ec);
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet t(s);
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectContainment(t, "abc", "def");
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the indexOf() and charAt() methods.
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestIndexOf() {
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set("[a-cx-y3578]", ec);
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet constructor");
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<set.size(); ++i) {
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = set.charAt(i);
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.indexOf(c) != i) {
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: charAt(%d) = %X => indexOf() => %d",
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                i, c, set.indexOf(c));
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c = set.charAt(set.size());
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != -1) {
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: charAt(<out of range>) = %X", c);
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t j = set.indexOf((UChar32)0x71/*'q'*/);
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (j != -1) {
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: indexOf('q') = " + j);
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test closure API.
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestCloseOver() {
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char CASE[] = {(char)USET_CASE_INSENSITIVE};
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS};
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* DATA[] = {
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // selector, input, output
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aq\\u00DF{Bc}{bC}{Fi}]",
1154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]",  // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1]", // 'DZ'
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1\\u01F2\\u01F3]",
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u1FB4]",
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u1FB4{\\u03AC\\u03B9}]",
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[{F\\uFB01}]",
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\uFB03{ffi}]",
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, // make sure binary search finds limits
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a\\uFF3A]",
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aA\\uFF3A\\uFF5A]",
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a-z]","[A-Za-z\\u017F\\u212A]",
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[abc]","[A-Ca-c]",
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE,
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[ABC]","[A-Ca-c]",
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[i]", "[iI]",
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u0130]",          "[\\u0130{i\\u0307}]", // dotted I
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{i\\u0307}]",       "[\\u0130{i\\u0307}]", // i with dot
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u0131]",          "[\\u0131]", // dotless i
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u0390]",          "[\\u0390\\u1FD3{\\u03B9\\u0308\\u0301}]",
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u03c2]",          "[\\u03a3\\u03c2\\u03c3]", // sigmas
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u03f2]",          "[\\u03f2\\u03f9]", // lunate sigmas
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u03f7]",          "[\\u03f7\\u03f8]",
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\u1fe3]",          "[\\u03b0\\u1fe3{\\u03c5\\u0308\\u0301}]",
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\ufb05]",          "[\\ufb05\\ufb06{st}]",
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{st}]",             "[\\ufb05\\ufb06{st}]",
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[\\U0001044F]",      "[\\U00010427\\U0001044F]",
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{a\\u02BE}]",       "[\\u1E9A{a\\u02BE}]", // first in sorted table
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE_MAPPINGS,
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aq\\u00DF{Bc}{bC}{Fi}]",
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE_MAPPINGS,
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1]", // 'DZ'
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u01F1\\u01F2\\u01F3]",
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        CASE_MAPPINGS,
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a-z]",
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[A-Za-z]",
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NULL
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet s;
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet t;
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString buf;
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; DATA[i]!=NULL; i+=3) {
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t selector = DATA[i][0];
1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString pat(DATA[i+1], -1, US_INV);
1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString exp(DATA[i+2], -1, US_INV);
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s.applyPattern(pat, ec);
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s.closeOver(selector);
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t.applyPattern(exp, ec);
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: applyPattern failed");
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (s == t) {
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  s.toPattern(buf, TRUE) + ", expected " + exp);
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Unused test code.
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * This was used to compare the old implementation (using USET_CASE)
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * with the new one (using 0x100 temporarily)
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * while transitioning from hardcoded case closure tables in uniset.cpp
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu.
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * and using ucase.c functions for closure.
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Note: The old and new implementation never fully matched because
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * the old implementation turned out to not map U+0130 and U+0131 correctly
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * (dotted I and dotless i) and because the old implementation's data tables
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * were outdated compared to Unicode 4.0.1 at the time of the change to the
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * new implementation. (So sigmas and some other characters were not handled
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * according to the newer Unicode version.)
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2;
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator si(sens);
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString str, buf2;
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *pStr;
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(si.next()) {
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(!si.isString()) {
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=si.getCodepoint();
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.clear();
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.add(c);
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.setTo(c);
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.foldCase();
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sens2.add(str);
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t=s;
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.closeOver(USET_CASE);
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t.closeOver(0x100);
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(s!=t) {
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: closeOver(U+%04x) differs: ", c);
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // remove all code points
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // should contain all full case folding mapping strings
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sens2.remove(0, 0x10ffff);
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    si.reset(sens2);
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while(si.next()) {
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(si.isString()) {
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pStr=&si.getString();
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.clear();
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.add(*pStr);
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t=s2=s;
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            s.closeOver(USET_CASE);
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t.closeOver(0x100);
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(s!=t) {
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, TRUE)+") differs: ");
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Test the pattern API
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec);
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: applyPattern failed");
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(s, "abcABC", "defDEF");
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, NULL, ec);
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: constructor failed");
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(v, "defDEF", "abcABC");
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet cm("[abck]", USET_ADD_CASE_MAPPINGS, NULL, ec);
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: construct w/case mappings failed");
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(cm, "abckABCK", CharsToUnicodeString("defDEF\\u212A"));
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestEscapePattern() {
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char pattern[] =
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\uFEFF \\u200A-\\u200E \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char exp[] =
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We test this with two passes; in the second pass we
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // pre-unescape the pattern.  Since U+200E is rule whitespace,
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this fails -- which is what we expect.
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t pass=1; pass<=2; ++pass) {
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
1334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString pat(pattern, -1, US_INV);
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pass==2) {
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pat = pat.unescape();
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Pattern is only good for pass 1
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool isPatternValid = (pass==1);
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet set(pat, ec);
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(ec) != isPatternValid){
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: applyPattern(" +
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(pat) + ") => " +
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  u_errorName(ec));
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains((UChar)0x0644)){
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + escape(pat) + " contains(U+0664)");
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString newpat;
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.toPattern(newpat, TRUE);
1357c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (newpat == UnicodeString(exp, -1, US_INV)) {
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln(escape(pat) + " => " + newpat);
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat);
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i=0; i<set.getRangeCount(); ++i) {
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str("Range ");
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.append((UChar)(0x30 + i))
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(": ")
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append((UChar32)set.getRangeStart(i))
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(" - ")
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append((UChar32)set.getRangeEnd(i));
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str = str + " (" + set.getRangeStart(i) + " - " +
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                set.getRangeEnd(i) + ")";
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (set.getRangeStart(i) < 0) {
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln((UnicodeString)"FAIL: " + escape(str));
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                logln(escape(str));
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectRange(const UnicodeString& label,
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 const UnicodeSet& set,
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 UChar32 start, UChar32 end) {
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet exp(start, end);
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (set == exp) {
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln(label + " => " + set.toPattern(pat, TRUE));
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString xpat;
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: " + label + " => " +
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              set.toPattern(pat, TRUE) +
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ", expected " + exp.toPattern(xpat, TRUE));
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestInvalidCodePoint() {
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar32 DATA[] = {
1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test range             Expected range
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0, 0x10FFFF,              0, 0x10FFFF,
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (UChar32)-1, 8,           0, 8,
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        8, 0x110000,              8, 0x10FFFF
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]);
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<DATA_LENGTH; i+=4) {
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start  = DATA[i];
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end    = DATA[i+1];
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 xstart = DATA[i+2];
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 xend   = DATA[i+3];
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Try various API using the test code points
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet set(start, end);
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"ct(" + start + "," + end + ")",
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.clear();
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(start, end);
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"set(" + start + "," + end + ")",
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool b = set.contains(start);
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.contains(start, end);
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsNone(start, end);
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsSome(start, end);
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*int32_t index = set.indexOf(start);*/
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.clear();
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.add(start);
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.add(start, end);
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"add(" + start + "," + end + ")",
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(0, 0x10FFFF);
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.retain(start, end);
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"retain(" + start + "," + end + ")",
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.retain(start);
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(0, 0x10FFFF);
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.remove(start);
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.remove(start, end);
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement();
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"!remove(" + start + "," + end + ")",
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.set(0, 0x10FFFF);
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement(start, end);
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement();
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectRange((UnicodeString)"!complement(" + start + "," + end + ")",
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set, xstart, xend);
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.complement(start);
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar32 DATA2[] = {
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0,
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x10FFFF,
1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        (UChar32)-1,
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x110000
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]);
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<DATA2_LENGTH; ++i) {
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = DATA2[i], end = 0x10FFFF;
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool valid = (c >= 0 && c <= 0x10FFFF);
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet set(0, 0x10FFFF);
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // For single-codepoint contains, invalid codepoints are NOT contained
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool b = set.contains(c);
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (b == valid) {
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"[\\u0000-\\U0010FFFF].contains(" + c +
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + b);
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].contains(" + c +
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + b);
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // For codepoint range contains, containsNone, and containsSome,
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // invalid or empty (start > end) ranges have UNDEFINED behavior.
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.contains(c, end);
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].contains(" + c +
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "," + end + ") = " + b);
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsNone(c, end);
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsNone(" + c +
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "," + end + ") = " + b);
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = set.containsSome(c, end);
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsSome(" + c +
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "," + end + ") = " + b);
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t index = set.indexOf(c);
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((index >= 0) == valid) {
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"[\\u0000-\\U0010FFFF].indexOf(" + c +
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + index);
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].indexOf(" + c +
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ") = " + index);
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used by TestSymbolTable
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass TokenSymbolTable : public SymbolTable {
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Hashtable contents;
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        contents.setValueDeleter(uhash_deleteUnicodeString);
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ~TokenSymbolTable() {}
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * (Non-SymbolTable API) Add the given variable and value to
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * the table.  Variable should NOT contain leading '$'.
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void add(const UnicodeString& var, const UnicodeString& value,
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             UErrorCode& ec) {
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_SUCCESS(ec)) {
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            contents.put(var, new UnicodeString(value), ec);
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * SymbolTable API
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual const UnicodeString* lookup(const UnicodeString& s) const {
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (const UnicodeString*) contents.get(s);
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * SymbolTable API
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual const UnicodeFunctor* lookupMatcher(UChar32 /*ch*/) const {
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * SymbolTable API
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    virtual UnicodeString parseReference(const UnicodeString& text,
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         ParsePosition& pos, int32_t limit) const {
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start = pos.getIndex();
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i = start;
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString result;
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (i < limit) {
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar c = text.charAt(i);
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++i;
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i == start) { // No valid name chars
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return result; // Indicate failure with empty string
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos.setIndex(i);
1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        text.extractBetween(start, i, result);
1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return result;
1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSymbolTable() {
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Multiple test cases can be set up here.  Each test case
1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // is terminated by null:
1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // var, value, var, value,..., input pat., exp. output pat., null
1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* DATA[] = {
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "us", "a-z", "[0-1$us]", "[0-1a-z]", NULL,
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", NULL,
1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", NULL,
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        NULL
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; DATA[i]!=NULL; ++i) {
1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        TokenSymbolTable sym(ec);
1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: couldn't construct TokenSymbolTable");
1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Set up variables
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (DATA[i+2] != NULL) {
1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            sym.add(UnicodeString(DATA[i], -1, US_INV), UnicodeString(DATA[i+1], -1, US_INV), ec);
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: couldn't add to TokenSymbolTable");
1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            i += 2;
1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Input pattern and expected output pattern
1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeString inpat = UnicodeString(DATA[i], -1, US_INV), exppat = UnicodeString(DATA[i+1], -1, US_INV);
1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        i += 2;
1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ParsePosition pos(0);
1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet us(inpat, pos, USET_IGNORE_SPACE, &sym, ec);
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: couldn't construct UnicodeSet");
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // results
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pos.getIndex() != inpat.length()) {
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"Failed to read to end of string \""
1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  + inpat + "\": read to "
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  + pos.getIndex() + ", length is "
1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  + inpat.length());
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet us2(exppat, ec);
1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: couldn't construct expected UnicodeSet");
1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString a, b;
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (us != us2) {
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"Failed, got " + us.toPattern(a, TRUE) +
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ", expected " + us2.toPattern(b, TRUE));
1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"Ok, got " + us.toPattern(a, TRUE));
1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSurrogate() {
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char* DATA[] = {
1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // These should all behave identically
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[abc\\uD800\\uDC00]",
1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // "[abc\uD800\uDC00]", // Can't do this on C -- only Java
1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[abc\\U00010000]",
1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0
1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int i=0; DATA[i] != 0; ++i) {
1642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV));
1644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UnicodeSet set(UnicodeString(DATA[i], -1, US_INV), ec);
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln("FAIL: UnicodeSet constructor");
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expectContainment(set,
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          CharsToUnicodeString("abc\\U00010000"),
1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.size() != 4) {
1653c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " +
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  set.size() + ", expected 4");
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestExhaustive() {
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // exhaustive tests. Simulate UnicodeSets with integers.
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // That gives us very solid tests (except for large memory tests).
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t limit = 128;
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet x, y, z, aa;
1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < limit; ++i) {
1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bitsToSet(i, x);
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Testing " + i + ", " + x);
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _testComplement(i, x, y);
1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // AS LONG AS WE ARE HERE, check roundtrip
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        checkRoundTrip(bitsToSet(i, aa));
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t j = 0; j < limit; ++j) {
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testAdd(i,j,  x,y,z);
1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testXor(i,j,  x,y,z);
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testRetain(i,j,  x,y,z);
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _testRemove(i,j,  x,y,z);
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testComplement(int32_t a, UnicodeSet& x, UnicodeSet& z) {
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.complement();
1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (~a)) {
1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: ~" + x +  " != " + z);
1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: ~" + a + " != " + c);
1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"complement " + a);
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testAdd(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.addAll(y);
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a | b)) {
1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: " + x + " | " + y + " != " + z);
1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: add: " + a + " | " + b + " != " + c);
1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"add " + a + "," + b);
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRetain(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.retainAll(y);
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a & b)) {
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: retain: " + x + " & " + y + " != " + z);
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: retain: " + a + " & " + b + " != " + c);
1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"retain " + a + "," + b);
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testRemove(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.removeAll(y);
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a &~ b)) {
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: remove: " + x + " &~ " + y + " != " + z);
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: remove: " + a + " &~ " + b + " != " + c);
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"remove " + a + "," + b);
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::_testXor(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(a, x);
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bitsToSet(b, y);
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z = x;
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    z.complementAll(y);
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = setToBits(z);
1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c != (a ^ b)) {
1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: complement: " + x + " ^ " + y + " != " + z);
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAILED: complement: " + a + " ^ " + b + " != " + c);
1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkCanonicalRep(z, (UnicodeString)"complement " + a + "," + b);
1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Check that ranges are monotonically increasing and non-
1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * overlapping.
1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkCanonicalRep(const UnicodeSet& set, const UnicodeString& msg) {
1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = set.getRangeCount();
1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (n < 0) {
1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL result of " + msg +
1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ": range count should be >= 0 but is " +
1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              n /*+ " for " + set.toPattern())*/);
1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 last = 0;
1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<n; ++i) {
1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = set.getRangeStart(i);
1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end = set.getRangeEnd(i);
1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (start > end) {
1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL result of " + msg +
1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ": range " + (i+1) +
1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " start > end: " + (int)start + ", " + (int)end +
1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " for " + set);
1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i > 0 && start <= last) {
1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL result of " + msg +
1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ": range " + (i+1) +
1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " overlaps previous range: " + (int)start + ", " + (int)end +
1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  " for " + set);
1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        last = end;
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a bitmask to a UnicodeSet.
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) {
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result.clear();
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (UChar32 i = 0; i < 32; ++i) {
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((a & (1<<i)) != 0) {
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result.add(i);
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert a UnicodeSet to a bitmask.  Only the characters
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * U+0000 to U+0020 are represented in the bitmask.
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSetTest::setToBits(const UnicodeSet& x) {
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = 0;
1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < 32; ++i) {
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (x.contains((UChar32)i)) {
1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result |= (1<<i);
1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the representation of an inversion list based UnicodeSet
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as a pairs list.  Ranges are listed in ascending Unicode order.
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For example, the set [a-zA-M3] is represented as "33AMaz".
1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) {
1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pairs;
1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<set.getRangeCount(); ++i) {
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = set.getRangeStart(i);
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end = set.getRangeEnd(i);
1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (end > 0xFFFF) {
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            end = 0xFFFF;
1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            i = set.getRangeCount(); // Should be unnecessary
1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pairs.append((UChar)start).append((UChar)end);
1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pairs;
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Basic consistency check for a few items.
1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * That the iterator works, and that we can create a pattern and
1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * get the same thing back
1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet t(s);
1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "copy ct");
1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t = s;
1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "operator=");
1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copyWithIterator(t, s, FALSE);
1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "iterator roundtrip");
1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    copyWithIterator(t, s, TRUE); // try range
1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    checkEqual(s, t, "iterator roundtrip");
1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat; s.toPattern(pat, FALSE);
1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t.applyPattern(pat, ec);
1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: applyPattern");
1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        checkEqual(s, t, "toPattern(false)");
1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s.toPattern(pat, TRUE);
1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t.applyPattern(pat, ec);
1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: applyPattern");
1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        checkEqual(s, t, "toPattern(true)");
1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    t.clear();
1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetIterator it(s);
1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (withRange) {
1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (it.nextRange()) {
1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (it.isString()) {
1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getString());
1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getCodepoint(), it.getCodepointEnd());
1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (it.next()) {
1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (it.isString()) {
1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getString());
1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t.add(it.getCodepoint());
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString source; s.toPattern(source, TRUE);
1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString result; t.toPattern(result, TRUE);
1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s != t) {
1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: " + message
1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; source = " + source
1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; result = " + result
1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              );
1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok: " + message
1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; source = " + source
1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              + "; result = " + result
1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              );
1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeString& pat,
1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsIn,
1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsOut) {
1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set(pat, ec);
1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: pattern \"" +
1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              pat + "\" => " + u_errorName(ec));
1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectContainment(set, pat, charsIn, charsOut);
1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set,
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsIn,
1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsOut) {
1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(pat);
1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectContainment(set, pat, charsIn, charsOut);
1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectContainment(const UnicodeSet& set,
1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& setName,
1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsIn,
1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const UnicodeString& charsOut) {
1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString bad;
1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<charsIn.length(); i+=U16_LENGTH(c)) {
1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = charsIn.char32At(i);
1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!set.contains(c)) {
1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            bad.append(c);
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bad.length() > 0) {
1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail: set " + setName + " does not contain " + prettify(bad) +
1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ", expected containment of " + prettify(charsIn));
1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok: set " + setName + " contains " + prettify(charsIn));
1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bad.truncate(0);
1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<charsOut.length(); i+=U16_LENGTH(c)) {
1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = charsOut.char32At(i);
1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (set.contains(c)) {
1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            bad.append(c);
1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bad.length() > 0) {
1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"Fail: set " + setName + " contains " + prettify(bad) +
1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              ", expected non-containment of " + prettify(charsOut));
1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok: set " + setName + " does not contain " + prettify(charsOut));
1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPattern(UnicodeSet& set,
1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UnicodeString& pattern,
1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UnicodeString& expectedPairs){
1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern(pattern, status);
1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: applyPattern(\"") + pattern +
1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              "\") failed");
1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (getPairs(set) != expectedPairs ) {
1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln(UnicodeString("FAIL: applyPattern(\"") + pattern +
1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  "\") => pairs \"" +
1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(getPairs(set)) + "\", expected \"" +
1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(expectedPairs) + "\"");
1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln(UnicodeString("Ok:   applyPattern(\"") + pattern +
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  "\") => pairs \"" +
1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(getPairs(set)) + "\"");
1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // the result of calling set.toPattern(), which is the string representation of
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this set(set), is passed to a  UnicodeSet constructor, and tested that it
1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // will produce another set that is equal to this one.
1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString temppattern;
1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(temppattern);
1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *tempset=new UnicodeSet(temppattern, status);
1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => invalid pattern"));
1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(*tempset != set || getPairs(*tempset) != getPairs(set)){
1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \""+ escape(getPairs(*tempset)) + "\", expected pairs \"" +
1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            escape(getPairs(set)) + "\""));
2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else{
2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln(UnicodeString("Ok:   applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \"" + escape(getPairs(*tempset)) + "\""));
2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete tempset;
2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::expectPairs(const UnicodeSet& set, const UnicodeString& expectedPairs) {
2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (getPairs(set) != expectedPairs) {
2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("FAIL: Expected pair list \"") +
2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              escape(expectedPairs) + "\", got \"" +
2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              escape(getPairs(set)) + "\"");
2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::expectToPattern(const UnicodeSet& set,
2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& expPat,
2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const char** expStrings) {
2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pat;
2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.toPattern(pat, TRUE);
2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat == expPat) {
2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        logln((UnicodeString)"Ok:   toPattern() => \"" + pat + "\"");
2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln((UnicodeString)"FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (expStrings == NULL) {
2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool in = TRUE;
2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; expStrings[i] != NULL; ++i) {
2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (expStrings[i] == NOT) { // sic; pointer comparison
2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            in = FALSE;
2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString s = CharsToUnicodeString(expStrings[i]);
2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool contained = set.contains(s);
2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (contained == in) {
2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            logln((UnicodeString)"Ok: " + expPat +
2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (contained ? " contains {" : " does not contain {") +
2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(expStrings[i]) + "}");
2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errln((UnicodeString)"FAIL: " + expPat +
2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  (contained ? " contains {" : " does not contain {") +
2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  escape(expStrings[i]) + "}");
2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); }
2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::doAssert(UBool condition, const char *message)
2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!condition) {
2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln(UnicodeString("ERROR : ") + message);
2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString
2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSetTest::escape(const UnicodeString& s) {
2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString buf;
2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<s.length(); )
2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {
2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = s.char32At(i);
2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (0x0020 <= c && c <= 0x007F) {
2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += c;
2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c <= 0xFFFF) {
2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += (UChar)0x5c; buf += (UChar)0x75;
2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += (UChar)0x5c; buf += (UChar)0x55;
2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0xF0000000) >> 28);
2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0x0F000000) >> 24);
2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0x00F00000) >> 20);
2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf += toHexString((c & 0x000F0000) >> 16);
2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString((c & 0xF000) >> 12);
2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString((c & 0x0F00) >> 8);
2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString((c & 0x00F0) >> 4);
2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf += toHexString(c & 0x000F);
2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        i += U16_LENGTH(c);
2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return buf;
2087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestFreezable() {
2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15);
2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet idSet(idPattern, errorCode);
2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15);
2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet wsSet(wsPattern, errorCode);
2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
2103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    idSet.add(idPattern);
2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet frozen(idSet);
2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.freeze();
2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(idSet.isFrozen() || !frozen.isFrozen()) {
2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: isFrozen() is wrong");
2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: a copy-constructed frozen set differs from its original");
2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen=wsSet;
2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: a frozen set was modified by operator=");
2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet frozen2(frozen);
2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen2!=frozen || frozen2!=idSet) {
2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: a copied frozen set differs from its frozen original");
2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!frozen2.isFrozen()) {
2126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: copy-constructing a frozen set results in a thawed one");
2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet frozen3(5, 55);  // Set to some values to really test assignment below, not copy construction.
2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen3.contains(0, 4) || !frozen3.contains(5, 55) || frozen3.contains(56, 0x10ffff)) {
2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(5, 55) failed");
2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen3=frozen;
2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!frozen3.isFrozen()) {
2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: copying a frozen set results in a thawed one");
2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *cloned=(UnicodeSet *)frozen.clone();
2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!cloned->isFrozen() || *cloned!=frozen || cloned->containsSome(0xd802, 0xd805)) {
2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: clone() failed");
2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cloned->add(0xd802, 0xd805);
2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(cloned->containsSome(0xd802, 0xd805)) {
2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: unable to modify clone");
2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete cloned;
2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *thawed=(UnicodeSet *)frozen.cloneAsThawed();
2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(thawed->isFrozen() || *thawed!=frozen || thawed->containsSome(0xd802, 0xd805)) {
2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: cloneAsThawed() failed");
2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    thawed->add(0xd802, 0xd805);
2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!thawed->contains(0xd802, 0xd805)) {
2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: unable to modify thawed clone");
2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete thawed;
2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.set(5, 55);
2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::set() modified a frozen set");
2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.clear();
2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::clear() modified a frozen set");
2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.closeOver(USET_CASE_INSENSITIVE);
2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::closeOver() modified a frozen set");
2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.compact();
2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::compact() modified a frozen set");
2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ParsePosition pos;
2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPattern(wsPattern, errorCode).
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode).
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode).
2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode).
2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode);
2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set");
2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(0xd800).
2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(0xd802, 0xd805).
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(wsPattern).
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        addAll(idPattern).
2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        addAll(wsSet);
2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::addXYZ() modified a frozen set");
2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(0x62).
2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(0x64, 0x69).
2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retainAll(wsPattern).
2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retainAll(wsSet);
2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::retainXYZ() modified a frozen set");
2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove(0x62).
2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove(0x64, 0x69).
2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove(idPattern).
2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        removeAll(idPattern).
2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        removeAll(idSet);
2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::removeXYZ() modified a frozen set");
2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    frozen.
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement().
2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(0x62).
2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(0x64, 0x69).
2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(idPattern).
2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complementAll(idPattern).
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complementAll(idSet);
2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(frozen!=idSet || !(frozen==idSet)) {
2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet::complementXYZ() modified a frozen set");
2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test span() etc. -------------------------------------------------------- ***
2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Append the UTF-8 version of the string to t and return the appended UTF-8 length.
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendUTF8(const UChar *s, int32_t length, char *t, int32_t capacity) {
2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length8=0;
2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_strToUTF8(t, capacity, &length8, s, length, &errorCode);
2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(errorCode)) {
2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return length8;
2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The string contains an unpaired surrogate.
2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Ignore this string.
2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator;
2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Make the strings in a UnicodeSet easily accessible.
2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStrings {
2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetWithStrings(const UnicodeSet &normalSet) :
2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            set(normalSet), stringsLength(0), hasSurrogates(FALSE) {
2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t size=set.size();
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(size>0 && set.charAt(size-1)<0) {
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If a set's last element is not a code point, then it must contain strings.
2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Iterate over the set, skip all code point ranges, and cache the strings.
2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Convert them to UTF-8 for spanUTF8().
2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSetIterator iter(set);
2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *s;
2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            char *s8=utf8;
2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8, utf8Count=0;
2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(iter.nextRange() && stringsLength<LENGTHOF(strings)) {
2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(iter.isString()) {
2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Store the pointer to the set's string element
2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // which we happen to know is a stable pointer.
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    strings[stringsLength]=s=&iter.getString();
2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    utf8Count+=
2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        utf8Lengths[stringsLength]=length8=
2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        appendUTF8(s->getBuffer(), s->length(),
2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                   s8, (int32_t)(sizeof(utf8)-utf8Count));
2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(length8==0) {
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        hasSurrogates=TRUE;  // Contains unpaired surrogates.
2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    s8+=length8;
2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++stringsLength;
2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &getSet() const {
2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return set;
2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool hasStrings() const {
2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (UBool)(stringsLength>0);
2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool hasStringsWithSurrogates() const {
2290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return hasSurrogates;
2291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
2294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend class UnicodeSetWithStringsIterator;
2295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &set;
2297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *strings[20];
2299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t stringsLength;
2300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool hasSurrogates;
2301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char utf8[1024];
2303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t utf8Lengths[20];
2304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextStringIndex;
2306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextUTF8Start;
2307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
2308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetWithStringsIterator {
2310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
2311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetWithStringsIterator(const UnicodeSetWithStrings &set) :
2312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fSet(set), nextStringIndex(0), nextUTF8Start(0) {
2313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void reset() {
2316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        nextStringIndex=nextUTF8Start=0;
2317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString *nextString() {
2320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(nextStringIndex<fSet.stringsLength) {
2321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return fSet.strings[nextStringIndex++];
2322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
2324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Do not mix with calls to nextString().
2328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *nextUTF8(int32_t &length) {
2329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(nextStringIndex<fSet.stringsLength) {
2330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8=fSet.utf8+nextUTF8Start;
2331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            nextUTF8Start+=length=fSet.utf8Lengths[nextStringIndex++];
2332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return s8;
2333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=0;
2335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
2336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
2340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSetWithStrings &fSet;
2341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextStringIndex;
2342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t nextUTF8Start;
2343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
2344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Compare 16-bit Unicode strings (which may be malformed UTF-16)
2346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// at code point boundaries.
2347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// That is, each edge of a match must not be in the middle of a surrogate pair.
2348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
2349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querumatches16CPB(const UChar *s, int32_t start, int32_t limit, const UnicodeString &t) {
2350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s+=start;
2351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    limit-=start;
2352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length=t.length();
2353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0==t.compare(s, length) &&
2354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&
2355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));
2356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implement span() with contains() for comparison.
2359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
2360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 USetSpanCondition spanCondition) {
2361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
2363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start=0, prev;
2369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((prev=start)<length) {
2370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(s, start, length, c);
2371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next;
2380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
2381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(s, next, length, c);
2382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
2388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
2389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return start;
2391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return start;
2396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next, maxSpanLimit=0;
2400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
2401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(s, next, length, c);
2402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                next=start;  // Do not span this single, not-contained code point.
2404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
2408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
2409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchLimit=start+str->length();
2411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchLimit==length) {
2412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return length;
2413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(next==start) {
2418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;  // First match from start.
2419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
2420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchLimit<next) {
2421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from start for iteration.
2422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=next;
2423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                next=matchLimit;
2424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchLimit=temp;
2425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from start.
2427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanLength=containsSpanUTF16(set, s+matchLimit, length-matchLimit,
2428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                 USET_SPAN_CONTAINED);
2429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if((matchLimit+spanLength)>maxSpanLimit) {
2430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                maxSpanLimit=matchLimit+spanLength;
2431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(maxSpanLimit==length) {
2432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return length;
2433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchLimit>next) {
2438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from start.
2439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;
2440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(next==start) {
2445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from start.
2446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(start>maxSpanLimit) {
2450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return start;
2451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return maxSpanLimit;
2453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanBackUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
2458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     USetSpanCondition spanCondition) {
2459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
2464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length;
2470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_PREV(s, 0, length, c);
2472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length, length0=length;
2481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_PREV(s, 0, length, c);
2483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
2489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
2490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return prev;
2492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length, minSpanStart=length, length0=length;
2500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_PREV(s, 0, length, c);
2502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length=prev;  // Do not span this single, not-contained code point.
2504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString *str;
2506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((str=iter.nextString())!=NULL) {
2508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
2509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchStart=prev-str->length();
2511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchStart==0) {
2512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return 0;
2513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(length==prev) {
2518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;  // First match from prev.
2519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
2520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchStart>length) {
2521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from prev for iteration.
2522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=length;
2523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                length=matchStart;
2524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchStart=temp;
2525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from prev.
2527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanStart=containsSpanBackUTF16(set, s, matchStart,
2528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                    USET_SPAN_CONTAINED);
2529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(spanStart<minSpanStart) {
2530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                minSpanStart=spanStart;
2531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(minSpanStart==0) {
2532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return 0;
2533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchStart<length) {
2538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from prev.
2539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;
2540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==prev) {
2545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from prev.
2546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(prev<minSpanStart) {
2549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return prev;
2550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return minSpanStart;
2552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
2557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                USetSpanCondition spanCondition) {
2558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
2560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start=0, prev;
2566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((prev=start)<length) {
2567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U8_NEXT(s, start, length, c);
2568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0) {
2569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c=0xfffd;
2570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next;
2580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
2581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U8_NEXT(s, next, length, c);
2582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0) {
2583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c=0xfffd;
2584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
2593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return start;
2595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return start;
2600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t start, next, maxSpanLimit=0;
2604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(start=next=0; start<length;) {
2605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U8_NEXT(s, next, length, c);
2606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0) {
2607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c=0xfffd;
2608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                next=start;  // Do not span this single, not-contained code point.
2611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
2617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchLimit=start+length8;
2619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchLimit==length) {
2620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return length;
2621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(next==start) {
2626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;  // First match from start.
2627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
2628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchLimit<next) {
2629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from start for iteration.
2630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=next;
2631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                next=matchLimit;
2632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchLimit=temp;
2633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from start.
2635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanLength=containsSpanUTF8(set, s+matchLimit, length-matchLimit,
2636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                USET_SPAN_CONTAINED);
2637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if((matchLimit+spanLength)>maxSpanLimit) {
2638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                maxSpanLimit=matchLimit+spanLength;
2639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(maxSpanLimit==length) {
2640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return length;
2641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchLimit>next) {
2646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from start.
2647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            next=matchLimit;
2648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(next==start) {
2653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from start.
2654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start=next;
2656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(start>maxSpanLimit) {
2658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return start;
2659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return maxSpanLimit;
2661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
2666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    USetSpanCondition spanCondition) {
2667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.hasStrings()) {
2672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length;
2678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U8_PREV(s, 0, length, c);
2680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0) {
2681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c=0xfffd;
2682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)!=spanCondition) {
2684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length;
2692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U8_PREV(s, 0, length, c);
2694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0) {
2695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c=0xfffd;
2696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(realSet.contains(c)) {
2698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
2705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return prev;
2707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return prev;
2711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetWithStringsIterator iter(set);
2713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c;
2714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=length, minSpanStart=length;
2715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
2716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U8_PREV(s, 0, length, c);
2717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0) {
2718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                c=0xfffd;
2719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(!realSet.contains(c)) {
2721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length=prev;  // Do not span this single, not-contained code point.
2722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *s8;
2724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t length8;
2725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter.reset();
2726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while((s8=iter.nextUTF8(length8))!=NULL) {
2727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
2728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // spanNeedsStrings=TRUE;
2729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t matchStart=prev-length8;
2730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(matchStart==0) {
2731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return 0;
2732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(spanCondition==USET_SPAN_CONTAINED) {
2734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Iterate for the shortest match at each position.
2735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Recurse for each but the shortest match.
2736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(length==prev) {
2737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;  // First match from prev.
2738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
2739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(matchStart>length) {
2740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // Remember shortest match from prev for iteration.
2741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                int32_t temp=length;
2742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                length=matchStart;
2743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                matchStart=temp;
2744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Recurse for non-shortest match from prev.
2746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            int32_t spanStart=containsSpanBackUTF8(set, s, matchStart,
2747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                                   USET_SPAN_CONTAINED);
2748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            if(spanStart<minSpanStart) {
2749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                minSpanStart=spanStart;
2750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if(minSpanStart==0) {
2751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    return 0;
2752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
2753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
2754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(matchStart<length) {
2757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // Remember longest match from prev.
2758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            length=matchStart;
2759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
2760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
2761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==prev) {
2764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;  // No match from prev.
2765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while((prev=length)>0);
2767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(prev<minSpanStart) {
2768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return prev;
2769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
2770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return minSpanStart;
2771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Bit flags selecting which spans are performed and compared.
// Each group of flags is followed by a mask that combines the group.
enum {
    // Which UTF form of the text.
    SPAN_UTF16          =1,
    SPAN_UTF8           =2,
    SPAN_UTFS           =SPAN_UTF16|SPAN_UTF8,              // 3

    // The set itself and/or its complement.
    SPAN_SET            =4,
    SPAN_COMPLEMENT     =8,
    SPAN_POLARITY       =SPAN_SET|SPAN_COMPLEMENT,          // 0xc

    // Span direction.
    SPAN_FWD            =0x10,
    SPAN_BACK           =0x20,
    SPAN_DIRS           =SPAN_FWD|SPAN_BACK,                // 0x30

    // Which "contained" span condition: USET_SPAN_CONTAINED and/or USET_SPAN_SIMPLE.
    SPAN_CONTAINED      =0x100,
    SPAN_SIMPLE         =0x200,
    SPAN_CONDITION      =SPAN_CONTAINED|SPAN_SIMPLE,        // 0x300

    // Everything.
    SPAN_ALL            =SPAN_UTFS|SPAN_POLARITY|SPAN_DIRS|SPAN_CONDITION   // 0x33f
};
2795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline USetSpanCondition invertSpanCondition(USetSpanCondition spanCondition, USetSpanCondition contained) {
2797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return spanCondition == USET_SPAN_NOT_CONTAINED ? contained : USET_SPAN_NOT_CONTAINED;
2798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t slen(const void *s, UBool isUTF16) {
2801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return isUTF16 ? u_strlen((const UChar *)s) : strlen((const char *)s);
2802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Count spans on a string with the method according to type and set the span limits.
2806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set may be the complement of the original.
2807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * When using spanBack() and comparing with span(), use a span condition for the first spanBack()
2808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * according to the expected number of spans.
2809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Sets typeName to an empty string if there is no such type.
2810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns -1 if the span option is filtered out.
2811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
2812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t getSpans(const UnicodeSetWithStrings &set, UBool isComplement,
2813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        const void *s, int32_t length, UBool isUTF16,
2814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        uint32_t whichSpans,
2815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int type, const char *&typeName,
2816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int32_t limits[], int32_t limitsCapacity,
2817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int32_t expectCount) {
2818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet &realSet(set.getSet());
2819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start, count;
2820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    USetSpanCondition spanCondition, firstSpanCondition, contained;
2821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool isForward;
2822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(type<0 || 7<type) {
2824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        typeName="";
2825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const typeNames16[]={
2829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "contains", "contains(LM)",
2830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "span", "span(LM)",
2831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "containsBack", "containsBack(LM)",
2832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "spanBack", "spanBack(LM)"
2833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
2834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const typeNames8[]={
2836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "containsUTF8", "containsUTF8(LM)",
2837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "spanUTF8", "spanUTF8(LM)",
2838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "containsBackUTF8", "containsBackUTF8(LM)", // not implemented
2839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "spanBackUTF8", "spanBackUTF8(LM)"
2840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
2841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    typeName= isUTF16 ? typeNames16[type] : typeNames8[type];
2843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // filter span options
2845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(type<=3) {
2846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // span forward
2847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_FWD)==0) {
2848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        isForward=TRUE;
2851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // span backward
2853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_BACK)==0) {
2854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        isForward=FALSE;
2857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((type&1)==0) {
2859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // use USET_SPAN_CONTAINED
2860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_CONTAINED)==0) {
2861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        contained=USET_SPAN_CONTAINED;
2864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
2865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // use USET_SPAN_SIMPLE
2866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((whichSpans&SPAN_SIMPLE)==0) {
2867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
2868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        contained=USET_SPAN_SIMPLE;
2870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Default first span condition for going forward with an uncomplemented set.
2873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    spanCondition=USET_SPAN_NOT_CONTAINED;
2874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(isComplement) {
2875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=invertSpanCondition(spanCondition, contained);
2876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First span condition for span(), used to terminate the spanBack() iteration.
2879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    firstSpanCondition=spanCondition;
2880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // spanBack(): Its initial span condition is span()'s last span condition,
2882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // which is the opposite of span()'s first span condition
2883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if we expect an even number of spans.
2884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // (The loop inverts spanCondition (expectCount-1) times
2885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // before the expectCount'th span() call.)
2886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If we do not compare forward and backward directions, then we do not have an
2887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // expectCount and just start with firstSpanCondition.
2888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!isForward && (whichSpans&SPAN_FWD)!=0 && (expectCount&1)==0) {
2889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=invertSpanCondition(spanCondition, contained);
2890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    count=0;
2893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(type) {
2894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0:
2895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 1:
2896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start=0;
2897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(length<0) {
2898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=slen(s, isUTF16);
2899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start+= isUTF16 ? containsSpanUTF16(set, (const UChar *)s+start, length-start, spanCondition) :
2902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              containsSpanUTF8(set, (const char *)s+start, length-start, spanCondition);
2903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<limitsCapacity) {
2904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[count]=start;
2905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(start>=length) {
2908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 2:
2914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 3:
2915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        start=0;
2916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            start+= isUTF16 ? realSet.span((const UChar *)s+start, length>=0 ? length-start : length, spanCondition) :
2918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              realSet.spanUTF8((const char *)s+start, length>=0 ? length-start : length, spanCondition);
2919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<limitsCapacity) {
2920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[count]=start;
2921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length>=0 ? start>=length :
2924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           isUTF16 ? ((const UChar *)s)[start]==0 :
2925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     ((const char *)s)[start]==0
2926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ) {
2927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 4:
2933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 5:
2934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(length<0) {
2935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=slen(s, isUTF16);
2936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<=limitsCapacity) {
2940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[limitsCapacity-count]=length;
2941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length= isUTF16 ? containsSpanBackUTF16(set, (const UChar *)s, length, spanCondition) :
2943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              containsSpanBackUTF8(set, (const char *)s, length, spanCondition);
2944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==0 && spanCondition==firstSpanCondition) {
2945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(count<limitsCapacity) {
2950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            memmove(limits, limits+(limitsCapacity-count), count*4);
2951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 6:
2954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 7:
2955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
2956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++count;
2957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count<=limitsCapacity) {
2958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                limits[limitsCapacity-count]= length >=0 ? length : slen(s, isUTF16);
2959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Note: Length<0 is tested only for the first spanBack().
2961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If we wanted to keep length<0 for all spanBack()s, we would have to
2962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // temporarily modify the string by placing a NUL where the previous spanBack() stopped.
2963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length= isUTF16 ? realSet.spanBack((const UChar *)s, length, spanCondition) :
2964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              realSet.spanBackUTF8((const char *)s, length, spanCondition);
2965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length==0 && spanCondition==firstSpanCondition) {
2966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
2967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            spanCondition=invertSpanCondition(spanCondition, contained);
2969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(count<limitsCapacity) {
2971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            memmove(limits, limits+(limitsCapacity-count), count*4);
2972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
2974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
2975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        typeName="";
2976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
2977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return count;
2980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Indexes of the sets to be tested.
// An odd index means that the set is a complement (isComplement).
enum {
    SLOW     =0,
    SLOW_NOT =1,
    FAST     =2,
    FAST_NOT =3,
    SET_COUNT       // number of sets
};

// Display names of the sets, indexed by the constants above.
static const char *const setNames[SET_COUNT]={
    "slow",     // SLOW
    "slow.not", // SLOW_NOT
    "fast",     // FAST
    "fast.not"  // FAST_NOT
};
2997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify that we get the same results whether we look at text with contains(),
3000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * span() or spanBack(), using unfrozen or frozen versions of the set,
3001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and using the set or its complement (switching the spanConditions accordingly).
3002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The latter verifies that
3003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   set.span(spanCondition) == set.complement().span(!spanCondition).
3004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
3005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The expectLimits[] are either provided by the caller (with expectCount>=0)
3006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * or returned to the caller (with an input expectCount<0).
3007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
3008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
3009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const void *s, int32_t length, UBool isUTF16,
3010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint32_t whichSpans,
3011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              int32_t expectLimits[], int32_t &expectCount,
3012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const char *testName, int32_t index) {
3013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t limits[500];
3014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t limitsCount;
3015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i, j;
3016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *typeName;
3018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int type;
3019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<SET_COUNT; ++i) {
3021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((i&1)==0) {
3022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Even-numbered sets are original, uncomplemented sets.
3023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if((whichSpans&SPAN_SET)==0) {
3024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
3025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
3027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Odd-numbered sets are complemented.
3028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if((whichSpans&SPAN_COMPLEMENT)==0) {
3029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
3030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(type=0;; ++type) {
3033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            limitsCount=getSpans(*sets[i], (UBool)(i&1),
3034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 s, length, isUTF16,
3035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 whichSpans,
3036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 type, typeName,
3037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 limits, LENGTHOF(limits), expectCount);
3038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(typeName[0]==0) {
3039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break; // All types tried.
3040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(limitsCount<0) {
3042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue; // Span option filtered out.
3043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(expectCount<0) {
3045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                expectCount=limitsCount;
3046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(limitsCount>LENGTHOF(limits)) {
3047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans",
3048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits));
3049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
3050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                memcpy(expectLimits, limits, limitsCount*4);
3052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(limitsCount!=expectCount) {
3053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld",
3054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)expectCount);
3055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
3056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for(j=0; j<limitsCount; ++j) {
3057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(limits[j]!=expectLimits[j]) {
3058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%ld != %ld",
3059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[i], typeName, (long)limitsCount,
3060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              j, (long)limits[j], (long)expectLimits[j]);
3061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
3062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Compare span() with containsAll()/containsNone(),
3069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // but only if we have expectLimits[] from the uncomplemented set.
3070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(isUTF16 && (whichSpans&SPAN_SET)!=0) {
3071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar *s16=(const UChar *)s;
3072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString string;
3073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t prev=0, limit, length;
3074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(i=0; i<expectCount; ++i) {
3075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            limit=expectLimits[i];
3076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=limit-prev;
3077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(length>0) {
3078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                string.setTo(FALSE, s16+prev, length);  // read-only alias
3079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(i&1) {
3080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[SLOW]->getSet().containsAll(string)) {
3081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
3082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
3083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[FAST]->getSet().containsAll(string)) {
3086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
3087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
3088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
3091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[SLOW]->getSet().containsNone(string)) {
3092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
3093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
3094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(!sets[FAST]->getSet().containsNone(string)) {
3097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
3098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
3099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
3100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
3101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            prev=limit;
3104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specifically test either UTF-16 or UTF-8.
3109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
3110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const void *s, int32_t length, UBool isUTF16,
3111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint32_t whichSpans,
3112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const char *testName, int32_t index) {
3113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectLimits[500];
3114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectCount=-1;
3115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, length, isUTF16, whichSpans, expectLimits, expectCount, testName, index);
3116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool stringContainsUnpairedSurrogate(const UChar *s, int32_t length) {
3119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c, c2;
3120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>=0) {
3122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(length>0) {
3123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=*s++;
3124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            --length;
3125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(0xd800<=c && c<0xe000) {
3126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(c>=0xdc00 || length==0 || !U16_IS_TRAIL(c2=*s++)) {
3127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return TRUE;
3128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                --length;
3130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
3133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((c=*s++)!=0) {
3134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(0xd800<=c && c<0xe000) {
3135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(c>=0xdc00 || !U16_IS_TRAIL(c2=*s++)) {
3136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return TRUE;
3137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
3142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text,
3145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// unless either UTF is turned off in whichSpans.
3146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Testing UTF-16 and UTF-8 together requires that surrogate code points
3147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// have the same contains(c) value as U+FFFD.
3148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanBothUTFs(const UnicodeSetWithStrings *sets[4],
3149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      const UChar *s16, int32_t length16,
3150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      uint32_t whichSpans,
3151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      const char *testName, int32_t index) {
3152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectLimits[500];
3153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t expectCount;
3154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    expectCount=-1;  // Get expectLimits[] from testSpan().
3156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF16)!=0) {
3158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        testSpan(sets, s16, length16, TRUE, whichSpans, expectLimits, expectCount, testName, index);
3159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF8)==0) {
3161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert s16[] and expectLimits[] to UTF-8.
3165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t s8[3000];
3166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t offsets[3000];
3167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *s16Limit=s16+length16;
3169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *t=(char *)s8;
3170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *tLimit=t+sizeof(s8);
3171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t *o=offsets;
3172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
3173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert with substitution: Turn unpaired surrogates into U+FFFD.
3175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_fromUnicode(openUTF8Converter(), &t, tLimit, &s16, s16Limit, o, TRUE, &errorCode);
3176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: %s[0x%lx] ucnv_fromUnicode(to UTF-8) fails with %s",
3178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              testName, (long)index, u_errorName(errorCode));
3179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_resetFromUnicode(utf8Cnv);
3180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length8=(int32_t)(t-(char *)s8);
3183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert expectLimits[].
3185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, j, expect;
3186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=j=0; i<expectCount; ++i) {
3187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        expect=expectLimits[i];
3188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(expect==length16) {
3189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectLimits[i]=length8;
3190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
3191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(offsets[j]<expect) {
3192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++j;
3193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            expectLimits[i]=j;
3195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s8, length8, FALSE, whichSpans, expectLimits, expectCount, testName, index);
3199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 nextCodePoint(UChar32 c) {
3202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Skip some large and boring ranges.
3203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(c) {
3204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x3441:
3205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x4d7f;
3206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x5100:
3207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x9f00;
3208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0xb040:
3209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0xd780;
3210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0xe041:
3211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0xf8fe;
3212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x10100:
3213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x20000;
3214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0x20041:
3215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0xe0000;
3216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0xe0101:
3217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0x10fffd;
3218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
3219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return c+1;
3220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Verify that all implementations represent the same set.
3224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // contains(U+FFFD) is inconsistent with contains(some surrogates),
3226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // or the set contains strings with unpaired surrogates which don't translate to valid UTF-8:
3227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Skip the UTF-8 part of the test - if the string contains surrogates -
3228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // because it is likely to produce a different result.
3229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool inconsistentSurrogates=
3230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (!(sets[0]->getSet().contains(0xfffd) ?
3231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               sets[0]->getSet().contains(0xd800, 0xdfff) :
3232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               sets[0]->getSet().containsNone(0xd800, 0xdfff)) ||
3233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             sets[0]->hasStringsWithSurrogates());
3234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar s[1000];
3236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length=0;
3237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t localWhichSpans;
3238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c, first;
3240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(first=c=0;; c=nextCodePoint(c)) {
3241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) {
3242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            localWhichSpans=whichSpans;
3243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) {
3244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                localWhichSpans&=~SPAN_UTF8;
3245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first);
3247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c>0x10ffff) {
3248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            length=0;
3251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            first=c;
3252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U16_APPEND_UNSAFE(s, length, c);
3254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test with a particular, interesting string.
3258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specify length and try NUL-termination.
3259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const UChar s[]={
3261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x61, 0x62, 0x20,                       // Latin, space
3262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x3b1, 0x3b2, 0x3b3,                    // Greek
3263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd900,                                 // lead surrogate
3264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x3000, 0x30ab, 0x30ad,                 // wide space, Katakana
3265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xdc05,                                 // trail surrogate
3266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xa0, 0xac00, 0xd7a3,                   // nbsp, Hangul
3267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd900, 0xdc05,                         // unassigned supplementary
3268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd840, 0xdfff, 0xd860, 0xdffe,         // Han supplementary
3269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd7a4, 0xdc05, 0xd900, 0x2028,         // unassigned, surrogates in wrong order, LS
3270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0                                       // NUL
3271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
3272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF16)==0) {
3274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0);
3277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
3278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char s[]={
3282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc"                                   // Latin
3283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        " "                                     // space
3288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* truncated multi-byte sequences */
3290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xd0"
3291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0"
3292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe1"
3293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed"
3294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xee"
3295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0"
3296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf1"
3297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4"
3298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8"
3299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc"
3300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xCE\xB1\xCE\xB2\xCE\xB3"              // Greek
3302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0\x80"
3307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0\xa0"
3308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe1\x80"
3309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed\x80"
3310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed\xa0"
3311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xee\x80"
3312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x80"
3313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x90"
3314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf1\x80"
3315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x80"
3316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x90"
3317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80"
3318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80"
3319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xE3\x80\x80\xE3\x82\xAB\xE3\x82\xAD"  // wide space, Katakana
3321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x80\x80"
3326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x90\x80"
3327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf1\x80\x80"
3328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x80\x80"
3329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x90\x80"
3330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80\x80"
3331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80"
3332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xC2\xA0\xEA\xB0\x80\xED\x9E\xA3"      // nbsp, Hangul
3334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80\x80\x80"
3339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80\x80"
3340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xF1\x90\x80\x85"                      // unassigned supplementary
3342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80\x80\x80"
3347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xF0\xA0\x8F\xBF\xF0\xA8\x8F\xBE"      // Han supplementary
3349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* complete sequences but non-shortest forms or out of range etc. */
3354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xc0\x80"
3355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xe0\x80\x80"
3356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xed\xa0\x80"
3357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf0\x80\x80\x80"
3358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf4\x90\x80\x80"
3359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xf8\x80\x80\x80\x80"
3360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfc\x80\x80\x80\x80\x80"
3361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xfe"
3362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xff"
3363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* trail byte in lead position */
3365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\x80"
3366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\xED\x9E\xA4\xE2\x80\xA8"              // unassigned, LS, NUL-terminated
3368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
3369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((whichSpans&SPAN_UTF8)==0) {
3371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0);
3374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
3375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Expand a list of span-option bit sets into alternatives.
//
// Each of the first whichSpansCount entries is reduced with mask
// (s=entry&mask) and then replaced/multiplied as follows:
//   - entry i itself becomes s|a;
//   - if b!=0, s|b is stored at index whichSpansCount+i;
//   - if b!=0 and c!=0, s|c is stored at index 2*whichSpansCount+i.
// c is ignored when b==0.
//
// No bounds checking is done: the caller must make sure that whichSpans[]
// has room for 2*whichSpansCount entries when b!=0, and for
// 3*whichSpansCount entries when b!=0 and c!=0.
//
// Returns the new number of entries: whichSpansCount if b==0,
// 2*whichSpansCount if b!=0 and c==0, otherwise 3*whichSpansCount.
static int32_t
addAlternative(uint32_t whichSpans[], int32_t whichSpansCount,
               uint32_t mask, uint32_t a, uint32_t b, uint32_t c) {
    const bool useB=(b!=0);
    const bool useC=useB && (c!=0);

    for(int32_t i=0; i<whichSpansCount; ++i) {
        // Strip the bits that the alternatives are going to replace.
        const uint32_t s=whichSpans[i]&mask;
        whichSpans[i]=s|a;
        // The copies are appended behind the original entries, so they never
        // overwrite an entry that this loop still has to read.
        if(useB) {
            whichSpans[whichSpansCount+i]=s|b;
        }
        if(useC) {
            whichSpans[2*whichSpansCount+i]=s|c;
        }
    }

    if(!useB) {
        return whichSpansCount;
    }
    return useC ? 3*whichSpansCount : 2*whichSpansCount;
}
3399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Runs of exactly 63 or 64 copies of 'a' or 'b', used as building blocks for
// the long set strings and test strings in TestSpan().
// Each literal is written as groups of eight characters (the last group of a
// 63-character run has seven) so that the total length is easy to verify;
// adjacent string literals are concatenated by the compiler.
#define _63_a \
    "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" \
    "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaa"
#define _64_a \
    "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" \
    "aaaaaaaa" "aaaaaaaa" "aaaaaaaa" "aaaaaaaa"
#define _63_b \
    "bbbbbbbb" "bbbbbbbb" "bbbbbbbb" "bbbbbbbb" \
    "bbbbbbbb" "bbbbbbbb" "bbbbbbbb" "bbbbbbb"
#define _64_b \
    "bbbbbbbb" "bbbbbbbb" "bbbbbbbb" "bbbbbbbb" \
    "bbbbbbbb" "bbbbbbbb" "bbbbbbbb" "bbbbbbbb"
3404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestSpan() {
3406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // "[...]" is a UnicodeSet pattern.
3407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // "*" performs tests on all Unicode code points and on a selection of
3408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   malformed UTF-8/16 strings.
3409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // "-options" limits the scope of testing for the current set.
3410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   By default, the test verifies that equivalent boundaries are found
3411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   for UTF-16 and UTF-8, going forward and backward,
3412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   alternating USET_SPAN_NOT_CONTAINED with
3413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   either USET_SPAN_CONTAINED or USET_SPAN_SIMPLE.
3414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Single-character options:
3415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     8 -- UTF-16 and UTF-8 boundaries may differ.
3416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: contains(U+FFFD) is inconsistent with contains(some surrogates),
3417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          or the set contains strings with unpaired surrogates
3418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          which do not translate to valid UTF-8.
3419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     c -- set.span() and set.complement().span() boundaries may differ.
3420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: Set strings are not complemented.
3421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     b -- span() and spanBack() boundaries may differ.
3422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: Strings in the set overlap, and spanBack(USET_SPAN_CONTAINED)
3423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          and spanBack(USET_SPAN_SIMPLE) are defined to
3424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          match with non-overlapping substrings.
3425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          For example, with a set containing "ab" and "ba",
3426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          span() of "aba" yields boundaries { 0, 2, 3 }
3427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          because the initial "ab" matches from 0 to 2,
3428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          while spanBack() yields boundaries { 0, 1, 3 }
3429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          because the final "ba" matches from 1 to 3.
3430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     l -- USET_SPAN_CONTAINED and USET_SPAN_SIMPLE boundaries may differ.
3431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          Cause: Strings in the set overlap, and a longer match may
3432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          require a sequence including non-longest substrings.
3433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          For example, with a set containing "ab", "abc" and "cd",
3434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          span(contained) of "abcd" spans the entire string
3435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //          but span(longest match) only spans the first 3 characters.
3436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   Each "-options" first resets all options and then applies the specified options.
3437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   A "-" without options resets the options.
3438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //   The options are also reset for each new set.
3439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Other strings will be spanned.
3440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const testdata[]={
3441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:ID_Continue:]",
3442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[:White_Space:]",
3444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[]",
3446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u0000-\\U0010FFFF]",
3448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u0000\\u0080\\u0800\\U00010000]",
3450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]",
3452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]",
3454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]",
3457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "*",
3459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Overlapping strings cause overlapping attempts to match.
3461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[x{xy}{xya}{axy}{ax}]",
3462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // More repetitions of "xya" would take too long with the recursive
3465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // reference implementation.
3466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // containsAll()=FALSE
3467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x14
3468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"  // set.complement().span(longest match) will stop here.
3470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"            // set.complement().span(contained) will stop between the two 'x'es.
3471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"
3472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"  // span() ends here.
3474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaa",
3475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // containsAll()=TRUE
3477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x15
3478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"
3480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxya"
3482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxy",
3484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-bc",
3486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x17
3487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byayaxya",  // span() -> { 4, 7, 8 }  spanBack() -> { 5, 8 }
3488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byayaxy",   // span() -> { 4, 7 }     complement.span() -> { 7 }
3490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byayax",    // span() -> { 4, 6 }     complement.span() -> { 6 }
3491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-",
3492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byaya",     // span() -> { 5 }
3493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "byay",      // span() -> { 4 }
3494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "bya",       // span() -> { 3 }
3495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // span(longest match) will not span the whole string.
3497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a{ab}{bc}]",
3498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x21
3500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
3501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a{ab}{abc}{cd}]",
3503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "acdabcdabccd",
3505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // spanBack(longest match) will not span the whole string.
3507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[c{ab}{bc}]",
3508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abc",
3510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[d{cd}{bcd}{ab}]",
3512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "abbcdabcdabd",
3514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test with non-ASCII set strings - test proper handling of surrogate pairs
3516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // and UTF-8 trail bytes.
3517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Copies of above test sets and strings, but transliterated to have
3518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // different code points with similar trail units.
3519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Previous: a      b         c            d
3520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Unicode:  042B   30AB      200AB        204AB
3521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // UTF-16:   042B   30AB      D840 DCAB    D841 DCAB
3522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // UTF-8:    D0 AB  E3 82 AB  F0 A0 82 AB  F0 A0 92 AB
3523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]",
3524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB",
3526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]",
3528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-cl",
3529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB",
3530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Stress bookkeeping and recursion.
3532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The following strings are barely doable with the recursive
3533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // reference implementation.
3534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The not-contained character at the end prevents an early exit from the span().
3535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[b{bb}]",
3536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test_string 0x33
3538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "bbbbbbbbbbbbbbbbbbbbbbbb-",
3539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // On complement sets, span() and spanBack() get different results
3540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // because b is not in the complement set and there is an odd number of b's
3541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // in the test string.
3542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-bc",
3543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "bbbbbbbbbbbbbbbbbbbbbbbbb-",
3544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test with set strings with an initial or final code point span
3546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // longer than 254.
3547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a{" _64_a _64_a _64_a _64_a "b}"
3548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          "{a" _64_b _64_b _64_b _64_b "}]",
3549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-c",
3550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _64_a _64_a _64_a _63_a "b",
3551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _64_a _64_a _64_a _64_a "b",
3552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _64_a _64_a _64_a _64_a "aaaabbbb",
3553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "a" _64_b _64_b _64_b _63_b,
3554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "a" _64_b _64_b _64_b _64_b,
3555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaaabbbb" _64_b _64_b _64_b _64_b,
3556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Test with strings containing unpaired surrogates.
3558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // They are not representable in UTF-8, and a leading trail surrogate
3559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // and a trailing lead surrogate must not match in the middle of a proper surrogate pair.
3560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // U+20001 == \\uD840\\uDC01
3561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // U+20400 == \\uD841\\uDC00
3562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]",
3563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "-8cl",
3564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb"
3565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
3566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t whichSpans[96]={ SPAN_ALL };
3567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t whichSpansCount=1;
3568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *sets[SET_COUNT]={ NULL };
3570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL };
3571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char testName[1024];
3573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *testNameLimit=testName;
3574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i, j;
3576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i=0; i<LENGTHOF(testdata); ++i) {
3577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const char *s=testdata[i];
3578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(s[0]=='[') {
3579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Create new test sets from this pattern.
3580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<SET_COUNT; ++j) {
3581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete sets_with_str[j];
3582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete sets[j];
3583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode errorCode=U_ZERO_ERROR;
3585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode);
3586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(U_FAILURE(errorCode)) {
3587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
3588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
3589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]);
3591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[SLOW_NOT]->complement();
3592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Intermediate set: Test cloning of a frozen set.
3593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *fast=new UnicodeSet(*sets[SLOW]);
3594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fast->freeze();
3595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[FAST]=(UnicodeSet *)fast->clone();
3596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete fast;
3597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *fastNot=new UnicodeSet(*sets[SLOW_NOT]);
3598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fastNot->freeze();
3599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sets[FAST_NOT]=(UnicodeSet *)fastNot->clone();
3600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete fastNot;
3601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<SET_COUNT; ++j) {
3603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sets_with_str[j]=new UnicodeSetWithStrings(*sets[j]);
3604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testName, s);
3607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            testNameLimit=strchr(testName, 0);
3608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *testNameLimit++=':';
3609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *testNameLimit=0;
3610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpans[0]=SPAN_ALL;
3612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpansCount=1;
3613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(s[0]=='-') {
3614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpans[0]=SPAN_ALL;
3615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            whichSpansCount=1;
3616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(*++s!=0) {
3618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                switch(*s) {
3619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'c':
3620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~SPAN_POLARITY,
3622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_SET,
3623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_COMPLEMENT,
3624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   0);
3625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'b':
3627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~SPAN_DIRS,
3629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_FWD,
3630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_BACK,
3631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   0);
3632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case 'l':
3634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // test USET_SPAN_CONTAINED FWD & BACK, and separately
3635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // USET_SPAN_SIMPLE only FWD, and separately
3636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // USET_SPAN_SIMPLE only BACK
3637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~(SPAN_DIRS|SPAN_CONDITION),
3639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_DIRS|SPAN_CONTAINED,
3640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_FWD|SPAN_SIMPLE,
3641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_BACK|SPAN_SIMPLE);
3642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                case '8':
3644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   ~SPAN_UTFS,
3646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_UTF16,
3647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   SPAN_UTF8,
3648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                   0);
3649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                default:
3651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errln("FAIL: unrecognized span set option in \"%s\"", testdata[i]);
3652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
3653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if(0==strcmp(s, "*")) {
3656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testNameLimit, "bad_string");
3657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<whichSpansCount; ++j) {
3658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(whichSpansCount>1) {
3659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    sprintf(testNameLimit+10 /* strlen("bad_string") */,
3660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            "%%0x%3x",
3661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            whichSpans[j]);
3662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanUTF16String(sets_with_str, whichSpans[j], testName);
3664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanUTF8String(sets_with_str, whichSpans[j], testName);
3665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testNameLimit, "contents");
3668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<whichSpansCount; ++j) {
3669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(whichSpansCount>1) {
3670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    sprintf(testNameLimit+8 /* strlen("contents") */,
3671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            "%%0x%3x",
3672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            whichSpans[j]);
3673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanContents(sets_with_str, whichSpans[j], testName);
3675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
3677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString string=UnicodeString(s, -1, US_INV).unescape();
3678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strcpy(testNameLimit, "test_string");
3679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for(j=0; j<whichSpansCount; ++j) {
3680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(whichSpansCount>1) {
3681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    sprintf(testNameLimit+11 /* strlen("test_string") */,
3682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            "%%0x%3x",
3683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            whichSpans[j]);
3684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
3685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                testSpanBothUTFs(sets_with_str, string.getBuffer(), string.length(), whichSpans[j], testName, i);
3686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
3687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
3688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(j=0; j<SET_COUNT; ++j) {
3690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete sets_with_str[j];
3691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete sets[j];
3692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Test select patterns and strings, and test USET_SPAN_SIMPLE.
3696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSetTest::TestStringSpan() {
3697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *pattern="[x{xy}{xya}{axy}{ax}]";
3698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const char *const string=
3699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
3701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
3703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xx"
3704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy"
3705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        "aaaa";
3706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
3708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pattern16=UnicodeString(pattern, -1, US_INV);
3709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set(pattern16, errorCode);
3710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString string16=UnicodeString(string, -1, US_INV).unescape();
3716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(set.containsAll(string16)) {
3718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).containsAll(%s) should be FALSE", pattern, string);
3719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Remove trailing "aaaa".
3722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16.truncate(string16.length()-4);
3723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(!set.containsAll(string16)) {
3724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string);
3725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16=UNICODE_STRING_SIMPLE("byayaxya");
3728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *s16=string16.getBuffer();
3729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t length16=string16.length();
3730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 ||
3731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 ||
3732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 ||
3733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 5, USET_SPAN_NOT_CONTAINED)!=5 ||
3734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 4, USET_SPAN_NOT_CONTAINED)!=4 ||
3735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 3, USET_SPAN_NOT_CONTAINED)!=3
3736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
3737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).span(while not) returns the wrong value", pattern);
3738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern="[a{ab}{abc}{cd}]";
3741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern16=UnicodeString(pattern, -1, US_INV);
3742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern(pattern16, errorCode);
3743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16=UNICODE_STRING_SIMPLE("acdabcdabccd");
3748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s16=string16.getBuffer();
3749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length16=string16.length();
3750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 ||
3751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16, 12, USET_SPAN_SIMPLE)!=6 ||
3752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.span(s16+7, 5, USET_SPAN_SIMPLE)!=5
3753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
3754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).span(while longest match) returns the wrong value", pattern);
3755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
3757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern="[d{cd}{bcd}{ab}]";
3758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pattern16=UnicodeString(pattern, -1, US_INV);
3759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.applyPattern(pattern16, errorCode).freeze();
3760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
3761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
3763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    string16=UNICODE_STRING_SIMPLE("abbcdabcdabd");
3765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    s16=string16.getBuffer();
3766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length16=string16.length();
3767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 ||
3768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 ||
3769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0
3770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
3771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
3772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
3773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
3774