1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 1999-2010 International Business Machines Corporation and
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   others. All Rights Reserved.
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************************
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Date        Name        Description
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   10/20/99    alan        Creation.
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   03/22/2000  Madhu       Added additional tests
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)********************************************************************************
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <stdio.h>
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include <string.h>
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "usettest.h"
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucnv.h"
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h"
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/usetiter.h"
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ustring.h"
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/parsepos.h"
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/symtable.h"
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uversion.h"
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "hash.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Number of elements in a statically sized array, converted to int32_t.
// Only meaningful for true arrays; the argument must not be a pointer.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

// Reports a failure through dataerrln() -- with source file, line number and
// the ICU error name -- when `status` holds a failure code.
// The macro only reports; it does not return from the enclosing function.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    dataerrln("fail in file \"%s\", line %d: \"%s\"", __FILE__, __LINE__, \
    u_errorName(status));}}

// Reports a failure through dataerrln() -- with source file and line number --
// when `expr` evaluates to false.
// The macro only reports; it does not return from the enclosing function.
#define TEST_ASSERT(expr) {if (!(expr)) { \
    dataerrln("fail in file \"%s\", line %d", __FILE__, __LINE__); }}
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString operator+(const UnicodeString& left, const UnicodeSet& set) {
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat;
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.toPattern(pat);
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return left + UnicodeSetTest::escape(pat);
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Expands to one `case` label of the runIndexedTest() switch.
// It always stores the stringified test name in `name`; when `exec` is true
// it additionally logs "<test>---", logs an empty line, and invokes the test
// method. The expansion ends in `break` without a semicolon, so each use is
// written as a statement: CASE(n, TestFoo);
#define CASE(id,test) case id:                          \
                          name = #test;                 \
                          if (exec) {                   \
                              logln(#test "---");       \
                              logln();                  \
                              test();                   \
                          }                             \
                          break
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/**
 * Constructs the test object with no UTF-8 converter; the converter is
 * created on first use by openUTF8Converter().
 */
UnicodeSetTest::UnicodeSetTest() : utf8Cnv(NULL) {
}
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UConverter *UnicodeSetTest::openUTF8Converter() {
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(utf8Cnv==NULL) {
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode errorCode=U_ZERO_ERROR;
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utf8Cnv=ucnv_open("UTF-8", &errorCode);
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return utf8Cnv;
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/**
 * Releases the converter cached by openUTF8Converter().
 * ucnv_close() is called unconditionally, so utf8Cnv may still be NULL here
 * if the converter was never opened.
 */
UnicodeSetTest::~UnicodeSetTest() {
    ucnv_close(utf8Cnv);
}
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/**
 * Test dispatcher: maps a test index to a test method.
 *
 * For a known index, `name` is set to the method's name and, when `exec` is
 * true, the method is logged and run (see the CASE macro). For any other
 * index, `name` is set to the empty string and nothing is run.
 *
 * @param index zero-based test number (0..23 are defined here)
 * @param exec  whether to actually run the selected test
 * @param name  out-parameter receiving the test name, or "" past the end
 */
void
UnicodeSetTest::runIndexedTest(int32_t index, UBool exec,
                               const char* &name, char* /*par*/) {
    // if (exec) logln((UnicodeString)"TestSuite UnicodeSetTest");
    switch (index) {
        CASE(0,TestPatterns);
        CASE(1,TestAddRemove);
        CASE(2,TestCategories);
        CASE(3,TestCloneEqualHash);
        CASE(4,TestMinimalRep);
        CASE(5,TestAPI);
        CASE(6,TestScriptSet);
        CASE(7,TestPropertySet);
        CASE(8,TestClone);
        CASE(9,TestExhaustive);
        CASE(10,TestToPattern);
        CASE(11,TestIndexOf);
        CASE(12,TestStrings);
        CASE(13,Testj2268);
        CASE(14,TestCloseOver);
        CASE(15,TestEscapePattern);
        CASE(16,TestInvalidCodePoint);
        CASE(17,TestSymbolTable);
        CASE(18,TestSurrogate);
        CASE(19,TestPosixClasses);
        CASE(20,TestIteration);
        CASE(21,TestFreezable);
        CASE(22,TestSpan);
        CASE(23,TestStringSpan);
        // An empty name tells the caller there is no test at this index.
        default: name = ""; break;
    }
}
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Sentinel entry placed inside the expected-string arrays that are passed to
// expectToPattern() (see exp1..exp6 in TestToPattern).
// NOTE(review): presumably the strings listed before it must be contained in
// the set and the strings after it must not be -- confirm against
// expectToPattern().
static const char NOT[] = "%%%%";
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UVector was improperly copying contents
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This code will crash this is still true
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::Testj2268() {
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeSet t;
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  t.add(UnicodeString("abc"));
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeSet test(t);
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  UnicodeString ustrPat;
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  test.toPattern(ustrPat, TRUE);
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test toPattern().
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestToPattern() {
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test that toPattern() round trips with syntax characters and
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // whitespace.
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        static const char* OTHER_TOPATTERN_TESTS[] = {
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "[[:latin:]&[:greek:]]",
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "[[:latin:]-[:greek:]]",
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            "[:nonspacing mark:]",
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            NULL
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        };
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (int32_t j=0; OTHER_TOPATTERN_TESTS[j]!=NULL; ++j) {
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ec = U_ZERO_ERROR;
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeSet s(OTHER_TOPATTERN_TESTS[j], ec);
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(ec)) {
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                dataerrln((UnicodeString)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS[j] + " - " + UnicodeString(u_errorName(ec)));
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            checkPat(OTHER_TOPATTERN_TESTS[j], s);
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (UChar32 i = 0; i <= 0x10FFFF; ++i) {
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if ((i <= 0xFF && !u_isalpha(i)) || u_isspace(i)) {
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                // check various combinations to make sure they all work.
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (i != 0 && !toPatternAux(i, i)){
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    continue;
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (!toPatternAux(0, i)){
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    continue;
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if (!toPatternAux(i, 0xFFFF)){
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    continue;
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test pattern behavior of multicharacter strings.
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ec = U_ZERO_ERROR;
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet* s = new UnicodeSet("[a-z {aa} {ab}]", ec);
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // This loop isn't a loop.  It's here to make the compiler happy.
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // If you're curious, try removing it and changing the 'break'
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // statements (except for the last) to goto's.
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (;;) {
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(ec)) break;
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char* exp1[] = {"aa", "ab", NOT, "ac", NULL};
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectToPattern(*s, "[a-z{aa}{ab}]", exp1);
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->add("ac");
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char* exp2[] = {"aa", "ab", "ac", NOT, "xy", NULL};
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectToPattern(*s, "[a-z{aa}{ab}{ac}]", exp2);
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\{l} {r\\}}]"), ec);
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(ec)) break;
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char* exp3[] = {"{l", "r}", NOT, "xy", NULL};
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{r\\}}{\\{l}]"), exp3);
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->add("[]");
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char* exp4[] = {"{l", "r}", "[]", NOT, "xy", NULL};
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\[\\]}{r\\}}{\\{l}]"), exp4);
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->applyPattern(UNICODE_STRING_SIMPLE("[a-z {\\u4E01\\u4E02}{\\n\\r}]"), ec);
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(ec)) break;
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char* exp5[] = {"\\u4E01\\u4E02", "\n\r", NULL};
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectToPattern(*s, UNICODE_STRING_SIMPLE("[a-z{\\u000A\\u000D}{\\u4E01\\u4E02}]"), exp5);
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // j2189
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->clear();
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->add(UnicodeString("abc", ""));
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s->add(UnicodeString("abc", ""));
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char* exp6[] = {"abc", NOT, "ab", NULL};
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectToPattern(*s, "[{abc}]", exp6);
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) errln("FAIL: pattern parse error");
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete s;
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // JB#3400: For 2 character ranges prefer [ab] to [a-b]
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet s;
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s.add((UChar)97, (UChar)98); // 'a', 'b'
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectToPattern(s, "[ab]", NULL);
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool UnicodeSetTest::toPatternAux(UChar32 start, UChar32 end) {
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // use Integer.toString because Utility.hex doesn't handle ints
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat = "";
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // TODO do these in hex
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //String source = "0x" + Integer.toString(start,16).toUpperCase();
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString source;
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    source = source + (uint32_t)start;
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (start != end)
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        source = source + ".." + (uint32_t)end;
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet testSet;
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSet.add(start, end);
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return checkPat(source, testSet);
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool UnicodeSetTest::checkPat(const UnicodeString& source,
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               const UnicodeSet& testSet) {
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // What we want to make sure of is that a pattern generated
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // by toPattern(), with or without escaped unprintables, can
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // be passed back into the UnicodeSet constructor.
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat0;
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSet.toPattern(pat0, TRUE);
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!checkPat(source + " (escaped)", testSet, pat0)) return FALSE;
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //String pat1 = unescapeLeniently(pat0);
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat2;
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSet.toPattern(pat2, FALSE);
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!checkPat(source, testSet, pat2)) return FALSE;
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //String pat3 = unescapeLeniently(pat2);
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if (!checkPat(source + " (in code)", testSet, pat3)) return false;
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln((UnicodeString)source + " => " + pat0 + ", " + pat2);
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return TRUE;
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool UnicodeSetTest::checkPat(const UnicodeString& source,
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               const UnicodeSet& testSet,
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                               const UnicodeString& pat) {
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet testSet2(pat, ec);
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (testSet2 != testSet) {
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"Fail toPattern: " + source + " => " + pat);
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return TRUE;
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::TestPatterns(void) {
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, UnicodeString("[[a-m]&[d-z]&[k-y]]", ""),  "km");
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, UnicodeString("[[a-z]-[m-y]-[d-r]]", ""),  "aczz");
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, UnicodeString("[a\\-z]", ""),  "--aazz");
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, UnicodeString("[-az]", ""),  "--aazz");
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, UnicodeString("[az-]", ""),  "--aazz");
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, UnicodeString("[[[a-z]-[aeiou]i]]", ""), "bdfnptvz");
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Throw in a test of complement
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.complement();
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString exp;
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.append((UChar)0x0000).append("aeeoouu").append((UChar)(0x007a+1)).append((UChar)0xFFFF);
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, exp);
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::TestCategories(void) {
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char* pat = " [:Lu:] "; // Whitespace ok outside [:..:]
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set(pat, status);
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln((UnicodeString)"Fail: Can't construct set with " + pat + " - " + UnicodeString(u_errorName(status)));
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectContainment(set, pat, "ABC", "abc");
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 i;
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t failures = 0;
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Make sure generation of L doesn't pollute cached Lu set
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // First generate L, then Lu
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern("[:L:]", status);
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<0x200; ++i) {
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool l = u_isalpha((UChar)i);
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (l != set.contains(i)) {
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: L contains " + (unsigned short)i + " = " +
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  set.contains(i));
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (++failures == 10) break;
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern("[:Lu:]", status);
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<0x200; ++i) {
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool lu = (u_charType((UChar)i) == U_UPPERCASE_LETTER);
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (lu != set.contains(i)) {
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: Lu contains " + (unsigned short)i + " = " +
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  set.contains(i));
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (++failures == 20) break;
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::TestCloneEqualHash(void) {
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // set1 and set2 used to be built with the obsolete constructor taking
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // UCharCategory values; replaced with pattern constructors
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // markus 20030502
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set1=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Lowercase Letter}"), status); //  :Ll: Letter, lowercase
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set1a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Ll:]"), status); //  Letter, lowercase
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)){
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln((UnicodeString)"FAIL: Can't construst set with category->Ll" + " - " + UnicodeString(u_errorName(status)));
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set2=new UnicodeSet(UNICODE_STRING_SIMPLE("\\p{Decimal Number}"), status);   //Number, Decimal digit
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set2a=new UnicodeSet(UNICODE_STRING_SIMPLE("[:Nd:]"), status);   //Number, Decimal digit
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)){
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: Can't construct set with category->Nd");
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (*set1 != *set1a) {
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: category constructor for Ll broken");
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (*set2 != *set2a) {
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: category constructor for Nd broken");
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set1a;
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set2a;
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Testing copy construction");
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set1copy=new UnicodeSet(*set1);
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(*set1 != *set1copy || *set1 == *set2 ||
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        getPairs(*set1) != getPairs(*set1copy) ||
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set1->hashCode() != set1copy->hashCode()){
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL : Error in copy construction");
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Testing =operator");
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set1equal=*set1;
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set2equal=*set2;
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(set1equal != *set1 || set1equal != *set1copy || set2equal != *set2 ||
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set2equal == *set1 || set2equal == *set1copy || set2equal == set1equal){
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: Error in =operator");
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Testing clone()");
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set1clone=(UnicodeSet*)set1->clone();
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *set2clone=(UnicodeSet*)set2->clone();
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(*set1clone != *set1 || *set1clone != *set1copy || *set1clone != set1equal ||
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *set2clone != *set2 || *set2clone == *set1copy || *set2clone != set2equal ||
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *set2clone == *set1 || *set2clone == set1equal || *set2clone == *set1clone){
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: Error in clone");
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln("Testing hashcode");
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(set1->hashCode() != set1equal.hashCode() || set1->hashCode() != set1clone->hashCode() ||
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set2->hashCode() != set2equal.hashCode() || set2->hashCode() != set2clone->hashCode() ||
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set1copy->hashCode() != set1equal.hashCode() || set1copy->hashCode() != set1clone->hashCode() ||
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set1->hashCode() == set2->hashCode()  || set1copy->hashCode() == set2->hashCode() ||
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set2->hashCode() == set1clone->hashCode() || set2->hashCode() == set1equal.hashCode() ){
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: Error in hashCode()");
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set1;
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set1copy;
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set2;
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set1clone;
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete set2clone;
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::TestAddRemove(void) {
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set; // Construct empty set
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.isEmpty() == TRUE, "set should be empty");
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 0, "size should be 0");
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.complement();
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 0x110000, "size should be 0x110000");
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0061, 0x007a);
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "az");
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.isEmpty() == FALSE, "set should not be empty");
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() != 0, "size should not be equal to 0");
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 26, "size should be equal to 26");
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.remove(0x006d, 0x0070);
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "alqz");
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 22, "size should be equal to 22");
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.remove(0x0065, 0x0067);
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "adhlqz");
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 19, "size should be equal to 19");
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.remove(0x0064, 0x0069);
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "acjlqz");
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 16, "size should be equal to 16");
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.remove(0x0063, 0x0072);
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "absz");
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 10, "size should be equal to 10");
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0066, 0x0071);
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "abfqsz");
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 22, "size should be equal to 22");
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.remove(0x0061, 0x0067);
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "hqsz");
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.remove(0x0061, 0x007a);
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "");
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.isEmpty() == TRUE, "set should be empty");
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 0, "size should be 0");
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0061);
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.isEmpty() == FALSE, "set should not be empty");
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 1, "size should not be equal to 1");
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0062);
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0063);
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "ac");
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 3, "size should not be equal to 3");
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0070);
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0071);
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "acpq");
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 5, "size should not be equal to 5");
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "");
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.isEmpty() == TRUE, "set should be empty");
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == 0, "size should be 0");
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Try removing an entire set from another set
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, "[c-x]", "cx");
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set2;
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.removeAll(set2);
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "deluxx");
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Try adding an entire set to another set
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, "[jackiemclean]", "aacceein");
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.addAll(set2);
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "aacehort");
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2");
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Try retaining an set of elements contained in another set (intersection)
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set3;
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set3, "[a-c]", "ac");
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.containsAll(set3) == FALSE, "set doesn't contain all the elements in set3");
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set3.remove(0x0062);
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set3, "aacc");
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3");
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.retainAll(set3);
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "aacc");
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() == set3.size(), "set.size() should be set3.size()");
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.containsAll(set3) == TRUE, "set should contain all the elements in set3");
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.size() != set3.size(), "set.size() != set3.size()");
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test commutativity
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPattern(set2, "[jackiemclean]", "aacceein");
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.addAll(set2);
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "aacehort");
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    doAssert(set.containsAll(set2) == TRUE, "set should contain all the elements in set2");
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Make sure minimal representation is maintained.
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestMinimalRep() {
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // This is pretty thoroughly tested by checkCanonicalRep()
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // run against the exhaustive operation results.  Use the code
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // here for debugging specific spot problems.
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // 1 overlap against 2
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set("[h-km-q]", status);
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set2("[i-o]", status);
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.addAll(set2);
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "hq");
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // right
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern("[a-m]", status);
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set2.applyPattern("[e-o]", status);
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.addAll(set2);
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "ao");
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // left
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern("[e-o]", status);
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set2.applyPattern("[a-m]", status);
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.addAll(set2);
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "ao");
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // 1 overlap against 3
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern("[a-eg-mo-w]", status);
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set2.applyPattern("[d-q]", status);
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.addAll(set2);
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectPairs(set, "aw");
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestAPI() {
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // default ct
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set;
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!set.isEmpty() || set.getRangeCount() != 0) {
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL, set should be empty but isn't: " +
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              set);
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // clear(), isEmpty()
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0061);
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.isEmpty()) {
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL, set shouldn't be empty but is: " +
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              set);
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!set.isEmpty()) {
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL, set should be empty but isn't: " +
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              set);
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // size()
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.size() != 0) {
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL, size should be 0, but is " + set.size() +
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ": " + set);
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0061);
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.size() != 1) {
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL, size should be 1, but is " + set.size() +
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ": " + set);
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add(0x0031, 0x0039);
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.size() != 10) {
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL, size should be 10, but is " + set.size() +
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ": " + set);
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // contains(first, last)
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern("[A-Y 1-8 b-d l-y]", status);
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i = 0; i<set.getRangeCount(); ++i) {
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 a = set.getRangeStart(i);
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 b = set.getRangeEnd(i);
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (!set.contains(a, b)) {
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL, should contain " + (unsigned short)a + '-' + (unsigned short)b +
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " but doesn't: " + set);
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (set.contains((UChar32)(a-1), b)) {
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL, shouldn't contain " +
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  (unsigned short)(a-1) + '-' + (unsigned short)b +
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " but does: " + set);
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (set.contains(a, (UChar32)(b+1))) {
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL, shouldn't contain " +
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  (unsigned short)a + '-' + (unsigned short)(b+1) +
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " but does: " + set);
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Ported InversionList test.
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet a((UChar32)3,(UChar32)10);
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet b((UChar32)7,(UChar32)15);
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet c;
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln((UnicodeString)"a [3-10]: " + a);
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    logln((UnicodeString)"b [7-15]: " + b);
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c = a;
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c.addAll(b);
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet exp((UChar32)3,(UChar32)15);
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c == exp) {
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"c.set(a).add(b): " + c);
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: c.set(a).add(b) = " + c + ", expect " + exp);
591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c.complement();
593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.set((UChar32)0, (UChar32)2);
594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.add((UChar32)16, UnicodeSet::MAX_VALUE);
595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c == exp) {
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"c.complement(): " + c);
597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp);
599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c.complement();
601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.set((UChar32)3, (UChar32)15);
602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c == exp) {
603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"c.complement(): " + c);
604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: c.complement() = " + c + ", expect " + exp);
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c = a;
608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c.complementAll(b);
609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.set((UChar32)3,(UChar32)6);
610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.add((UChar32)11,(UChar32) 15);
611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c == exp) {
612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"c.set(a).exclusiveOr(b): " + c);
613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: c.set(a).exclusiveOr(b) = " + c + ", expect " + exp);
615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp = c;
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(setToBits(c), c);
619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c == exp) {
620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"bitsToSet(setToBits(c)): " + c);
621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: bitsToSet(setToBits(c)) = " + c + ", expect " + exp);
623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Additional tests for coverage JB#2118
626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::complement(class UnicodeString const &)
627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::complementAll(class UnicodeString const &)
628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::containsNone(class UnicodeSet const &)
629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::containsNone(long,long)
630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::containsSome(class UnicodeSet const &)
631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::containsSome(long,long)
632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::removeAll(class UnicodeString const &)
633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::retain(long)
634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::retainAll(class UnicodeString const &)
635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //UnicodeSetIterator::getString(void)
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.clear();
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.complement("ab");
639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[{ab}]", status);
640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set != exp) { errln("FAIL: complement(\"ab\")"); return; }
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSetIterator iset(set);
644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!iset.next() || !iset.isString()) {
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSetIterator::next/isString");
646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if (iset.getString() != "ab") {
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSetIterator::getString");
648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.add((UChar32)0x61, (UChar32)0x7A);
651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.complementAll("alan");
652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[{ab}b-kmo-z]", status);
653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set != exp) { errln("FAIL: complementAll(\"alan\")"); return; }
655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[a-z]", status);
657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[aln]", status);
661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!set.containsNone(exp)) { errln("FAIL: containsNone(UnicodeSet)"); }
663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.containsSome(exp)) { errln("FAIL: containsSome(UnicodeSet)"); }
664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.containsNone((UChar32)0x61, (UChar32)0x7A)) {
666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: containsNone(UChar32, UChar32)");
667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!set.containsSome((UChar32)0x61, (UChar32)0x7A)) {
669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: containsSome(UChar32, UChar32)");
670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!set.containsNone((UChar32)0x41, (UChar32)0x5A)) {
672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: containsNone(UChar32, UChar32)");
673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set.containsSome((UChar32)0x41, (UChar32)0x5A)) {
675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: containsSome(UChar32, UChar32)");
676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.removeAll("liu");
679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[{ab}b-hj-kmo-tv-z]", status);
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set != exp) { errln("FAIL: removeAll(\"liu\")"); return; }
682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.retainAll("star");
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[rst]", status);
685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set != exp) { errln("FAIL: retainAll(\"star\")"); return; }
687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.retain((UChar32)0x73);
689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    exp.applyPattern("[s]", status);
690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL"); return; }
691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set != exp) { errln("FAIL: retain('s')"); return; }
692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t buf[32];
694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t slen = set.serialize(buf, sizeof(buf)/sizeof(buf[0]), status);
695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) { errln("FAIL: serialize"); return; }
696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (slen != 3 || buf[0] != 2 || buf[1] != 0x73 || buf[2] != 0x74) {
697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: serialize");
698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Conversions to and from USet
702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *uniset = &set;
703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    USet *uset = uniset->toUSet();
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT((void *)uset == (void *)uniset);
705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *setx = UnicodeSet::fromUSet(uset);
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT((void *)setx == (void *)uset);
707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet *constSet = uniset;
708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const USet *constUSet = constSet->toUSet();
709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT((void *)constUSet == (void *)constSet);
710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet *constSetx = UnicodeSet::fromUSet(constUSet);
711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT((void *)constSetx == (void *)constUSet);
712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // span(UnicodeString) and spanBack(UnicodeString) convenience methods
714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString longString=UNICODE_STRING_SIMPLE("aaaaaaaaaabbbbbbbbbbcccccccccc");
715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet ac(0x61, 0x63);
716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ac.remove(0x62).freeze();
717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( ac.span(longString, -5, USET_SPAN_CONTAINED)!=10 ||
718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 0, USET_SPAN_CONTAINED)!=10 ||
719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 5, USET_SPAN_CONTAINED)!=10 ||
720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 10, USET_SPAN_CONTAINED)!=10 ||
721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 15, USET_SPAN_CONTAINED)!=15 ||
722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 20, USET_SPAN_CONTAINED)!=30 ||
723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 25, USET_SPAN_CONTAINED)!=30 ||
724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 30, USET_SPAN_CONTAINED)!=30 ||
725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, 35, USET_SPAN_CONTAINED)!=30 ||
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.span(longString, INT32_MAX, USET_SPAN_CONTAINED)!=30
727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("UnicodeSet.span(UnicodeString, ...) returns incorrect end indexes");
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( ac.spanBack(longString, -5, USET_SPAN_CONTAINED)!=0 ||
731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 0, USET_SPAN_CONTAINED)!=0 ||
732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 5, USET_SPAN_CONTAINED)!=0 ||
733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 10, USET_SPAN_CONTAINED)!=0 ||
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 15, USET_SPAN_CONTAINED)!=15 ||
735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 20, USET_SPAN_CONTAINED)!=20 ||
736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 25, USET_SPAN_CONTAINED)!=20 ||
737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 30, USET_SPAN_CONTAINED)!=20 ||
738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, 35, USET_SPAN_CONTAINED)!=20 ||
739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ac.spanBack(longString, INT32_MAX, USET_SPAN_CONTAINED)!=20
740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("UnicodeSet.spanBack(UnicodeString, ...) returns incorrect start indexes");
742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestIteration() {
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int i = 0;
748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int outerLoop;
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // 6 code points, 3 ranges, 2 strings, 8 total elements
751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   Iteration will access them in sorted order -  a, b, c, y, z, U0001abcd, "str1", "str2"
752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set(UNICODE_STRING_SIMPLE("[zabyc\\U0001abcd{str1}{str2}]"), ec);
753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TEST_ASSERT_SUCCESS(ec);
754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSetIterator it(set);
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (outerLoop=0; outerLoop<3; outerLoop++) {
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Run the test multiple times, to check that iterator.reset() is working.
758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (i=0; i<10; i++) {
759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UBool         nextv        = it.next();
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UBool         isString     = it.isString();
761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t       codePoint    = it.getCodepoint();
762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            //int32_t       codePointEnd = it.getCodepointEnd();
763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeString s   = it.getString();
764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            switch (i) {
765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 0:
766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == FALSE);
768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(codePoint==0x61);
769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "a");
770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 1:
772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == FALSE);
774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(codePoint==0x62);
775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "b");
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 2:
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == FALSE);
780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(codePoint==0x63);
781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "c");
782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 3:
784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == FALSE);
786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(codePoint==0x79);
787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "y");
788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 4:
790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == FALSE);
792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(codePoint==0x7a);
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "z");
794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 5:
796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == FALSE);
798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(codePoint==0x1abcd);
799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == UnicodeString((UChar32)0x1abcd));
800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 6:
802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == TRUE);
804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "str1");
805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 7:
807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == TRUE);
808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(isString == TRUE);
809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(s == "str2");
810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 8:
812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == FALSE);
813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            case 9:
815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                TEST_ASSERT(nextv == FALSE);
816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        it.reset();  // prepare to run the iteration again.
820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestStrings() {
827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet* testList[] = {
830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet::createFromAll("abc"),
831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        new UnicodeSet("[a-c]", ec),
832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        &(UnicodeSet::createFrom("ch")->add('a','z').add("ll")),
834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        new UnicodeSet("[{ll}{ch}a-z]", ec),
835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet::createFrom("ab}c"),
837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        new UnicodeSet("[{ab\\}c}]", ec),
838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        &((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')),
840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec),
841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        NULL
843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: couldn't construct test sets");
847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i = 0; testList[i] != NULL; i+=2) {
850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(ec)) {
851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeString pat0, pat1;
852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testList[i]->toPattern(pat0, TRUE);
853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testList[i+1]->toPattern(pat1, TRUE);
854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (*testList[i] == *testList[i+1]) {
855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                logln((UnicodeString)"Ok: " + pat0 + " == " + pat1);
856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                logln((UnicodeString)"FAIL: " + pat0 + " != " + pat1);
858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete testList[i];
861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete testList[i+1];
862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test the [:Latin:] syntax.
867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestScriptSet() {
869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectContainment(UNICODE_STRING_SIMPLE("[:Latin:]"), "aA", CharsToUnicodeString("\\u0391\\u03B1"));
870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectContainment(UNICODE_STRING_SIMPLE("[:Greek:]"), CharsToUnicodeString("\\u0391\\u03B1"), "aA");
872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Jitterbug 1423 */
874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectContainment(UNICODE_STRING_SIMPLE("[[:Common:][:Inherited:]]"), CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/**
 * Test property-set pattern syntax: POSIX-style [:prop:], Perl-style
 * \p{...} / \P{...}, \N{...} name escapes, and literal '-' handling.
 */
881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestPropertySet() {
882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char* const DATA[] = {
883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Pattern, Chars IN, Chars NOT in
884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:Latin:]",
886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "aA",
887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0391\\u03B1",
888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\p{Greek}]",
890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0391\\u03B1",
891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "aA",
892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\P{ GENERAL Category = upper case letter }",
894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc",
895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ABC",
896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION
898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Combining class: @since ICU 2.2
899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Check both symbolic and numeric
900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\p{ccc=Nukta}",
901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0ABC",
902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc",
903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\p{Canonical Combining Class = 11}",
905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u05B1",
906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u05B2",
907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:c c c = iota subscript :]",
909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0345",
910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyz",
911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Bidi class: @since ICU 2.2
914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\p{bidiclass=lefttoright}",
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc",
916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0671\\u0672",
917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Binary properties: @since ICU 2.2
919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\p{ideographic}",
920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u4E0A",
921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "x",
922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:math=false:]",
924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "q)*(",
925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // weiv: )(and * were removed from math in Unicode 4.0.1
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        //"(*+)",
927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "+<>^",
928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // JB#1767 \N{}, \p{ASCII}
930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:Ascii:]",
931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc\\u0000\\u007F",
932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0080\\u4E00",
933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\N{ latin small letter  a  }[:name= latin small letter z:]]",
935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "az",
936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "qrs",
937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // JB#2015
939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:any:]",
940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "a\\U0010FFFF",
941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "",
942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:nv=0.5:]",
944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u00BD\\u0F2A",
945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u00BC",
946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // JB#2653: Age
948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:Age=1.1:]",
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u03D6", // 1.1
950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u03D8\\u03D9", // 3.2
951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:Age=3.1:]",
953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u1800\\u3400\\U0002f800",
954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // JB#2350: Case_Sensitive
957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:Case Sensitive:]",
958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "A\\u1FFC\\U00010410",
959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ";\\u00B4\\U00010500",
960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // JB#2832: C99-compatibility props
962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:blank:]",
963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        " \\u0009",
964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "1-9A-Z",
965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:graph:]",
967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "19AZ",
968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        " \\u0003\\u0007\\u0009\\u000A\\u000D",
969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:punct:]",
971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "!@#%&*()[]{}-_\\/;:,.?'\"",
972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "09azAZ",
973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:xdigit:]",
975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "09afAF",
976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "gG!",
977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Regex compatibility test
979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[-b]", // leading '-' is literal
980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-b",
981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ac",
982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[^-b]", // leading '-' is literal
984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ac",
985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-b",
986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[b-]", // trailing '-' is literal
988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-b",
989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ac",
990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[^b-]", // trailing '-' is literal
992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ac",
993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-b",
994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a-b-]", // trailing '-' is literal
996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ab-",
997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "c=",
998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[a-q]&[p-z]-]", // trailing '-' is literal
1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "pq-",
1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "or=",
1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\s|\\)|:|$|\\>]", // from regex tests
1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "s|):$>",
1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc",
1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\uDC00cd]", // JB#2906: isolated trail at start
1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "cd\\uDC00",
1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ab\\uD800\\U00010000",
1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[ab\\uD800]", // JB#2906: isolated lead at end
1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ab\\uD800",
1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "cd\\uDC00\\U00010000",
1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[ab\\uD800cd]", // JB#2906: isolated lead in middle
1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd\\uD800",
1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ef\\uDC00\\U00010000",
1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[ab\\uDC00cd]", // JB#2906: isolated trail in middle
1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd\\uDC00",
1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "ef\\uD800\\U00010000",
1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION
1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:^lccc=0:]", // Lead canonical class
1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0300\\u0301",
1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd\\u00c0\\u00c5",
1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:^tccc=0:]", // Trail canonical class
1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0300\\u0301\\u00c0\\u00c5",
1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd",
1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[:^lccc=0:][:^tccc=0:]]", // Lead and trail canonical class
1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0300\\u0301\\u00c0\\u00c5",
1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd",
1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[:^lccc=0:]-[:^tccc=0:]]", // Stuff that starts with an accent but ends with a base (none right now)
1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "",
1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd\\u0300\\u0301\\u00c0\\u00c5",
1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[:ccc=0:]-[:lccc=0:]-[:tccc=0:]]", // Weirdos. Complete canonical class is zero, but both lead and trail are not
1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0F73\\u0F75\\u0F81",
1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abcd\\u0300\\u0301\\u00c0\\u00c5",
1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* !UCONFIG_NO_NORMALIZATION */
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:Assigned:]",
1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0888\\uFDD3\\uFFFE\\U00050005",
1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Script_Extensions, new in Unicode 6.0
1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:scx=Arab:]",
1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u061E\\u061F\\u0620\\u0621\\u063F\\u0640\\u0650\\u065E\\uFDF1\\uFDF2\\uFDF3",
1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u061D\\u065F\\uFDEF\\uFDFE",
1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // U+FDF2 has Script=Arabic and also Arab in its Script_Extensions,
1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // so scx-sc is missing U+FDF2.
1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[:Script_Extensions=Arabic:]-[:Arab:]]",
1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u0640\\u064B\\u0650\\u0655\\uFDFD",
1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\uFDF2"
1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; i<DATA_LEN; i+=3) {
1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectContainment(UnicodeString(DATA[i], -1, US_INV), CharsToUnicodeString(DATA[i+1]),
1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          CharsToUnicodeString(DATA[i+2]));
1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  * Test that Posix style character classes [:digit:], etc.
1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  *   have the Unicode definitions from TR 18.
1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)  */
1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestPosixClasses() {
1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:alpha:]", status);
1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Alphabetic}"), status);
1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:lower:]", status);
1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{lowercase}"), status);
1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:upper:]", status);
1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Uppercase}"), status);
1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:punct:]", status);
1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=Punctuation}"), status);
1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:digit:]", status);
1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{gc=DecimalNumber}"), status);
1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:xdigit:]", status);
1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{DecimalNumber}\\p{HexDigit}]"), status);
1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:alnum:]", status);
1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Alphabetic}\\p{DecimalNumber}]"), status);
1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:space:]", status);
1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Whitespace}"), status);
1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:blank:]", status);
1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}\\p{ParagraphSeparator}]]"),
1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status);
1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:cntrl:]", status);
1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("\\p{Control}"), status);
1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:graph:]", status);
1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]"), status);
1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode status = U_ZERO_ERROR;
1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s1("[:print:]", status);
1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet s2(UNICODE_STRING_SIMPLE("[[:graph:][:blank:]-[\\p{Control}]]") ,status);
1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT_SUCCESS(status);
1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TEST_ASSERT(s1==s2);
1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test cloning of UnicodeSet.  For C++, we test the copy constructor.
1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestClone() {
1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet s("[abcxyz]", ec);
1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet t(s);
1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectContainment(t, "abc", "def");
1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test the indexOf() and charAt() methods.
1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestIndexOf() {
1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set("[a-cx-y3578]", ec);
1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet constructor");
1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; i<set.size(); ++i) {
1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c = set.charAt(i);
1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (set.indexOf(c) != i) {
1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("FAIL: charAt(%d) = %X => indexOf() => %d",
1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                i, c, set.indexOf(c));
1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c = set.charAt(set.size());
1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c != -1) {
1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: charAt(<out of range>) = %X", c);
1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t j = set.indexOf((UChar32)0x71/*'q'*/);
1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (j != -1) {
1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: indexOf('q') = " + j);
1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Test closure API.
1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestCloseOver() {
1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char CASE[] = {(char)USET_CASE_INSENSITIVE};
1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char CASE_MAPPINGS[] = {(char)USET_ADD_CASE_MAPPINGS};
1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char* DATA[] = {
1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // selector, input, output
1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[aq\\u00DF{Bc}{bC}{Fi}]",
1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[aAqQ\\u00DF\\u1E9E\\uFB01{ss}{bc}{fi}]",  // U+1E9E LATIN CAPITAL LETTER SHARP S is new in Unicode 5.1
1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u01F1]", // 'DZ'
1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u01F1\\u01F2\\u01F3]",
1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u1FB4]",
1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u1FB4{\\u03AC\\u03B9}]",
1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[{F\\uFB01}]",
1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\uFB03{ffi}]",
1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, // make sure binary search finds limits
1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a\\uFF3A]",
1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[aA\\uFF3A\\uFF5A]",
1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a-z]","[A-Za-z\\u017F\\u212A]",
1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[abc]","[A-Ca-c]",
1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE,
1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[ABC]","[A-Ca-c]",
1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[i]", "[iI]",
1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u0130]",          "[\\u0130{i\\u0307}]", // dotted I
1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[{i\\u0307}]",       "[\\u0130{i\\u0307}]", // i with dot
1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u0131]",          "[\\u0131]", // dotless i
1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u0390]",          "[\\u0390\\u1FD3{\\u03B9\\u0308\\u0301}]",
1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u03c2]",          "[\\u03a3\\u03c2\\u03c3]", // sigmas
1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u03f2]",          "[\\u03f2\\u03f9]", // lunate sigmas
1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u03f7]",          "[\\u03f7\\u03f8]",
1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\u1fe3]",          "[\\u03b0\\u1fe3{\\u03c5\\u0308\\u0301}]",
1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\ufb05]",          "[\\ufb05\\ufb06{st}]",
1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[{st}]",             "[\\ufb05\\ufb06{st}]",
1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[\\U0001044F]",      "[\\U00010427\\U0001044F]",
1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[{a\\u02BE}]",       "[\\u1E9A{a\\u02BE}]", // first in sorted table
1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE, "[{\\u1f7c\\u03b9}]", "[\\u1ff2{\\u1f7c\\u03b9}]", // last in sorted table
1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_FILE_IO
1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE_MAPPINGS,
1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[aq\\u00DF{Bc}{bC}{Fi}]",
1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[aAqQ\\u00DF{ss}{Ss}{SS}{Bc}{BC}{bC}{bc}{FI}{Fi}{fi}]",
1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE_MAPPINGS,
1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u01F1]", // 'DZ'
1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u01F1\\u01F2\\u01F3]",
1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        CASE_MAPPINGS,
1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a-z]",
1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[A-Za-z]",
1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        NULL
1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet s;
1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet t;
1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString buf;
1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; DATA[i]!=NULL; i+=3) {
1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t selector = DATA[i][0];
1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString pat(DATA[i+1], -1, US_INV);
1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString exp(DATA[i+2], -1, US_INV);
1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        s.applyPattern(pat, ec);
1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        s.closeOver(selector);
1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        t.applyPattern(exp, ec);
1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) {
1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("FAIL: applyPattern failed");
1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (s == t) {
1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln((UnicodeString)"Ok: " + pat + ".closeOver(" + selector + ") => " + exp);
1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            dataerrln((UnicodeString)"FAIL: " + pat + ".closeOver(" + selector + ") => " +
1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  s.toPattern(buf, TRUE) + ", expected " + exp);
1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0
1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Unused test code.
1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * This was used to compare the old implementation (using USET_CASE)
1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * with the new one (using 0x100 temporarily)
1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * while transitioning from hardcoded case closure tables in uniset.cpp
1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * (moved to uniset_props.cpp) to building the data by gencase into ucase.icu.
1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * and using ucase.c functions for closure.
1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * See Jitterbug 3432 RFE: Move uniset.cpp data to a data file
1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *
1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Note: The old and new implementation never fully matched because
1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the old implementation turned out to not map U+0130 and U+0131 correctly
1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * (dotted I and dotless i) and because the old implementation's data tables
1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * were outdated compared to Unicode 4.0.1 at the time of the change to the
1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * new implementation. (So sigmas and some other characters were not handled
1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * according to the newer Unicode version.)
1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet sens("[:case_sensitive:]", ec), sens2, s2;
1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSetIterator si(sens);
1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString str, buf2;
1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *pStr;
1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c;
1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(si.next()) {
1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(!si.isString()) {
1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c=si.getCodepoint();
1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s.clear();
1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s.add(c);
1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            str.setTo(c);
1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            str.foldCase();
1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sens2.add(str);
1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            t=s;
1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s.closeOver(USET_CASE);
1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            t.closeOver(0x100);
1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(s!=t) {
1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("FAIL: closeOver(U+%04x) differs: ", c);
1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // remove all code points
1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // should contain all full case folding mapping strings
1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    sens2.remove(0, 0x10ffff);
1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    si.reset(sens2);
1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(si.next()) {
1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(si.isString()) {
1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            pStr=&si.getString();
1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s.clear();
1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s.add(*pStr);
1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            t=s2=s;
1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            s.closeOver(USET_CASE);
1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            t.closeOver(0x100);
1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(s!=t) {
1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln((UnicodeString)"FAIL: closeOver("+s2.toPattern(buf, TRUE)+") differs: ");
1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln((UnicodeString)"old "+s.toPattern(buf, TRUE)+" new: "+t.toPattern(buf2, TRUE));
1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Test the pattern API
1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s.applyPattern("[abc]", USET_CASE_INSENSITIVE, NULL, ec);
1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: applyPattern failed");
1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectContainment(s, "abcABC", "defDEF");
1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet v("[^abc]", USET_CASE_INSENSITIVE, NULL, ec);
1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: constructor failed");
1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectContainment(v, "defDEF", "abcABC");
1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet cm("[abck]", USET_ADD_CASE_MAPPINGS, NULL, ec);
1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: construct w/case mappings failed");
1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectContainment(cm, "abckABCK", CharsToUnicodeString("defDEF\\u212A"));
1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestEscapePattern() {
1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char pattern[] =
1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\uFEFF \\u200A-\\u200E \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char exp[] =
1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u200A-\\u200E\\uFEFF\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // We test this with two passes; in the second pass we
1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // pre-unescape the pattern.  Since U+200E is rule whitespace,
1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // this fails -- which is what we expect.
1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t pass=1; pass<=2; ++pass) {
1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode ec = U_ZERO_ERROR;
1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString pat(pattern, -1, US_INV);
1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (pass==2) {
1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            pat = pat.unescape();
1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Pattern is only good for pass 1
1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool isPatternValid = (pass==1);
1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet set(pat, ec);
1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(ec) != isPatternValid){
1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: applyPattern(" +
1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  escape(pat) + ") => " +
1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  u_errorName(ec));
1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) {
1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (set.contains((UChar)0x0644)){
1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: " + escape(pat) + " contains(U+0664)");
1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString newpat;
1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.toPattern(newpat, TRUE);
1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (newpat == UnicodeString(exp, -1, US_INV)) {
1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln(escape(pat) + " => " + newpat);
1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: " + escape(pat) + " => " + newpat);
1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (int32_t i=0; i<set.getRangeCount(); ++i) {
1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeString str("Range ");
1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            str.append((UChar)(0x30 + i))
1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                .append(": ")
1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                .append((UChar32)set.getRangeStart(i))
1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                .append(" - ")
1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                .append((UChar32)set.getRangeEnd(i));
1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            str = str + " (" + set.getRangeStart(i) + " - " +
1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                set.getRangeEnd(i) + ")";
1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (set.getRangeStart(i) < 0) {
1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln((UnicodeString)"FAIL: " + escape(str));
1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                logln(escape(str));
1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::expectRange(const UnicodeString& label,
1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 const UnicodeSet& set,
1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 UChar32 start, UChar32 end) {
1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet exp(start, end);
1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat;
1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (set == exp) {
1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln(label + " => " + set.toPattern(pat, TRUE));
1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString xpat;
1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: " + label + " => " +
1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              set.toPattern(pat, TRUE) +
1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ", expected " + exp.toPattern(xpat, TRUE));
1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestInvalidCodePoint() {
1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar32 DATA[] = {
1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Test range             Expected range
1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0, 0x10FFFF,              0, 0x10FFFF,
1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (UChar32)-1, 8,           0, 8,
1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        8, 0x110000,              8, 0x10FFFF
1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const int32_t DATA_LENGTH = sizeof(DATA)/sizeof(DATA[0]);
1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat;
1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i;
1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<DATA_LENGTH; i+=4) {
1471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 start  = DATA[i];
1472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 end    = DATA[i+1];
1473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 xstart = DATA[i+2];
1474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 xend   = DATA[i+3];
1475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Try various API using the test code points
1477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet set(start, end);
1479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectRange((UnicodeString)"ct(" + start + "," + end + ")",
1480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    set, xstart, xend);
1481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.clear();
1483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.set(start, end);
1484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectRange((UnicodeString)"set(" + start + "," + end + ")",
1485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    set, xstart, xend);
1486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool b = set.contains(start);
1488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        b = set.contains(start, end);
1489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        b = set.containsNone(start, end);
1490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        b = set.containsSome(start, end);
1491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /*int32_t index = set.indexOf(start);*/
1493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.clear();
1495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.add(start);
1496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.add(start, end);
1497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectRange((UnicodeString)"add(" + start + "," + end + ")",
1498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    set, xstart, xend);
1499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.set(0, 0x10FFFF);
1501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.retain(start, end);
1502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectRange((UnicodeString)"retain(" + start + "," + end + ")",
1503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    set, xstart, xend);
1504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.retain(start);
1505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.set(0, 0x10FFFF);
1507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.remove(start);
1508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.remove(start, end);
1509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.complement();
1510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectRange((UnicodeString)"!remove(" + start + "," + end + ")",
1511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    set, xstart, xend);
1512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.set(0, 0x10FFFF);
1514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.complement(start, end);
1515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.complement();
1516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectRange((UnicodeString)"!complement(" + start + "," + end + ")",
1517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    set, xstart, xend);
1518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.complement(start);
1519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar32 DATA2[] = {
1522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0,
1523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x10FFFF,
1524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (UChar32)-1,
1525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x110000
1526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
1527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const int32_t DATA2_LENGTH = sizeof(DATA2)/sizeof(DATA2[0]);
1528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<DATA2_LENGTH; ++i) {
1530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c = DATA2[i], end = 0x10FFFF;
1531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool valid = (c >= 0 && c <= 0x10FFFF);
1532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet set(0, 0x10FFFF);
1534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // For single-codepoint contains, invalid codepoints are NOT contained
1536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool b = set.contains(c);
1537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (b == valid) {
1538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln((UnicodeString)"[\\u0000-\\U0010FFFF].contains(" + c +
1539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ") = " + b);
1540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].contains(" + c +
1542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ") = " + b);
1543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // For codepoint range contains, containsNone, and containsSome,
1546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // invalid or empty (start > end) ranges have UNDEFINED behavior.
1547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        b = set.contains(c, end);
1548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].contains(" + c +
1549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              "," + end + ") = " + b);
1550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        b = set.containsNone(c, end);
1552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsNone(" + c +
1553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              "," + end + ") = " + b);
1554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        b = set.containsSome(c, end);
1556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"* [\\u0000-\\U0010FFFF].containsSome(" + c +
1557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              "," + end + ") = " + b);
1558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t index = set.indexOf(c);
1560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((index >= 0) == valid) {
1561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln((UnicodeString)"[\\u0000-\\U0010FFFF].indexOf(" + c +
1562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ") = " + index);
1563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: [\\u0000-\\U0010FFFF].indexOf(" + c +
1565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ") = " + index);
1566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Used by TestSymbolTable
1571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class TokenSymbolTable : public SymbolTable {
1572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
1573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    Hashtable contents;
1574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    TokenSymbolTable(UErrorCode& ec) : contents(FALSE, ec) {
1576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        contents.setValueDeleter(uhash_deleteUnicodeString);
1577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ~TokenSymbolTable() {}
1580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
1582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * (Non-SymbolTable API) Add the given variable and value to
1583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the table.  Variable should NOT contain leading '$'.
1584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void add(const UnicodeString& var, const UnicodeString& value,
1586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             UErrorCode& ec) {
1587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_SUCCESS(ec)) {
1588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            contents.put(var, new UnicodeString(value), ec);
1589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
1593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * SymbolTable API
1594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual const UnicodeString* lookup(const UnicodeString& s) const {
1596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return (const UnicodeString*) contents.get(s);
1597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
1600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * SymbolTable API
1601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual const UnicodeFunctor* lookupMatcher(UChar32 /*ch*/) const {
1603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
1604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
1607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * SymbolTable API
1608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    virtual UnicodeString parseReference(const UnicodeString& text,
1610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                         ParsePosition& pos, int32_t limit) const {
1611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start = pos.getIndex();
1612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t i = start;
1613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString result;
1614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (i < limit) {
1615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UChar c = text.charAt(i);
1616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) {
1617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
1618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++i;
1620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (i == start) { // No valid name chars
1622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return result; // Indicate failure with empty string
1623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        pos.setIndex(i);
1625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        text.extractBetween(start, i, result);
1626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return result;
1627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
1629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestSymbolTable() {
1631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Multiple test cases can be set up here.  Each test case
1632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // is terminated by null:
1633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // var, value, var, value,..., input pat., exp. output pat., null
1634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char* DATA[] = {
1635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "us", "a-z", "[0-1$us]", "[0-1a-z]", NULL,
1636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "us", "[a-z]", "[0-1$us]", "[0-1[a-z]]", NULL,
1637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "us", "\\[a\\-z\\]", "[0-1$us]", "[-01\\[\\]az]", NULL,
1638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        NULL
1639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
1640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; DATA[i]!=NULL; ++i) {
1642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode ec = U_ZERO_ERROR;
1643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        TokenSymbolTable sym(ec);
1644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) {
1645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("FAIL: couldn't construct TokenSymbolTable");
1646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Set up variables
1650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (DATA[i+2] != NULL) {
1651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sym.add(UnicodeString(DATA[i], -1, US_INV), UnicodeString(DATA[i+1], -1, US_INV), ec);
1652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (U_FAILURE(ec)) {
1653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("FAIL: couldn't add to TokenSymbolTable");
1654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
1655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i += 2;
1657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Input pattern and expected output pattern
1660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString inpat = UnicodeString(DATA[i], -1, US_INV), exppat = UnicodeString(DATA[i+1], -1, US_INV);
1661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i += 2;
1662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ParsePosition pos(0);
1664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet us(inpat, pos, USET_IGNORE_SPACE, &sym, ec);
1665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) {
1666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("FAIL: couldn't construct UnicodeSet");
1667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // results
1671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (pos.getIndex() != inpat.length()) {
1672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"Failed to read to end of string \""
1673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  + inpat + "\": read to "
1674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  + pos.getIndex() + ", length is "
1675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  + inpat.length());
1676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet us2(exppat, ec);
1679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) {
1680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("FAIL: couldn't construct expected UnicodeSet");
1681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString a, b;
1685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (us != us2) {
1686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"Failed, got " + us.toPattern(a, TRUE) +
1687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ", expected " + us2.toPattern(b, TRUE));
1688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln((UnicodeString)"Ok, got " + us.toPattern(a, TRUE));
1690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestSurrogate() {
1695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char* DATA[] = {
1696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // These should all behave identically
1697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[abc\\uD800\\uDC00]",
1698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // "[abc\uD800\uDC00]", // Can't do this on C -- only Java
1699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[abc\\U00010000]",
1700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0
1701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
1702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int i=0; DATA[i] != 0; ++i) {
1703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UErrorCode ec = U_ZERO_ERROR;
1704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"Test pattern " + i + " :" + UnicodeString(DATA[i], -1, US_INV));
1705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString str = UnicodeString(DATA[i], -1, US_INV);
1706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSet set(str, ec);
1707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (U_FAILURE(ec)) {
1708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln("FAIL: UnicodeSet constructor");
1709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expectContainment(set,
1712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          CharsToUnicodeString("abc\\U00010000"),
1713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          CharsToUnicodeString("\\uD800;\\uDC00")); // split apart surrogate-pair
1714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (set.size() != 4) {
1715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: " + UnicodeString(DATA[i], -1, US_INV) + ".size() == " +
1716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  set.size() + ", expected 4");
1717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestExhaustive() {
1722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // exhaustive tests. Simulate UnicodeSets with integers.
1723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // That gives us very solid tests (except for large memory tests).
1724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t limit = 128;
1726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet x, y, z, aa;
1728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i = 0; i < limit; ++i) {
1730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bitsToSet(i, x);
1731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"Testing " + i + ", " + x);
1732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        _testComplement(i, x, y);
1733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // AS LONG AS WE ARE HERE, check roundtrip
1735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        checkRoundTrip(bitsToSet(i, aa));
1736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for (int32_t j = 0; j < limit; ++j) {
1738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            _testAdd(i,j,  x,y,z);
1739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            _testXor(i,j,  x,y,z);
1740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            _testRetain(i,j,  x,y,z);
1741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            _testRemove(i,j,  x,y,z);
1742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::_testComplement(int32_t a, UnicodeSet& x, UnicodeSet& z) {
1747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(a, x);
1748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z = x;
1749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z.complement();
1750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t c = setToBits(z);
1751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c != (~a)) {
1752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: add: ~" + x +  " != " + z);
1753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: add: ~" + a + " != " + c);
1754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkCanonicalRep(z, (UnicodeString)"complement " + a);
1756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::_testAdd(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(a, x);
1760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(b, y);
1761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z = x;
1762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z.addAll(y);
1763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t c = setToBits(z);
1764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c != (a | b)) {
1765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: add: " + x + " | " + y + " != " + z);
1766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: add: " + a + " | " + b + " != " + c);
1767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkCanonicalRep(z, (UnicodeString)"add " + a + "," + b);
1769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::_testRetain(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(a, x);
1773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(b, y);
1774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z = x;
1775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z.retainAll(y);
1776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t c = setToBits(z);
1777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c != (a & b)) {
1778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: retain: " + x + " & " + y + " != " + z);
1779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: retain: " + a + " & " + b + " != " + c);
1780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkCanonicalRep(z, (UnicodeString)"retain " + a + "," + b);
1782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::_testRemove(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(a, x);
1786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(b, y);
1787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z = x;
1788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z.removeAll(y);
1789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t c = setToBits(z);
1790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c != (a &~ b)) {
1791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: remove: " + x + " &~ " + y + " != " + z);
1792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: remove: " + a + " &~ " + b + " != " + c);
1793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkCanonicalRep(z, (UnicodeString)"remove " + a + "," + b);
1795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::_testXor(int32_t a, int32_t b, UnicodeSet& x, UnicodeSet& y, UnicodeSet& z) {
1798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(a, x);
1799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bitsToSet(b, y);
1800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z = x;
1801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    z.complementAll(y);
1802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t c = setToBits(z);
1803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (c != (a ^ b)) {
1804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: complement: " + x + " ^ " + y + " != " + z);
1805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAILED: complement: " + a + " ^ " + b + " != " + c);
1806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkCanonicalRep(z, (UnicodeString)"complement " + a + "," + b);
1808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Check that ranges are monotonically increasing and non-
1812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * overlapping.
1813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::checkCanonicalRep(const UnicodeSet& set, const UnicodeString& msg) {
1815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t n = set.getRangeCount();
1816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (n < 0) {
1817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL result of " + msg +
1818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ": range count should be >= 0 but is " +
1819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              n /*+ " for " + set.toPattern())*/);
1820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 last = 0;
1823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; i<n; ++i) {
1824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 start = set.getRangeStart(i);
1825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 end = set.getRangeEnd(i);
1826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (start > end) {
1827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL result of " + msg +
1828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ": range " + (i+1) +
1829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " start > end: " + (int)start + ", " + (int)end +
1830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " for " + set);
1831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (i > 0 && start <= last) {
1833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL result of " + msg +
1834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  ": range " + (i+1) +
1835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " overlaps previous range: " + (int)start + ", " + (int)end +
1836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  " for " + set);
1837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        last = end;
1839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Convert a bitmask to a UnicodeSet.
1844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSet& UnicodeSetTest::bitsToSet(int32_t a, UnicodeSet& result) {
1846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    result.clear();
1847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (UChar32 i = 0; i < 32; ++i) {
1848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if ((a & (1<<i)) != 0) {
1849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result.add(i);
1850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
1853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Convert a UnicodeSet to a bitmask.  Only the characters
1857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * U+0000 to U+0020 are represented in the bitmask.
1858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t UnicodeSetTest::setToBits(const UnicodeSet& x) {
1860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t result = 0;
1861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i = 0; i < 32; ++i) {
1862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (x.contains((UChar32)i)) {
1863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result |= (1<<i);
1864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
1867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the representation of an inversion list based UnicodeSet
1871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * as a pairs list.  Ranges are listed in ascending Unicode order.
1872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For example, the set [a-zA-M3] is represented as "33AMaz".
1873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString UnicodeSetTest::getPairs(const UnicodeSet& set) {
1875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pairs;
1876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; i<set.getRangeCount(); ++i) {
1877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 start = set.getRangeStart(i);
1878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 end = set.getRangeEnd(i);
1879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (end > 0xFFFF) {
1880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            end = 0xFFFF;
1881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i = set.getRangeCount(); // Should be unnecessary
1882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        pairs.append((UChar)start).append((UChar)end);
1884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return pairs;
1886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Basic consistency check for a few items.
1890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * That the iterator works, and that we can create a pattern and
1891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * get the same thing back
1892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::checkRoundTrip(const UnicodeSet& s) {
1894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
1895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet t(s);
1897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkEqual(s, t, "copy ct");
1898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    t = s;
1900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkEqual(s, t, "operator=");
1901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    copyWithIterator(t, s, FALSE);
1903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkEqual(s, t, "iterator roundtrip");
1904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    copyWithIterator(t, s, TRUE); // try range
1906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    checkEqual(s, t, "iterator roundtrip");
1907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat; s.toPattern(pat, FALSE);
1909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    t.applyPattern(pat, ec);
1910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: applyPattern");
1912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        checkEqual(s, t, "toPattern(false)");
1915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s.toPattern(pat, TRUE);
1918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    t.applyPattern(pat, ec);
1919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: applyPattern");
1921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        checkEqual(s, t, "toPattern(true)");
1924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool withRange) {
1928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    t.clear();
1929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSetIterator it(s);
1930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (withRange) {
1931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (it.nextRange()) {
1932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (it.isString()) {
1933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                t.add(it.getString());
1934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
1935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                t.add(it.getCodepoint(), it.getCodepointEnd());
1936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while (it.next()) {
1940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (it.isString()) {
1941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                t.add(it.getString());
1942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
1943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                t.add(it.getCodepoint());
1944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
1950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString source; s.toPattern(source, TRUE);
1951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString result; t.toPattern(result, TRUE);
1952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (s != t) {
1953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: " + message
1954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              + "; source = " + source
1955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              + "; result = " + result
1956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              );
1957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
1958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"Ok: " + message
1960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              + "; source = " + source
1961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              + "; result = " + result
1962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              );
1963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return TRUE;
1965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
1968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::expectContainment(const UnicodeString& pat,
1969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& charsIn,
1970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& charsOut) {
1971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode ec = U_ZERO_ERROR;
1972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set(pat, ec);
1973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(ec)) {
1974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln((UnicodeString)"FAIL: pattern \"" +
1975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              pat + "\" => " + u_errorName(ec));
1976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectContainment(set, pat, charsIn, charsOut);
1979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
1982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::expectContainment(const UnicodeSet& set,
1983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& charsIn,
1984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& charsOut) {
1985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat;
1986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.toPattern(pat);
1987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectContainment(set, pat, charsIn, charsOut);
1988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
1991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::expectContainment(const UnicodeSet& set,
1992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& setName,
1993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& charsIn,
1994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                  const UnicodeString& charsOut) {
1995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString bad;
1996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c;
1997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i;
1998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<charsIn.length(); i+=U16_LENGTH(c)) {
2000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = charsIn.char32At(i);
2001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (!set.contains(c)) {
2002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            bad.append(c);
2003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (bad.length() > 0) {
2006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"Fail: set " + setName + " does not contain " + prettify(bad) +
2007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ", expected containment of " + prettify(charsIn));
2008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"Ok: set " + setName + " contains " + prettify(charsIn));
2010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bad.truncate(0);
2013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (i=0; i<charsOut.length(); i+=U16_LENGTH(c)) {
2014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        c = charsOut.char32At(i);
2015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (set.contains(c)) {
2016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            bad.append(c);
2017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (bad.length() > 0) {
2020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"Fail: set " + setName + " contains " + prettify(bad) +
2021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              ", expected non-containment of " + prettify(charsOut));
2022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"Ok: set " + setName + " does not contain " + prettify(charsOut));
2024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
2028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::expectPattern(UnicodeSet& set,
2029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              const UnicodeString& pattern,
2030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              const UnicodeString& expectedPairs){
2031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode status = U_ZERO_ERROR;
2032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern(pattern, status);
2033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln(UnicodeString("FAIL: applyPattern(\"") + pattern +
2035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              "\") failed");
2036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (getPairs(set) != expectedPairs ) {
2039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln(UnicodeString("FAIL: applyPattern(\"") + pattern +
2040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  "\") => pairs \"" +
2041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  escape(getPairs(set)) + "\", expected \"" +
2042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  escape(expectedPairs) + "\"");
2043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln(UnicodeString("Ok:   applyPattern(\"") + pattern +
2045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  "\") => pairs \"" +
2046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  escape(getPairs(set)) + "\"");
2047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // the result of calling set.toPattern(), which is the string representation of
2050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // this set(set), is passed to a  UnicodeSet constructor, and tested that it
2051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // will produce another set that is equal to this one.
2052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString temppattern;
2053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.toPattern(temppattern);
2054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *tempset=new UnicodeSet(temppattern, status);
2055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
2056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => invalid pattern"));
2057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(*tempset != set || getPairs(*tempset) != getPairs(set)){
2060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln(UnicodeString("FAIL: applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \""+ escape(getPairs(*tempset)) + "\", expected pairs \"" +
2061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            escape(getPairs(set)) + "\""));
2062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else{
2063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln(UnicodeString("Ok:   applyPattern(\""+ pattern + "\").toPattern() => " + temppattern + " => pairs \"" + escape(getPairs(*tempset)) + "\""));
2064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete tempset;
2067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
2071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::expectPairs(const UnicodeSet& set, const UnicodeString& expectedPairs) {
2072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (getPairs(set) != expectedPairs) {
2073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln(UnicodeString("FAIL: Expected pair list \"") +
2074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              escape(expectedPairs) + "\", got \"" +
2075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              escape(getPairs(set)) + "\"");
2076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::expectToPattern(const UnicodeSet& set,
2080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     const UnicodeString& expPat,
2081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     const char** expStrings) {
2082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pat;
2083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.toPattern(pat, TRUE);
2084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (pat == expPat) {
2085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        logln((UnicodeString)"Ok:   toPattern() => \"" + pat + "\"");
2086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln((UnicodeString)"FAIL: toPattern() => \"" + pat + "\", expected \"" + expPat + "\"");
2088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (expStrings == NULL) {
2091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool in = TRUE;
2094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; expStrings[i] != NULL; ++i) {
2095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (expStrings[i] == NOT) { // sic; pointer comparison
2096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            in = FALSE;
2097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
2098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString s = CharsToUnicodeString(expStrings[i]);
2100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool contained = set.contains(s);
2101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (contained == in) {
2102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            logln((UnicodeString)"Ok: " + expPat +
2103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  (contained ? " contains {" : " does not contain {") +
2104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  escape(expStrings[i]) + "}");
2105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errln((UnicodeString)"FAIL: " + expPat +
2107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  (contained ? " contains {" : " does not contain {") +
2108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  escape(expStrings[i]) + "}");
2109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar toHexString(int32_t i) { return (UChar)(i + (i < 10 ? 0x30 : (0x41 - 10))); }
2114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
2116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::doAssert(UBool condition, const char *message)
2117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
2118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (!condition) {
2119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln(UnicodeString("ERROR : ") + message);
2120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString
2124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeSetTest::escape(const UnicodeString& s) {
2125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString buf;
2126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for (int32_t i=0; i<s.length(); )
2127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    {
2128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c = s.char32At(i);
2129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if (0x0020 <= c && c <= 0x007F) {
2130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            buf += c;
2131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if (c <= 0xFFFF) {
2133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                buf += (UChar)0x5c; buf += (UChar)0x75;
2134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
2135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                buf += (UChar)0x5c; buf += (UChar)0x55;
2136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                buf += toHexString((c & 0xF0000000) >> 28);
2137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                buf += toHexString((c & 0x0F000000) >> 24);
2138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                buf += toHexString((c & 0x00F00000) >> 20);
2139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                buf += toHexString((c & 0x000F0000) >> 16);
2140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            buf += toHexString((c & 0xF000) >> 12);
2142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            buf += toHexString((c & 0x0F00) >> 8);
2143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            buf += toHexString((c & 0x00F0) >> 4);
2144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            buf += toHexString(c & 0x000F);
2145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i += U16_LENGTH(c);
2147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return buf;
2149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestFreezable() {
2152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
2153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString idPattern=UNICODE_STRING("[:ID_Continue:]", 15);
2154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet idSet(idPattern, errorCode);
2155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
2156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("FAIL: unable to create UnicodeSet([:ID_Continue:]) - %s", u_errorName(errorCode));
2157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString wsPattern=UNICODE_STRING("[:White_Space:]", 15);
2161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet wsSet(wsPattern, errorCode);
2162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
2163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataerrln("FAIL: unable to create UnicodeSet([:White_Space:]) - %s", u_errorName(errorCode));
2164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
2165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    idSet.add(idPattern);
2168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet frozen(idSet);
2169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.freeze();
2170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(idSet.isFrozen() || !frozen.isFrozen()) {
2172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: isFrozen() is wrong");
2173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: a copy-constructed frozen set differs from its original");
2176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen=wsSet;
2179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: a frozen set was modified by operator=");
2181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet frozen2(frozen);
2184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen2!=frozen || frozen2!=idSet) {
2185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: a copied frozen set differs from its frozen original");
2186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!frozen2.isFrozen()) {
2188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: copy-constructing a frozen set results in a thawed one");
2189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet frozen3(5, 55);  // Set to some values to really test assignment below, not copy construction.
2191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen3.contains(0, 4) || !frozen3.contains(5, 55) || frozen3.contains(56, 0x10ffff)) {
2192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet(5, 55) failed");
2193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen3=frozen;
2195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!frozen3.isFrozen()) {
2196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: copying a frozen set results in a thawed one");
2197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *cloned=(UnicodeSet *)frozen.clone();
2200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!cloned->isFrozen() || *cloned!=frozen || cloned->containsSome(0xd802, 0xd805)) {
2201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: clone() failed");
2202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    cloned->add(0xd802, 0xd805);
2204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(cloned->containsSome(0xd802, 0xd805)) {
2205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: unable to modify clone");
2206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete cloned;
2208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *thawed=(UnicodeSet *)frozen.cloneAsThawed();
2210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(thawed->isFrozen() || *thawed!=frozen || thawed->containsSome(0xd802, 0xd805)) {
2211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: cloneAsThawed() failed");
2212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    thawed->add(0xd802, 0xd805);
2214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!thawed->contains(0xd802, 0xd805)) {
2215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: unable to modify thawed clone");
2216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete thawed;
2218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.set(5, 55);
2220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::set() modified a frozen set");
2222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.clear();
2225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::clear() modified a frozen set");
2227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.closeOver(USET_CASE_INSENSITIVE);
2230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::closeOver() modified a frozen set");
2232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.compact();
2235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::compact() modified a frozen set");
2237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ParsePosition pos;
2240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.
2241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        applyPattern(wsPattern, errorCode).
2242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        applyPattern(wsPattern, USET_IGNORE_SPACE, NULL, errorCode).
2243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        applyPattern(wsPattern, pos, USET_IGNORE_SPACE, NULL, errorCode).
2244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        applyIntPropertyValue(UCHAR_CANONICAL_COMBINING_CLASS, 230, errorCode).
2245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        applyPropertyAlias(UNICODE_STRING_SIMPLE("Assigned"), UnicodeString(), errorCode);
2246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::applyXYZ() modified a frozen set");
2248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.
2251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        add(0xd800).
2252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        add(0xd802, 0xd805).
2253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        add(wsPattern).
2254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        addAll(idPattern).
2255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        addAll(wsSet);
2256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::addXYZ() modified a frozen set");
2258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.
2261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retain(0x62).
2262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retain(0x64, 0x69).
2263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retainAll(wsPattern).
2264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        retainAll(wsSet);
2265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::retainXYZ() modified a frozen set");
2267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.
2270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        remove(0x62).
2271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        remove(0x64, 0x69).
2272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        remove(idPattern).
2273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        removeAll(idPattern).
2274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        removeAll(idSet);
2275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::removeXYZ() modified a frozen set");
2277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    frozen.
2280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        complement().
2281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        complement(0x62).
2282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        complement(0x64, 0x69).
2283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        complement(idPattern).
2284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        complementAll(idPattern).
2285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        complementAll(idSet);
2286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(frozen!=idSet || !(frozen==idSet)) {
2287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet::complementXYZ() modified a frozen set");
2288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test span() etc. -------------------------------------------------------- ***
2292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Append the UTF-8 version of the string to t and return the appended UTF-8 length.
2294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
2295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)appendUTF8(const UChar *s, int32_t length, char *t, int32_t capacity) {
2296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
2297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t length8=0;
2298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    u_strToUTF8(t, capacity, &length8, s, length, &errorCode);
2299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(errorCode)) {
2300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return length8;
2301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // The string contains an unpaired surrogate.
2303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Ignore this string.
2304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
2305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class UnicodeSetWithStringsIterator;
2309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Make the strings in a UnicodeSet easily accessible.
2311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class UnicodeSetWithStrings {
2312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
2313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSetWithStrings(const UnicodeSet &normalSet) :
2314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            set(normalSet), stringsLength(0), hasSurrogates(FALSE) {
2315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t size=set.size();
2316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(size>0 && set.charAt(size-1)<0) {
2317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If a set's last element is not a code point, then it must contain strings.
2318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Iterate over the set, skip all code point ranges, and cache the strings.
2319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Convert them to UTF-8 for spanUTF8().
2320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeSetIterator iter(set);
2321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const UnicodeString *s;
2322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            char *s8=utf8;
2323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t length8, utf8Count=0;
2324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while(iter.nextRange() && stringsLength<LENGTHOF(strings)) {
2325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(iter.isString()) {
2326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // Store the pointer to the set's string element
2327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // which we happen to know is a stable pointer.
2328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    strings[stringsLength]=s=&iter.getString();
2329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    utf8Count+=
2330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        utf8Lengths[stringsLength]=length8=
2331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        appendUTF8(s->getBuffer(), s->length(),
2332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                   s8, (int32_t)(sizeof(utf8)-utf8Count));
2333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(length8==0) {
2334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        hasSurrogates=TRUE;  // Contains unpaired surrogates.
2335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    s8+=length8;
2337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    ++stringsLength;
2338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &getSet() const {
2344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return set;
2345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool hasStrings() const {
2348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return (UBool)(stringsLength>0);
2349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool hasStringsWithSurrogates() const {
2352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return hasSurrogates;
2353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
2356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    friend class UnicodeSetWithStringsIterator;
2357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &set;
2359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *strings[20];
2361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t stringsLength;
2362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool hasSurrogates;
2363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char utf8[1024];
2365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t utf8Lengths[20];
2366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextStringIndex;
2368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextUTF8Start;
2369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
2370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)class UnicodeSetWithStringsIterator {
2372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)public:
2373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSetWithStringsIterator(const UnicodeSetWithStrings &set) :
2374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fSet(set), nextStringIndex(0), nextUTF8Start(0) {
2375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    void reset() {
2378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextStringIndex=nextUTF8Start=0;
2379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeString *nextString() {
2382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(nextStringIndex<fSet.stringsLength) {
2383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return fSet.strings[nextStringIndex++];
2384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
2386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Do not mix with calls to nextString().
2390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *nextUTF8(int32_t &length) {
2391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(nextStringIndex<fSet.stringsLength) {
2392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char *s8=fSet.utf8+nextUTF8Start;
2393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            nextUTF8Start+=length=fSet.utf8Lengths[nextStringIndex++];
2394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return s8;
2395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length=0;
2397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
2398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)private:
2402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSetWithStrings &fSet;
2403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextStringIndex;
2404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t nextUTF8Start;
2405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
2406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Compare 16-bit Unicode strings (which may be malformed UTF-16)
2408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// at code point boundaries.
2409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// That is, each edge of a match must not be in the middle of a surrogate pair.
2410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static inline UBool
2411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)matches16CPB(const UChar *s, int32_t start, int32_t limit, const UnicodeString &t) {
2412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s+=start;
2413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    limit-=start;
2414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t length=t.length();
2415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return 0==t.compare(s, length) &&
2416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) &&
2417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length]));
2418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Implement span() with contains() for comparison.
2421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t containsSpanUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
2422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 USetSpanCondition spanCondition) {
2423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &realSet(set.getSet());
2424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!set.hasStrings()) {
2425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start=0, prev;
2431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while((prev=start)<length) {
2432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_NEXT(s, start, length, c);
2433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)!=spanCondition) {
2434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return prev;
2438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start, next;
2442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(start=next=0; start<length;) {
2443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_NEXT(s, next, length, c);
2444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)) {
2445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const UnicodeString *str;
2448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((str=iter.nextString())!=NULL) {
2450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
2451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return start;
2453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=next;
2456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return start;
2458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start, next, maxSpanLimit=0;
2462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(start=next=0; start<length;) {
2463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_NEXT(s, next, length, c);
2464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(!realSet.contains(c)) {
2465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                next=start;  // Do not span this single, not-contained code point.
2466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const UnicodeString *str;
2468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((str=iter.nextString())!=NULL) {
2470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(str->length()<=(length-start) && matches16CPB(s, start, length, *str)) {
2471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t matchLimit=start+str->length();
2473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(matchLimit==length) {
2474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return length;
2475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(spanCondition==USET_SPAN_CONTAINED) {
2477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Iterate for the shortest match at each position.
2478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Recurse for each but the shortest match.
2479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(next==start) {
2480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            next=matchLimit;  // First match from start.
2481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        } else {
2482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(matchLimit<next) {
2483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                // Remember shortest match from start for iteration.
2484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                int32_t temp=next;
2485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                next=matchLimit;
2486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                matchLimit=temp;
2487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Recurse for non-shortest match from start.
2489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            int32_t spanLength=containsSpanUTF16(set, s+matchLimit, length-matchLimit,
2490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                                 USET_SPAN_CONTAINED);
2491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if((matchLimit+spanLength)>maxSpanLimit) {
2492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                maxSpanLimit=matchLimit+spanLength;
2493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                if(maxSpanLimit==length) {
2494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    return length;
2495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                }
2496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(matchLimit>next) {
2500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Remember longest match from start.
2501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            next=matchLimit;
2502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(next==start) {
2507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;  // No match from start.
2508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=next;
2510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(start>maxSpanLimit) {
2512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return start;
2513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return maxSpanLimit;
2515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t containsSpanBackUTF16(const UnicodeSetWithStrings &set, const UChar *s, int32_t length,
2520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     USetSpanCondition spanCondition) {
2521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length==0) {
2522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
2523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &realSet(set.getSet());
2525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!set.hasStrings()) {
2526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=length;
2532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
2533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_PREV(s, 0, length, c);
2534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)!=spanCondition) {
2535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while((prev=length)>0);
2538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return prev;
2539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=length, length0=length;
2543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
2544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_PREV(s, 0, length, c);
2545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)) {
2546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const UnicodeString *str;
2549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((str=iter.nextString())!=NULL) {
2551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
2552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return prev;
2554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while((prev=length)>0);
2557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return prev;
2558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=length, minSpanStart=length, length0=length;
2562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
2563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U16_PREV(s, 0, length, c);
2564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(!realSet.contains(c)) {
2565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                length=prev;  // Do not span this single, not-contained code point.
2566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const UnicodeString *str;
2568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((str=iter.nextString())!=NULL) {
2570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(str->length()<=prev && matches16CPB(s, prev-str->length(), length0, *str)) {
2571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t matchStart=prev-str->length();
2573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(matchStart==0) {
2574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return 0;
2575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(spanCondition==USET_SPAN_CONTAINED) {
2577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Iterate for the shortest match at each position.
2578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Recurse for each but the shortest match.
2579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(length==prev) {
2580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            length=matchStart;  // First match from prev.
2581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        } else {
2582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(matchStart>length) {
2583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                // Remember shortest match from prev for iteration.
2584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                int32_t temp=length;
2585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                length=matchStart;
2586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                matchStart=temp;
2587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Recurse for non-shortest match from prev.
2589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            int32_t spanStart=containsSpanBackUTF16(set, s, matchStart,
2590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                                    USET_SPAN_CONTAINED);
2591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(spanStart<minSpanStart) {
2592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                minSpanStart=spanStart;
2593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                if(minSpanStart==0) {
2594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    return 0;
2595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                }
2596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(matchStart<length) {
2600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Remember longest match from prev.
2601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            length=matchStart;
2602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(length==prev) {
2607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;  // No match from prev.
2608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while((prev=length)>0);
2610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(prev<minSpanStart) {
2611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return prev;
2612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return minSpanStart;
2614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t containsSpanUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
2619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                USetSpanCondition spanCondition) {
2620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &realSet(set.getSet());
2621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!set.hasStrings()) {
2622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start=0, prev;
2628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while((prev=start)<length) {
2629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U8_NEXT(s, start, length, c);
2630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c<0) {
2631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=0xfffd;
2632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)!=spanCondition) {
2634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return prev;
2638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start, next;
2642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(start=next=0; start<length;) {
2643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U8_NEXT(s, next, length, c);
2644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c<0) {
2645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=0xfffd;
2646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)) {
2648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char *s8;
2651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t length8;
2652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((s8=iter.nextUTF8(length8))!=NULL) {
2654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
2655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return start;
2657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=next;
2660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return start;
2662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t start, next, maxSpanLimit=0;
2666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(start=next=0; start<length;) {
2667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U8_NEXT(s, next, length, c);
2668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c<0) {
2669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=0xfffd;
2670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(!realSet.contains(c)) {
2672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                next=start;  // Do not span this single, not-contained code point.
2673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char *s8;
2675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t length8;
2676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((s8=iter.nextUTF8(length8))!=NULL) {
2678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(length8!=0 && length8<=(length-start) && 0==memcmp(s+start, s8, length8)) {
2679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t matchLimit=start+length8;
2681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(matchLimit==length) {
2682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return length;
2683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(spanCondition==USET_SPAN_CONTAINED) {
2685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Iterate for the shortest match at each position.
2686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Recurse for each but the shortest match.
2687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(next==start) {
2688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            next=matchLimit;  // First match from start.
2689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        } else {
2690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(matchLimit<next) {
2691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                // Remember shortest match from start for iteration.
2692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                int32_t temp=next;
2693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                next=matchLimit;
2694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                matchLimit=temp;
2695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Recurse for non-shortest match from start.
2697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            int32_t spanLength=containsSpanUTF8(set, s+matchLimit, length-matchLimit,
2698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                                USET_SPAN_CONTAINED);
2699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if((matchLimit+spanLength)>maxSpanLimit) {
2700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                maxSpanLimit=matchLimit+spanLength;
2701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                if(maxSpanLimit==length) {
2702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    return length;
2703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                }
2704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(matchLimit>next) {
2708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Remember longest match from start.
2709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            next=matchLimit;
2710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(next==start) {
2715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;  // No match from start.
2716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=next;
2718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(start>maxSpanLimit) {
2720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return start;
2721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return maxSpanLimit;
2723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t containsSpanBackUTF8(const UnicodeSetWithStrings &set, const char *s, int32_t length,
2728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    USetSpanCondition spanCondition) {
2729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length==0) {
2730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
2731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &realSet(set.getSet());
2733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!set.hasStrings()) {
2734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=length;
2740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
2741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U8_PREV(s, 0, length, c);
2742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c<0) {
2743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=0xfffd;
2744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)!=spanCondition) {
2746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while((prev=length)>0);
2749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return prev;
2750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else if(spanCondition==USET_SPAN_NOT_CONTAINED) {
2751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=length;
2754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
2755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U8_PREV(s, 0, length, c);
2756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c<0) {
2757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=0xfffd;
2758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(realSet.contains(c)) {
2760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char *s8;
2763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t length8;
2764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((s8=iter.nextUTF8(length8))!=NULL) {
2766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
2767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return prev;
2769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while((prev=length)>0);
2772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return prev;
2773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else /* USET_SPAN_CONTAINED or USET_SPAN_SIMPLE */ {
2774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeSetWithStringsIterator iter(set);
2775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
2776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=length, minSpanStart=length;
2777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
2778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            U8_PREV(s, 0, length, c);
2779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c<0) {
2780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                c=0xfffd;
2781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(!realSet.contains(c)) {
2783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                length=prev;  // Do not span this single, not-contained code point.
2784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            const char *s8;
2786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t length8;
2787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            iter.reset();
2788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while((s8=iter.nextUTF8(length8))!=NULL) {
2789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(length8!=0 && length8<=prev && 0==memcmp(s+prev-length8, s8, length8)) {
2790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // spanNeedsStrings=TRUE;
2791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t matchStart=prev-length8;
2792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(matchStart==0) {
2793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return 0;
2794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(spanCondition==USET_SPAN_CONTAINED) {
2796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Iterate for the shortest match at each position.
2797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        // Recurse for each but the shortest match.
2798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(length==prev) {
2799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            length=matchStart;  // First match from prev.
2800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        } else {
2801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(matchStart>length) {
2802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                // Remember shortest match from prev for iteration.
2803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                int32_t temp=length;
2804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                length=matchStart;
2805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                matchStart=temp;
2806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Recurse for non-shortest match from prev.
2808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            int32_t spanStart=containsSpanBackUTF8(set, s, matchStart,
2809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                                   USET_SPAN_CONTAINED);
2810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(spanStart<minSpanStart) {
2811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                minSpanStart=spanStart;
2812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                if(minSpanStart==0) {
2813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    return 0;
2814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                }
2815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
2816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    } else /* spanCondition==USET_SPAN_SIMPLE */ {
2818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(matchStart<length) {
2819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            // Remember longest match from prev.
2820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            length=matchStart;
2821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
2822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
2823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
2824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(length==prev) {
2826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;  // No match from prev.
2827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while((prev=length)>0);
2829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(prev<minSpanStart) {
2830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return prev;
2831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
2832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return minSpanStart;
2833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Bit flags selecting the spans to be performed and compared.
// Each group of two flags is followed by a mask that combines both of them.
enum {
    SPAN_UTF16          =1<<0,
    SPAN_UTF8           =1<<1,
    SPAN_UTFS           =SPAN_UTF16|SPAN_UTF8,

    SPAN_SET            =1<<2,
    SPAN_COMPLEMENT     =1<<3,
    SPAN_POLARITY       =SPAN_SET|SPAN_COMPLEMENT,

    SPAN_FWD            =1<<4,
    SPAN_BACK           =1<<5,
    SPAN_DIRS           =SPAN_FWD|SPAN_BACK,

    SPAN_CONTAINED      =1<<8,
    SPAN_SIMPLE         =1<<9,
    SPAN_CONDITION      =SPAN_CONTAINED|SPAN_SIMPLE,

    // All of the flags above.
    SPAN_ALL            =SPAN_UTFS|SPAN_POLARITY|SPAN_DIRS|SPAN_CONDITION
};
2857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static inline USetSpanCondition invertSpanCondition(USetSpanCondition spanCondition, USetSpanCondition contained) {
2859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return spanCondition == USET_SPAN_NOT_CONTAINED ? contained : USET_SPAN_NOT_CONTAINED;
2860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static inline int32_t slen(const void *s, UBool isUTF16) {
2863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return isUTF16 ? u_strlen((const UChar *)s) : strlen((const char *)s);
2864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
2865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Count spans on a string with the method according to type and set the span limits.
2868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The set may be the complement of the original.
2869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * When using spanBack() and comparing with span(), use a span condition for the first spanBack()
2870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * according to the expected number of spans.
2871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Sets typeName to an empty string if there is no such type.
2872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returns -1 if the span option is filtered out.
2873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
2874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t getSpans(const UnicodeSetWithStrings &set, UBool isComplement,
2875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        const void *s, int32_t length, UBool isUTF16,
2876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        uint32_t whichSpans,
2877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        int type, const char *&typeName,
2878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        int32_t limits[], int32_t limitsCapacity,
2879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        int32_t expectCount) {
2880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSet &realSet(set.getSet());
2881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t start, count;
2882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    USetSpanCondition spanCondition, firstSpanCondition, contained;
2883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool isForward;
2884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(type<0 || 7<type) {
2886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        typeName="";
2887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
2888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *const typeNames16[]={
2891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "contains", "contains(LM)",
2892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "span", "span(LM)",
2893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "containsBack", "containsBack(LM)",
2894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "spanBack", "spanBack(LM)"
2895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
2896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *const typeNames8[]={
2898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "containsUTF8", "containsUTF8(LM)",
2899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "spanUTF8", "spanUTF8(LM)",
2900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "containsBackUTF8", "containsBackUTF8(LM)", // not implemented
2901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "spanBackUTF8", "spanBackUTF8(LM)"
2902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
2903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    typeName= isUTF16 ? typeNames16[type] : typeNames8[type];
2905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // filter span options
2907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(type<=3) {
2908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // span forward
2909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if((whichSpans&SPAN_FWD)==0) {
2910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;
2911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        isForward=TRUE;
2913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // span backward
2915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if((whichSpans&SPAN_BACK)==0) {
2916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;
2917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        isForward=FALSE;
2919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((type&1)==0) {
2921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // use USET_SPAN_CONTAINED
2922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if((whichSpans&SPAN_CONTAINED)==0) {
2923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;
2924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        contained=USET_SPAN_CONTAINED;
2926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
2927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // use USET_SPAN_SIMPLE
2928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if((whichSpans&SPAN_SIMPLE)==0) {
2929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;
2930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        contained=USET_SPAN_SIMPLE;
2932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Default first span condition for going forward with an uncomplemented set.
2935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    spanCondition=USET_SPAN_NOT_CONTAINED;
2936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(isComplement) {
2937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        spanCondition=invertSpanCondition(spanCondition, contained);
2938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // First span condition for span(), used to terminate the spanBack() iteration.
2941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    firstSpanCondition=spanCondition;
2942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // spanBack(): Its initial span condition is span()'s last span condition,
2944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // which is the opposite of span()'s first span condition
2945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // if we expect an even number of spans.
2946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // (The loop inverts spanCondition (expectCount-1) times
2947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // before the expectCount'th span() call.)
2948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // If we do not compare forward and backward directions, then we do not have an
2949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // expectCount and just start with firstSpanCondition.
2950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!isForward && (whichSpans&SPAN_FWD)!=0 && (expectCount&1)==0) {
2951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        spanCondition=invertSpanCondition(spanCondition, contained);
2952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
2953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
2954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    count=0;
2955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    switch(type) {
2956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0:
2957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 1:
2958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        start=0;
2959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(length<0) {
2960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length=slen(s, isUTF16);
2961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(;;) {
2963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+= isUTF16 ? containsSpanUTF16(set, (const UChar *)s+start, length-start, spanCondition) :
2964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              containsSpanUTF8(set, (const char *)s+start, length-start, spanCondition);
2965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(count<limitsCapacity) {
2966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                limits[count]=start;
2967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++count;
2969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(start>=length) {
2970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=invertSpanCondition(spanCondition, contained);
2973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
2975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 2:
2976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 3:
2977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        start=0;
2978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(;;) {
2979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+= isUTF16 ? realSet.span((const UChar *)s+start, length>=0 ? length-start : length, spanCondition) :
2980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              realSet.spanUTF8((const char *)s+start, length>=0 ? length-start : length, spanCondition);
2981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(count<limitsCapacity) {
2982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                limits[count]=start;
2983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++count;
2985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(length>=0 ? start>=length :
2986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                           isUTF16 ? ((const UChar *)s)[start]==0 :
2987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     ((const char *)s)[start]==0
2988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ) {
2989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
2990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
2991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=invertSpanCondition(spanCondition, contained);
2992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
2994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 4:
2995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 5:
2996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(length<0) {
2997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length=slen(s, isUTF16);
2998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
2999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(;;) {
3000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++count;
3001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(count<=limitsCapacity) {
3002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                limits[limitsCapacity-count]=length;
3003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length= isUTF16 ? containsSpanBackUTF16(set, (const UChar *)s, length, spanCondition) :
3005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              containsSpanBackUTF8(set, (const char *)s, length, spanCondition);
3006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(length==0 && spanCondition==firstSpanCondition) {
3007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
3008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=invertSpanCondition(spanCondition, contained);
3010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(count<limitsCapacity) {
3012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            memmove(limits, limits+(limitsCapacity-count), count*4);
3013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
3015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 6:
3016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 7:
3017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(;;) {
3018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++count;
3019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(count<=limitsCapacity) {
3020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                limits[limitsCapacity-count]= length >=0 ? length : slen(s, isUTF16);
3021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Note: Length<0 is tested only for the first spanBack().
3023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // If we wanted to keep length<0 for all spanBack()s, we would have to
3024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // temporarily modify the string by placing a NUL where the previous spanBack() stopped.
3025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length= isUTF16 ? realSet.spanBack((const UChar *)s, length, spanCondition) :
3026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              realSet.spanBackUTF8((const char *)s, length, spanCondition);
3027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(length==0 && spanCondition==firstSpanCondition) {
3028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
3029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            spanCondition=invertSpanCondition(spanCondition, contained);
3031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(count<limitsCapacity) {
3033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            memmove(limits, limits+(limitsCapacity-count), count*4);
3034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
3036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    default:
3037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        typeName="";
3038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
3039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return count;
3042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Indexes of the sets to be tested.
// Each set is directly followed by its complement, so an odd index means isComplement.
// (SLOW/FAST presumably distinguish the unfrozen and frozen versions of a set -
// confirm against the code that fills in the sets[] array.)
enum {
    SLOW=0,
    SLOW_NOT=1,
    FAST=2,
    FAST_NOT=3,
    SET_COUNT=4
};

// Set names for failure messages, indexed by the enum constants above.
static const char *const setNames[SET_COUNT]={
    "slow",      // SLOW
    "slow.not",  // SLOW_NOT
    "fast",      // FAST
    "fast.not"   // FAST_NOT
};
3059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
3061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Verify that we get the same results whether we look at text with contains(),
3062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * span() or spanBack(), using unfrozen or frozen versions of the set,
3063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and using the set or its complement (switching the spanConditions accordingly).
3064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The latter verifies that
3065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   set.span(spanCondition) == set.complement().span(!spanCondition).
3066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
3067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The expectLimits[] are either provided by the caller (with expectCount>=0)
3068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or returned to the caller (with an input expectCount<0).
3069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
3070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
3071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              const void *s, int32_t length, UBool isUTF16,
3072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              uint32_t whichSpans,
3073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              int32_t expectLimits[], int32_t &expectCount,
3074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              const char *testName, int32_t index) {
3075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t limits[500];
3076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t limitsCount;
3077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int i, j;
3078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const char *typeName;
3080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int type;
3081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<SET_COUNT; ++i) {
3083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if((i&1)==0) {
3084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Even-numbered sets are original, uncomplemented sets.
3085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if((whichSpans&SPAN_SET)==0) {
3086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
3089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Odd-numbered sets are complemented.
3090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if((whichSpans&SPAN_COMPLEMENT)==0) {
3091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue;
3092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(type=0;; ++type) {
3095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            limitsCount=getSpans(*sets[i], (UBool)(i&1),
3096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 s, length, isUTF16,
3097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 whichSpans,
3098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 type, typeName,
3099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 limits, LENGTHOF(limits), expectCount);
3100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(typeName[0]==0) {
3101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break; // All types tried.
3102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(limitsCount<0) {
3104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                continue; // Span option filtered out.
3105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(expectCount<0) {
3107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                expectCount=limitsCount;
3108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(limitsCount>LENGTHOF(limits)) {
3109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    errln("FAIL: %s[0x%lx].%s.%s span count=%ld > %ld capacity - too many spans",
3110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)LENGTHOF(limits));
3111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return;
3112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                memcpy(expectLimits, limits, limitsCount*4);
3114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if(limitsCount!=expectCount) {
3115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                errln("FAIL: %s[0x%lx].%s.%s span count=%ld != %ld",
3116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      testName, (long)index, setNames[i], typeName, (long)limitsCount, (long)expectCount);
3117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
3118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                for(j=0; j<limitsCount; ++j) {
3119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(limits[j]!=expectLimits[j]) {
3120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("FAIL: %s[0x%lx].%s.%s span count=%ld limits[%d]=%ld != %ld",
3121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              testName, (long)index, setNames[i], typeName, (long)limitsCount,
3122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              j, (long)limits[j], (long)expectLimits[j]);
3123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        break;
3124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
3125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Compare span() with containsAll()/containsNone(),
3131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // but only if we have expectLimits[] from the uncomplemented set.
3132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(isUTF16 && (whichSpans&SPAN_SET)!=0) {
3133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const UChar *s16=(const UChar *)s;
3134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString string;
3135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t prev=0, limit, length;
3136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=0; i<expectCount; ++i) {
3137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            limit=expectLimits[i];
3138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length=limit-prev;
3139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(length>0) {
3140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                string.setTo(FALSE, s16+prev, length);  // read-only alias
3141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(i&1) {
3142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!sets[SLOW]->getSet().containsAll(string)) {
3143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
3144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
3145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
3146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
3147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!sets[FAST]->getSet().containsAll(string)) {
3148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("FAIL: %s[0x%lx].%s.containsAll(%ld..%ld)==FALSE contradicts span()",
3149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
3150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
3151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
3152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
3153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!sets[SLOW]->getSet().containsNone(string)) {
3154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
3155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              testName, (long)index, setNames[SLOW], (long)prev, (long)limit);
3156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
3157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
3158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!sets[FAST]->getSet().containsNone(string)) {
3159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        errln("FAIL: %s[0x%lx].%s.containsNone(%ld..%ld)==FALSE contradicts span()",
3160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              testName, (long)index, setNames[FAST], (long)prev, (long)limit);
3161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
3162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
3163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            prev=limit;
3166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Specifically test either UTF-16 or UTF-8.
3171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::testSpan(const UnicodeSetWithStrings *sets[4],
3172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              const void *s, int32_t length, UBool isUTF16,
3173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              uint32_t whichSpans,
3174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                              const char *testName, int32_t index) {
3175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectLimits[500];
3176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectCount=-1;
3177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSpan(sets, s, length, isUTF16, whichSpans, expectLimits, expectCount, testName, index);
3178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool stringContainsUnpairedSurrogate(const UChar *s, int32_t length) {
3181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar c, c2;
3182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length>=0) {
3184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(length>0) {
3185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c=*s++;
3186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --length;
3187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(0xd800<=c && c<0xe000) {
3188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(c>=0xdc00 || length==0 || !U16_IS_TRAIL(c2=*s++)) {
3189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return TRUE;
3190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                --length;
3192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
3195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while((c=*s++)!=0) {
3196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(0xd800<=c && c<0xe000) {
3197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(c>=0xdc00 || !U16_IS_TRAIL(c2=*s++)) {
3198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return TRUE;
3199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return FALSE;
3204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test both UTF-16 and UTF-8 versions of span() etc. on the same sets and text,
3207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// unless either UTF is turned off in whichSpans.
3208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Testing UTF-16 and UTF-8 together requires that surrogate code points
3209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// have the same contains(c) value as U+FFFD.
3210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::testSpanBothUTFs(const UnicodeSetWithStrings *sets[4],
3211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      const UChar *s16, int32_t length16,
3212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      uint32_t whichSpans,
3213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                      const char *testName, int32_t index) {
3214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectLimits[500];
3215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t expectCount;
3216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    expectCount=-1;  // Get expectLimits[] from testSpan().
3218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((whichSpans&SPAN_UTF16)!=0) {
3220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        testSpan(sets, s16, length16, TRUE, whichSpans, expectLimits, expectCount, testName, index);
3221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((whichSpans&SPAN_UTF8)==0) {
3223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Convert s16[] and expectLimits[] to UTF-8.
3227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint8_t s8[3000];
3228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t offsets[3000];
3229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *s16Limit=s16+length16;
3231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *t=(char *)s8;
3232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *tLimit=t+sizeof(s8);
3233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t *o=offsets;
3234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
3235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Convert with substitution: Turn unpaired surrogates into U+FFFD.
3237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ucnv_fromUnicode(openUTF8Converter(), &t, tLimit, &s16, s16Limit, o, TRUE, &errorCode);
3238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
3239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: %s[0x%lx] ucnv_fromUnicode(to UTF-8) fails with %s",
3240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)              testName, (long)index, u_errorName(errorCode));
3241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ucnv_resetFromUnicode(utf8Cnv);
3242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t length8=(int32_t)(t-(char *)s8);
3245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Convert expectLimits[].
3247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, j, expect;
3248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=j=0; i<expectCount; ++i) {
3249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        expect=expectLimits[i];
3250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(expect==length16) {
3251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectLimits[i]=length8;
3252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
3253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while(offsets[j]<expect) {
3254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                ++j;
3255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            expectLimits[i]=j;
3257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSpan(sets, s8, length8, FALSE, whichSpans, expectLimits, expectCount, testName, index);
3261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar32 nextCodePoint(UChar32 c) {
3264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Skip some large and boring ranges.
3265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    switch(c) {
3266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0x3441:
3267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0x4d7f;
3268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0x5100:
3269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0x9f00;
3270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0xb040:
3271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0xd780;
3272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0xe041:
3273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0xf8fe;
3274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0x10100:
3275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0x20000;
3276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0x20041:
3277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0xe0000;
3278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case 0xe0101:
3279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0x10fffd;
3280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    default:
3281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return c+1;
3282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Verify that all implementations represent the same set.
3286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::testSpanContents(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // contains(U+FFFD) is inconsistent with contains(some surrogates),
3288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // or the set contains strings with unpaired surrogates which don't translate to valid UTF-8:
3289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Skip the UTF-8 part of the test - if the string contains surrogates -
3290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // because it is likely to produce a different result.
3291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool inconsistentSurrogates=
3292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (!(sets[0]->getSet().contains(0xfffd) ?
3293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               sets[0]->getSet().contains(0xd800, 0xdfff) :
3294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)               sets[0]->getSet().containsNone(0xd800, 0xdfff)) ||
3295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             sets[0]->hasStringsWithSurrogates());
3296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar s[1000];
3298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t length=0;
3299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t localWhichSpans;
3300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c, first;
3302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(first=c=0;; c=nextCodePoint(c)) {
3303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(c>0x10ffff || length>(LENGTHOF(s)-U16_MAX_LENGTH)) {
3304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            localWhichSpans=whichSpans;
3305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(stringContainsUnpairedSurrogate(s, length) && inconsistentSurrogates) {
3306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                localWhichSpans&=~SPAN_UTF8;
3307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testSpanBothUTFs(sets, s, length, localWhichSpans, testName, first);
3309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(c>0x10ffff) {
3310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
3311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            length=0;
3313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            first=c;
3314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        U16_APPEND_UNSAFE(s, length, c);
3316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test with a particular, interesting string.
3320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Specify length and try NUL-termination.
3321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::testSpanUTF16String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const UChar s[]={
3323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x61, 0x62, 0x20,                       // Latin, space
3324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x3b1, 0x3b2, 0x3b3,                    // Greek
3325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xd900,                                 // lead surrogate
3326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0x3000, 0x30ab, 0x30ad,                 // wide space, Katakana
3327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xdc05,                                 // trail surrogate
3328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xa0, 0xac00, 0xd7a3,                   // nbsp, Hangul
3329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xd900, 0xdc05,                         // unassigned supplementary
3330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xd840, 0xdfff, 0xd860, 0xdffe,         // Han supplementary
3331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0xd7a4, 0xdc05, 0xd900, 0x2028,         // unassigned, surrogates in wrong order, LS
3332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        0                                       // NUL
3333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
3334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((whichSpans&SPAN_UTF16)==0) {
3336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSpan(sets, s, -1, TRUE, (whichSpans&~SPAN_UTF8), testName, 0);
3339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSpan(sets, s, LENGTHOF(s)-1, TRUE, (whichSpans&~SPAN_UTF8), testName, 1);
3340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::testSpanUTF8String(const UnicodeSetWithStrings *sets[4], uint32_t whichSpans, const char *testName) {
3343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char s[]={
3344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc"                                   // Latin
3345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        " "                                     // space
3350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* truncated multi-byte sequences */
3352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xd0"
3353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xe0"
3354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xe1"
3355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xed"
3356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xee"
3357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf0"
3358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf1"
3359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf4"
3360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf8"
3361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfc"
3362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xCE\xB1\xCE\xB2\xCE\xB3"              // Greek
3364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xe0\x80"
3369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xe0\xa0"
3370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xe1\x80"
3371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xed\x80"
3372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xed\xa0"
3373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xee\x80"
3374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf0\x80"
3375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf0\x90"
3376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf1\x80"
3377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf4\x80"
3378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf4\x90"
3379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf8\x80"
3380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfc\x80"
3381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xE3\x80\x80\xE3\x82\xAB\xE3\x82\xAD"  // wide space, Katakana
3383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf0\x80\x80"
3388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf0\x90\x80"
3389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf1\x80\x80"
3390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf4\x80\x80"
3391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf4\x90\x80"
3392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf8\x80\x80"
3393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfc\x80\x80"
3394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xC2\xA0\xEA\xB0\x80\xED\x9E\xA3"      // nbsp, Hangul
3396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf8\x80\x80\x80"
3401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfc\x80\x80\x80"
3402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xF1\x90\x80\x85"                      // unassigned supplementary
3404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfc\x80\x80\x80\x80"
3409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xF0\xA0\x8F\xBF\xF0\xA8\x8F\xBE"      // Han supplementary
3411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* complete sequences but non-shortest forms or out of range etc. */
3416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xc0\x80"
3417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xe0\x80\x80"
3418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xed\xa0\x80"
3419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf0\x80\x80\x80"
3420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf4\x90\x80\x80"
3421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xf8\x80\x80\x80\x80"
3422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfc\x80\x80\x80\x80\x80"
3423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xfe"
3424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xff"
3425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* trail byte in lead position */
3427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\x80"
3428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\xED\x9E\xA4\xE2\x80\xA8"              // unassigned, LS, NUL-terminated
3430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
3431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((whichSpans&SPAN_UTF8)==0) {
3433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSpan(sets, s, -1, FALSE, (whichSpans&~SPAN_UTF16), testName, 0);
3436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    testSpan(sets, s, LENGTHOF(s)-1, FALSE, (whichSpans&~SPAN_UTF16), testName, 1);
3437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Multiplies a list of span option words by a set of alternatives.
// Every existing entry is reduced to its bits in mask; then
// - the entry itself gets option a,
// - if b!=0, a copy with option b is stored whichSpansCount entries later,
// - if b!=0 and c!=0, a copy with option c is stored 2*whichSpansCount entries later.
// The caller must provide room for all copies in whichSpans[].
// Returns the new number of entries: whichSpansCount times 1, 2 or 3.
static int32_t
addAlternative(uint32_t whichSpans[], int32_t whichSpansCount,
               uint32_t mask, uint32_t a, uint32_t b, uint32_t c) {
    // c is only considered when b is present.
    int32_t copies=1;
    if(b!=0) {
        copies= (c!=0) ? 3 : 2;
    }

    for(int32_t idx=0; idx<whichSpansCount; ++idx) {
        const uint32_t kept=whichSpans[idx]&mask;
        whichSpans[idx]=kept|a;
        if(copies>=2) {
            whichSpans[whichSpansCount+idx]=kept|b;
        }
        if(copies==3) {
            whichSpans[2*whichSpansCount+idx]=kept|c;
        }
    }
    return copies*whichSpansCount;
}
3461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
// Long single-letter string literals. Going by the macro names, each one
// repeats 'a' or 'b' 63 or 64 times (repeat counts not re-verified here).
// NOTE(review): presumably building blocks for long test strings in
// TestSpan(); the uses are outside this part of the file - confirm.
3462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define _63_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define _64_a "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define _63_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
3465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define _64_b "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
3466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestSpan() {
3468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // "[...]" is a UnicodeSet pattern.
3469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // "*" performs tests on all Unicode code points and on a selection of
3470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   malformed UTF-8/16 strings.
3471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // "-options" limits the scope of testing for the current set.
3472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   By default, the test verifies that equivalent boundaries are found
3473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   for UTF-16 and UTF-8, going forward and backward,
3474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   alternating USET_SPAN_NOT_CONTAINED with
3475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   either USET_SPAN_CONTAINED or USET_SPAN_SIMPLE.
3476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   Single-character options:
3477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //     8 -- UTF-16 and UTF-8 boundaries may differ.
3478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          Cause: contains(U+FFFD) is inconsistent with contains(some surrogates),
3479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          or the set contains strings with unpaired surrogates
3480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          which do not translate to valid UTF-8.
3481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //     c -- set.span() and set.complement().span() boundaries may differ.
3482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          Cause: Set strings are not complemented.
3483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //     b -- span() and spanBack() boundaries may differ.
3484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          Cause: Strings in the set overlap, and spanBack(USET_SPAN_CONTAINED)
3485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          and spanBack(USET_SPAN_SIMPLE) are defined to
3486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          match with non-overlapping substrings.
3487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          For example, with a set containing "ab" and "ba",
3488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          span() of "aba" yields boundaries { 0, 2, 3 }
3489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          because the initial "ab" matches from 0 to 2,
3490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          while spanBack() yields boundaries { 0, 1, 3 }
3491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          because the final "ba" matches from 1 to 3.
3492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //     l -- USET_SPAN_CONTAINED and USET_SPAN_SIMPLE boundaries may differ.
3493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          Cause: Strings in the set overlap, and a longer match may
3494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          require a sequence including non-longest substrings.
3495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          For example, with a set containing "ab", "abc" and "cd",
3496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          span(contained) of "abcd" spans the entire string
3497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //          but span(longest match) only spans the first 3 characters.
3498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   Each "-options" first resets all options and then applies the specified options.
3499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   A "-" without options resets the options.
3500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    //   The options are also reset for each new set.
3501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Other strings will be spanned.
3502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *const testdata[]={
3503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:ID_Continue:]",
3504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[:White_Space:]",
3506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[]",
3508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u0000-\\U0010FFFF]",
3510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u0000\\u0080\\u0800\\U00010000]",
3512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u007F\\u07FF\\uFFFF\\U0010FFFF]",
3514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u3000\\u30ab}{\\u3000\\u30ab\\u30ad}]",
3516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-c",
3517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[[[:ID_Continue:]-[\\u30ab\\u30ad]]{\\u30ab\\u30ad}{\\u3000\\u30ab\\u30ad}]",
3519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-c",
3520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "*",
3521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Overlapping strings cause overlapping attempts to match.
3523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[x{xy}{xya}{axy}{ax}]",
3524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // More repetitions of "xya" would take too long with the recursive
3527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // reference implementation.
3528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // containsAll()=FALSE
3529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // test_string 0x14
3530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxya"  // set.complement().span(longest match) will stop here.
3532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"            // set.complement().span(contained) will stop between the two 'x'es.
3533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxya"
3534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxya"  // span() ends here.
3536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "aaa",
3537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // containsAll()=TRUE
3539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // test_string 0x15
3540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxya"
3542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxya"
3544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxy",
3546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-bc",
3548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // test_string 0x17
3549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "byayaxya",  // span() -> { 4, 7, 8 }  spanBack() -> { 5, 8 }
3550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-c",
3551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "byayaxy",   // span() -> { 4, 7 }     complement.span() -> { 7 }
3552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "byayax",    // span() -> { 4, 6 }     complement.span() -> { 6 }
3553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-",
3554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "byaya",     // span() -> { 5 }
3555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "byay",      // span() -> { 4 }
3556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "bya",       // span() -> { 3 }
3557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // span(longest match) will not span the whole string.
3559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a{ab}{bc}]",
3560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // test_string 0x21
3562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc",
3563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a{ab}{abc}{cd}]",
3565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "acdabcdabccd",
3567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // spanBack(longest match) will not span the whole string.
3569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[c{ab}{bc}]",
3570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abc",
3572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[d{cd}{bcd}{ab}]",
3574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "abbcdabcdabd",
3576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Test with non-ASCII set strings - test proper handling of surrogate pairs
3578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // and UTF-8 trail bytes.
3579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Copies of above test sets and strings, but transliterated to have
3580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // different code points with similar trail units.
3581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Previous: a      b         c            d
3582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Unicode:  042B   30AB      200AB        204AB
3583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // UTF-16:   042B   30AB      D840 DCAB    D841 DCAB
3584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // UTF-8:    D0 AB  E3 82 AB  F0 A0 82 AB  F0 A0 92 AB
3585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\u042B{\\u042B\\u30AB}{\\u042B\\u30AB\\U000200AB}{\\U000200AB\\U000204AB}]",
3586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u042B\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000200AB\\U000204AB",
3588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[\\U000204AB{\\U000200AB\\U000204AB}{\\u30AB\\U000200AB\\U000204AB}{\\u042B\\u30AB}]",
3590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-cl",
3591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "\\u042B\\u30AB\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000200AB\\U000204AB\\u042B\\u30AB\\U000204AB",
3592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Stress bookkeeping and recursion.
3594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // The following strings are barely doable with the recursive
3595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // reference implementation.
3596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // The not-contained character at the end prevents an early exit from the span().
3597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[b{bb}]",
3598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-c",
3599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // test_string 0x33
3600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "bbbbbbbbbbbbbbbbbbbbbbbb-",
3601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // On complement sets, span() and spanBack() get different results
3602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // because b is not in the complement set and there is an odd number of b's
3603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // in the test string.
3604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-bc",
3605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "bbbbbbbbbbbbbbbbbbbbbbbbb-",
3606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Test with set strings with an initial or final code point span
3608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // longer than 254.
3609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a{" _64_a _64_a _64_a _64_a "b}"
3610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          "{a" _64_b _64_b _64_b _64_b "}]",
3611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-c",
3612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        _64_a _64_a _64_a _63_a "b",
3613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        _64_a _64_a _64_a _64_a "b",
3614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        _64_a _64_a _64_a _64_a "aaaabbbb",
3615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "a" _64_b _64_b _64_b _63_b,
3616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "a" _64_b _64_b _64_b _64_b,
3617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "aaaabbbb" _64_b _64_b _64_b _64_b,
3618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // Test with strings containing unpaired surrogates.
3620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // They are not representable in UTF-8, and a leading trail surrogate
3621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // and a trailing lead surrogate must not match in the middle of a proper surrogate pair.
3622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // U+20001 == \\uD840\\uDC01
3623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        // U+20400 == \\uD841\\uDC00
3624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "[a\\U00020001\\U00020400{ab}{b\\uD840}{\\uDC00a}]",
3625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "-8cl",
3626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "aaab\\U00020001ba\\U00020400aba\\uD840ab\\uD840\\U00020000b\\U00020000a\\U00020000\\uDC00a\\uDC00babbb"
3627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    };
3628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t whichSpans[96]={ SPAN_ALL };
3629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t whichSpansCount=1;
3630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet *sets[SET_COUNT]={ NULL };
3632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UnicodeSetWithStrings *sets_with_str[SET_COUNT]={ NULL };
3633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char testName[1024];
3635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    char *testNameLimit=testName;
3636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, j;
3638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<LENGTHOF(testdata); ++i) {
3639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const char *s=testdata[i];
3640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(s[0]=='[') {
3641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Create new test sets from this pattern.
3642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(j=0; j<SET_COUNT; ++j) {
3643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete sets_with_str[j];
3644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                delete sets[j];
3645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UErrorCode errorCode=U_ZERO_ERROR;
3647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sets[SLOW]=new UnicodeSet(UnicodeString(s, -1, US_INV).unescape(), errorCode);
3648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(U_FAILURE(errorCode)) {
3649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                dataerrln("FAIL: Unable to create UnicodeSet(%s) - %s", s, u_errorName(errorCode));
3650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
3651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sets[SLOW_NOT]=new UnicodeSet(*sets[SLOW]);
3653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sets[SLOW_NOT]->complement();
3654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // Intermediate set: Test cloning of a frozen set.
3655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeSet *fast=new UnicodeSet(*sets[SLOW]);
3656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fast->freeze();
3657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sets[FAST]=(UnicodeSet *)fast->clone();
3658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            delete fast;
3659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeSet *fastNot=new UnicodeSet(*sets[SLOW_NOT]);
3660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fastNot->freeze();
3661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            sets[FAST_NOT]=(UnicodeSet *)fastNot->clone();
3662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            delete fastNot;
3663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(j=0; j<SET_COUNT; ++j) {
3665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                sets_with_str[j]=new UnicodeSetWithStrings(*sets[j]);
3666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            strcpy(testName, s);
3669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            testNameLimit=strchr(testName, 0);
3670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *testNameLimit++=':';
3671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *testNameLimit=0;
3672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            whichSpans[0]=SPAN_ALL;
3674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            whichSpansCount=1;
3675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(s[0]=='-') {
3676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            whichSpans[0]=SPAN_ALL;
3677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            whichSpansCount=1;
3678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            while(*++s!=0) {
3680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                switch(*s) {
3681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                case 'c':
3682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   ~SPAN_POLARITY,
3684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_SET,
3685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_COMPLEMENT,
3686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   0);
3687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
3688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                case 'b':
3689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   ~SPAN_DIRS,
3691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_FWD,
3692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_BACK,
3693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   0);
3694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
3695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                case 'l':
3696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // test USET_SPAN_CONTAINED FWD & BACK, and separately
3697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // USET_SPAN_SIMPLE only FWD, and separately
3698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    // USET_SPAN_SIMPLE only BACK
3699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   ~(SPAN_DIRS|SPAN_CONDITION),
3701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_DIRS|SPAN_CONTAINED,
3702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_FWD|SPAN_SIMPLE,
3703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_BACK|SPAN_SIMPLE);
3704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
3705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                case '8':
3706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    whichSpansCount=addAlternative(whichSpans, whichSpansCount,
3707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   ~SPAN_UTFS,
3708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_UTF16,
3709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   SPAN_UTF8,
3710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                                   0);
3711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
3712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                default:
3713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    errln("FAIL: unrecognized span set option in \"%s\"", testdata[i]);
3714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    break;
3715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(0==strcmp(s, "*")) {
3718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            strcpy(testNameLimit, "bad_string");
3719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(j=0; j<whichSpansCount; ++j) {
3720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(whichSpansCount>1) {
3721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    sprintf(testNameLimit+10 /* strlen("bad_string") */,
3722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            "%%0x%3x",
3723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            whichSpans[j]);
3724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testSpanUTF16String(sets_with_str, whichSpans[j], testName);
3726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testSpanUTF8String(sets_with_str, whichSpans[j], testName);
3727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            strcpy(testNameLimit, "contents");
3730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(j=0; j<whichSpansCount; ++j) {
3731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(whichSpansCount>1) {
3732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    sprintf(testNameLimit+8 /* strlen("contents") */,
3733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            "%%0x%3x",
3734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            whichSpans[j]);
3735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testSpanContents(sets_with_str, whichSpans[j], testName);
3737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
3739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            UnicodeString string=UnicodeString(s, -1, US_INV).unescape();
3740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            strcpy(testNameLimit, "test_string");
3741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(j=0; j<whichSpansCount; ++j) {
3742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(whichSpansCount>1) {
3743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    sprintf(testNameLimit+11 /* strlen("test_string") */,
3744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            "%%0x%3x",
3745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            whichSpans[j]);
3746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
3747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                testSpanBothUTFs(sets_with_str, string.getBuffer(), string.length(), whichSpans[j], testName, i);
3748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
3749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
3750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(j=0; j<SET_COUNT; ++j) {
3752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete sets_with_str[j];
3753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete sets[j];
3754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Test select patterns and strings, and test USET_SPAN_SIMPLE.
3758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void UnicodeSetTest::TestStringSpan() {
3759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *pattern="[x{xy}{xya}{axy}{ax}]";
3760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    static const char *const string=
3761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
3763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxya"
3765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xx"
3766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "xyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxyaxy"
3767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        "aaaa";
3768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
3770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString pattern16=UnicodeString(pattern, -1, US_INV);
3771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeSet set(pattern16, errorCode);
3772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
3773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString string16=UnicodeString(string, -1, US_INV).unescape();
3778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(set.containsAll(string16)) {
3780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet(%s).containsAll(%s) should be FALSE", pattern, string);
3781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Remove trailing "aaaa".
3784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    string16.truncate(string16.length()-4);
3785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!set.containsAll(string16)) {
3786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet(%s).containsAll(%s[:-4]) should be TRUE", pattern, string);
3787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    string16=UNICODE_STRING_SIMPLE("byayaxya");
3790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UChar *s16=string16.getBuffer();
3791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t length16=string16.length();
3792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( set.span(s16, 8, USET_SPAN_NOT_CONTAINED)!=4 ||
3793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16, 7, USET_SPAN_NOT_CONTAINED)!=4 ||
3794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16, 6, USET_SPAN_NOT_CONTAINED)!=4 ||
3795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16, 5, USET_SPAN_NOT_CONTAINED)!=5 ||
3796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16, 4, USET_SPAN_NOT_CONTAINED)!=4 ||
3797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16, 3, USET_SPAN_NOT_CONTAINED)!=3
3798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
3799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet(%s).span(while not) returns the wrong value", pattern);
3800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pattern="[a{ab}{abc}{cd}]";
3803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pattern16=UnicodeString(pattern, -1, US_INV);
3804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern(pattern16, errorCode);
3805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
3806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    string16=UNICODE_STRING_SIMPLE("acdabcdabccd");
3810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s16=string16.getBuffer();
3811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length16=string16.length();
3812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( set.span(s16, 12, USET_SPAN_CONTAINED)!=12 ||
3813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16, 12, USET_SPAN_SIMPLE)!=6 ||
3814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.span(s16+7, 5, USET_SPAN_SIMPLE)!=5
3815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
3816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet(%s).span(while longest match) returns the wrong value", pattern);
3817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
3819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pattern="[d{cd}{bcd}{ab}]";
3820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pattern16=UnicodeString(pattern, -1, US_INV);
3821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set.applyPattern(pattern16, errorCode).freeze();
3822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
3823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: Unable to create UnicodeSet(%s) - %s", pattern, u_errorName(errorCode));
3824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
3825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    string16=UNICODE_STRING_SIMPLE("abbcdabcdabd");
3827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    s16=string16.getBuffer();
3828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length16=string16.length();
3829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( set.spanBack(s16, 12, USET_SPAN_CONTAINED)!=0 ||
3830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.spanBack(s16, 12, USET_SPAN_SIMPLE)!=6 ||
3831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        set.spanBack(s16, 5, USET_SPAN_SIMPLE)!=0
3832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
3833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errln("FAIL: UnicodeSet(%s).spanBack(while longest match) returns the wrong value", pattern);
3834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
3835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
3836