1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
38393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*   Copyright (C) 1999-2012, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Date        Name        Description
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   10/20/99    alan        Creation.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h"
14103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uniset.h"
15103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf8.h"
16103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ruleiter.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h"
21103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uelement.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "charstr.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bmpset.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unisetspan.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// UChar constants are written as numeric code points (in hex) instead of
// character literals, so they keep their Unicode values on EBCDIC platforms.
// They are macros rather than static constants to reduce private static
// exports and memory access time.
#define SET_OPEN        ((UChar)0x005B) /*[*/
#define SET_CLOSE       ((UChar)0x005D) /*]*/
#define HYPHEN          ((UChar)0x002D) /*-*/
#define COMPLEMENT      ((UChar)0x005E) /*^*/
#define COLON           ((UChar)0x003A) /*:*/
#define BACKSLASH       ((UChar)0x005C) /*\*/
#define INTERSECTION    ((UChar)0x0026) /*&*/
#define UPPER_U         ((UChar)0x0055) /*U*/
#define LOWER_U         ((UChar)0x0075) /*u*/
#define OPEN_BRACE      ((UChar)0x007B) /*{*/
#define CLOSE_BRACE     ((UChar)0x007D) /*}*/
#define UPPER_P         ((UChar)0x0050) /*P*/
#define LOWER_P         ((UChar)0x0070) /*p*/
#define UPPER_N         ((UChar)0x004E) /*N*/
#define EQUALS          ((UChar)0x003D) /*=*/
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Exclusive upper bound: greater than every valid value.
// For code points this is 0x110000, one past U+10FFFF.
#define UNICODESET_HIGH 0x110000

// Inclusive lower bound: less than or equal to every valid value.
// For code points this is zero.
#define UNICODESET_LOW 0x0

// Number of extra slots reserved when storage is first allocated. Must be >= 0.
#define START_EXTRA 16

// Number of extra slots reserved when storage grows. Must be >= 0.
#define GROW_EXTRA START_EXTRA
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruSymbolTable::~SymbolTable() {}
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet)
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Modify the given UChar32 variable so that it is in range, by
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values < UNICODESET_LOW to UNICODESET_LOW, and
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1.
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It modifies its argument in-place and also returns it.
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 pinCodePoint(UChar32& c) {
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < UNICODESET_LOW) {
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = UNICODESET_LOW;
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (c > (UNICODESET_HIGH-1)) {
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = (UNICODESET_HIGH-1);
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return c;
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
//----------------------------------------------------------------
// Debugging
//----------------------------------------------------------------

// DO NOT DELETE THIS CODE.  This code is used to debug memory leaks.
// To enable the debugging, define the symbol DEBUG_MEM in the line
// below.  This will result in text being sent to stdout that looks
// like this:
//   DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
//   DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
// Each line lists a construction (ct) or destruction (dt) event, the
// object address, the number of outstanding objects after the event,
// and the pattern of the object in question.
// The address is printed with %p, so its exact rendering (prefix, width,
// letter case) depends on the C library.

// #define DEBUG_MEM

#ifdef DEBUG_MEM
#include <stdio.h>
static int32_t _dbgCount = 0;

/**
 * Shared implementation of _dbgct/_dbgdt: prints one event line.
 *
 * @param event two-letter event tag, "ct" or "dt"
 * @param set   the object being constructed or destroyed
 * @param delta amount added to the outstanding-object counter (+1 or -1);
 *              applied after the pattern has been extracted, immediately
 *              before printing
 */
static inline void _dbgLog(const char* event, UnicodeSet* set, int32_t delta) {
    UnicodeString str;
    set->toPattern(str, TRUE);
    char buf[40];
    str.extract(0, 39, buf, "");
    // %p requires a void* argument; the previous "%08X" was handed a pointer,
    // which is a format/argument mismatch and loses bits on 64-bit targets.
    printf("DEBUG UnicodeSet: %s %p; %d %s\n",
           event, static_cast<void*>(set),
           static_cast<int>(_dbgCount += delta), buf);
}

/** Logs a construction event for the given set. */
static inline void _dbgct(UnicodeSet* set) {
    _dbgLog("ct", set, 1);
}

/** Logs a destruction event for the given set. */
static inline void _dbgdt(UnicodeSet* set) {
    _dbgLog("dt", set, -1);
}

#else

// Debugging disabled: the hooks expand to nothing.
#define _dbgct(set)
#define _dbgdt(set)

#endif
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// UnicodeString in UVector support
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) {
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer);
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
132103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString &a = *(const UnicodeString*)t1.pointer;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString &b = *(const UnicodeString*)t2.pointer;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return a.compare(b);
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors &c
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs an empty set.
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet() :
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[0] = UNICODESET_HIGH;
158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set containing the given range. If <code>end >
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start</code> then an empty set is created.
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[0] = UNICODESET_HIGH;
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(start, end);
186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set that is identical to the given UnicodeSet.
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o) :
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeFilter(o),
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0),
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bmpSet(0),
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(0), bufferCapacity(0),
201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *this = o;
212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy-construct as thawed.
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeFilter(o),
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(o.len + GROW_EXTRA), list(0),
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bmpSet(0),
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(0), bufferCapacity(0),
225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // *this = o except for bmpSet and stringSpan
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len = o.len;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcpy(list, o.list, len*sizeof(UChar32));
238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (strings != NULL && o.strings != NULL) {
239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            strings->assign(*o.strings, cloneUnicodeString, status);
240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { // Invalid strings.
241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            setToBogus();
242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return;
243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (o.pat) {
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            setPattern(UnicodeString(o.pat, o.patLen));
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructs the set.
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::~UnicodeSet() {
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgdt(this); // first!
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_free(list);
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete bmpSet;
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buffer) {
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(buffer);
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete strings;
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete stringSpan;
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assigns this object to be a copy of another.
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (this == &o) {
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isFrozen()) {
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (o.isBogus()) {
280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *this;
282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureCapacity(o.len, ec);
285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(ec)) {
286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *this; // There is no way to report this error :-(
287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = o.len;
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(list, o.list, len*sizeof(UChar32));
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (o.bmpSet == NULL) {
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpSet = NULL;
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpSet = new BMPSet(*o.bmpSet, list, len);
294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (bmpSet == NULL) { // Check for memory allocation error.
295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            setToBogus();
296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (strings != NULL && o.strings != NULL) {
300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        strings->assign(*o.strings, cloneUnicodeString, ec);
301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // Invalid strings.
302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *this;
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (o.stringSpan == NULL) {
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        stringSpan = NULL;
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (stringSpan == NULL) { // Check for memory allocation error.
310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            setToBogus();
311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (o.pat) {
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        setPattern(UnicodeString(o.pat, o.patLen));
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a copy of this object.  All UnicodeMatcher objects have
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to support cloning in order to allow classes using
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UnicodeMatchers, such as Transliterator, to implement cloning.
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor* UnicodeSet::clone() const {
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new UnicodeSet(*this);
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::cloneAsThawed() const {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new UnicodeSet(*this, TRUE);
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compares the specified object with this set for equality.  Returns
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>true</tt> if the two sets
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the same size, and every member of the specified set is
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in this set (or equivalently, every member of this set is
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in the specified set).
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param o set to be compared for equality with this set.
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if the specified set is equal to this set.
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::operator==(const UnicodeSet& o) const {
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (len != o.len) return FALSE;
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < len; ++i) {
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (list[i] != o.list[i]) return FALSE;
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (*strings != *o.strings) return FALSE;
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the hash code value for this set.
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the hash code value for this set.
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see Object#hashCode()
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::hashCode(void) const {
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = len;
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < len; ++i) {
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result *= 1000003;
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result += list[i];
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Public API
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the number of elements in this set (its cardinality),
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note than the elements of a set may include both individual
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * codepoints and strings.
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the number of elements in this set (its cardinality).
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::size(void) const {
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = 0;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count = getRangeCount();
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < count; ++i) {
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        n += getRangeEnd(i) - getRangeStart(i) + 1;
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return n + strings->size();
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains no elements.
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains no elements.
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::isEmpty(void) const {
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return len == 1 && strings->size() == 0;
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains the given character.
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c character to be checked for containment
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 c) const {
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Set i to the index of the start item greater than ch
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We know we will terminate without length test!
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // LATER: for large sets, add binary search
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //int32_t i = -1;
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //for (;;) {
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    if (c < list[++i]) break;
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bmpSet != NULL) {
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return bmpSet->contains(c);
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (stringSpan != NULL) {
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->contains(c);
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(c);
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(i & 1); // return true if odd
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the smallest value i such that c < list[i].  Caller
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must ensure that c is a legal value or this method will enter
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an infinite loop.  This method performs a binary search.
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c a character in the range MIN_VALUE..MAX_VALUE
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the smallest integer i in the range 0..len-1,
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive, such that c < list[i]
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::findCodePoint(UChar32 c) const {
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Examples:
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                       findCodePoint(c)
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       set              list[]         c=0 1 3 4 7 8
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       ===              ==============   ===========
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       []               [110000]         0 0 0 0 0 0
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       [:Any:]          [0, 110000]      1 1 1 1 1 1
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Return the smallest i such that c < list[i].  Assume
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < list[0])
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // High runner test.  c is often after the last range, so an
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // initial check for this condition pays off.
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t lo = 0;
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t hi = len - 1;
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lo >= hi || c >= list[hi-1])
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return hi;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // invariant: c >= list[lo]
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // invariant: c < list[hi]
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i = (lo + hi) >> 1;
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i == lo) {
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break; // Found!
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (c < list[i]) {
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            hi = i;
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lo = i;
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return hi;
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains every character
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range.
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //int32_t i = -1;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //for (;;) {
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    if (start < list[++i]) break;
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(start);
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ((i & 1) != 0 && end < list[i]);
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains the given
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * multicharacter string.
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string to be checked for containment
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains the specified string
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(const UnicodeString& s) const {
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s.length() == 0) return FALSE;
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return strings->contains((void*) &s);
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return contains((UChar32) cp);
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters and strings
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set.
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeSet& c) const {
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The specified set is a subset if all of its pairs are contained in
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this set.  It's possible to code this more efficiently in terms of
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // direct manipulation of the inversion lists if the need arises.
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = c.getRangeCount();
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int i=0; i<n; ++i) {
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) {
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!strings->containsAll(*c.strings)) return FALSE;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string.
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeString& s) const {
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) ==
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   s.length());
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range.
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const {
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //int32_t i = -1;
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //for (;;) {
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    if (start < list[++i]) break;
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(start);
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ((i & 1) == 0 && end < list[i]);
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters and strings
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set.
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeSet& c) const {
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The specified set is a subset if all of its pairs are contained in
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this set.  It's possible to code this more efficiently in terms of
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // direct manipulation of the inversion lists if the need arises.
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = c.getRangeCount();
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<n; ++i) {
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!strings->containsNone(*c.strings)) return FALSE;
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string.
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeString& s) const {
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) ==
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   s.length());
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains any character whose low byte
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indexing.
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::matchesIndexValue(uint8_t v) const {
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* The index value v, in the range [0,255], is contained in this set if
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * it is contained in any pair of this set.  Pairs either have the high
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * bytes equal, or unequal.  If the high bytes are equal, then we have
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * aaxx..aayy, where aa is the high byte.  Then v is contained if xx <=
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * v <= yy.  If the high bytes are unequal we have aaxx..bbyy, bb>aa.
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Then v is contained if xx <= v || v <= yy.  (This is identical to the
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * time zone month containment logic.)
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t rangeCount=getRangeCount();
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<rangeCount; ++i) {
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 low = getRangeStart(i);
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 high = getRangeEnd(i);
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((low & ~0xFF) == (high & ~0xFF)) {
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((low & 0xFF) <= v && v <= (high & 0xFF)) {
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return TRUE;
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) {
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return TRUE;
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (strings->size() != 0) {
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<strings->size(); ++i) {
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //if (s.length() == 0) {
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    // Empty strings match everything
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    return TRUE;
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //}
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // assert(s.length() != 0); // We enforce this elsewhere
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 c = s.char32At(0);
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((c & 0xFF) == v) {
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return TRUE;
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implementation of UnicodeMatcher::matches().  Always matches the
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * longest possible multichar string.
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUMatchDegree UnicodeSet::matches(const Replaceable& text,
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t& offset,
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t limit,
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 UBool incremental) {
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (offset == limit) {
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Strings, if any, have length != 0, so we don't worry
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // about them here.  If we ever allow zero-length strings
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // we much check for them here.
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (contains(U_ETHER)) {
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return incremental ? U_PARTIAL_MATCH : U_MATCH;
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return U_MISMATCH;
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (strings->size() != 0) { // try strings first
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // might separate forward and backward loops later
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // for now they are combined
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // TODO Improve efficiency of this, at least in the forward
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // direction, if not in both.  In the forward direction we
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // can assume the strings are sorted.
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t i;
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UBool forward = offset < limit;
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // firstChar is the leftmost char to match in the
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // forward direction or the rightmost char to match in
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // the reverse direction.
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar firstChar = text.charAt(offset);
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If there are multiple strings that can match we
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // return the longest match.
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t highWaterLength = 0;
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (i=0; i<strings->size(); ++i) {
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //if (trial.length() == 0) {
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    return U_MATCH; // null-string always matches
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //}
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // assert(trial.length() != 0); // We ensure this elsewhere
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Strings are sorted, so we can optimize in the
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // forward direction.
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (forward && c > firstChar) break;
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c != firstChar) continue;
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t matchLen = matchRest(text, offset, limit, trial);
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (incremental) {
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t maxLen = forward ? limit-offset : offset-limit;
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (matchLen == maxLen) {
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // We have successfully matched but only up to limit.
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return U_PARTIAL_MATCH;
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (matchLen == trial.length()) {
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We have successfully matched the whole string.
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (matchLen > highWaterLength) {
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        highWaterLength = matchLen;
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // In the forward direction we know strings
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // are sorted so we can bail early.
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (forward && matchLen < highWaterLength) {
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We've checked all strings without a partial match.
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If we have full matches, return the longest one.
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (highWaterLength != 0) {
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                offset += forward ? highWaterLength : -highWaterLength;
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return U_MATCH;
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UnicodeFilter::matches(text, offset, limit, incremental);
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the longest match for s in text at the given position.
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If limit > start then match forward from start+1 to limit
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * matching all characters except s.charAt(0).  If limit < start,
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go backward starting from start-1 matching all characters
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except s.charAt(s.length()-1).  This method assumes that the
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * first character, text.charAt(start), matches s, so it does not
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * check it.
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the text to match
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start the first character to match.  In the forward
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction, text.charAt(start) is matched against s.charAt(0).
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In the reverse direction, it is matched against
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.charAt(s.length()-1).
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param limit the limit offset for matching, either last+1 in
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the forward direction, or last-1 in the reverse direction,
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * where last is the index of the last character to match.
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return If part of s matches up to the limit, return |limit -
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start|.  If all of s matches before reaching the limit, return
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.length().  If there is a mismatch between s and text, return
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::matchRest(const Replaceable& text,
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              int32_t start, int32_t limit,
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UnicodeString& s) {
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t maxLen;
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t slen = s.length();
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (start < limit) {
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        maxLen = limit - start;
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (maxLen > slen) maxLen = slen;
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i = 1; i < maxLen; ++i) {
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (text.charAt(start + i) != s.charAt(i)) return 0;
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        maxLen = start - limit;
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (maxLen > slen) maxLen = slen;
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        --slen; // <=> slen = s.length() - 1;
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i = 1; i < maxLen; ++i) {
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (text.charAt(start - i) != s.charAt(slen - i)) return 0;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return maxLen;
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement of UnicodeMatcher
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    toUnionTo.addAll(*this);
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index of the given character within this set, where
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point.  If the character
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is not in this set, return -1.  The inverse of this method is
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>charAt()</code>.
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an index from 0..size()-1, or -1
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::indexOf(UChar32 c) const {
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < MIN_VALUE || c > MAX_VALUE) {
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0;
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = 0;
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = list[i++];
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c < start) {
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 limit = list[i++];
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c < limit) {
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return n + c - start;
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        n += limit - start;
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the character at the given index within this set, where
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point.  If the index is
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * out of range, return (UChar32)-1.  The inverse of this method is
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>indexOf()</code>.
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an index from 0..size()-1
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the character at the given index, or (UChar32)-1.
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::charAt(int32_t index) const {
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (index >= 0) {
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // len2 is the largest even integer <= len, that is, it is len
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // for even values and len-1 for odd values.  With odd values
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // the last entry is UNICODESET_HIGH.
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t len2 = len & ~1;
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i=0; i < len2;) {
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 start = list[i++];
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t count = list[i++] - start;
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (index < count) {
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return (UChar32)(start + index);
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            index -= count;
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UChar32)-1;
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make this object represent the range <code>start - end</code>.
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If <code>end > start</code> then this object is set to an
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an empty range.
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character in the set, inclusive
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @rparam end last character in the set, inclusive
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clear();
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    complement(start, end);
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified range to this set if it is not already
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present.  If this set already contains the specified range,
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged.  If <code>end > start</code>
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is added, leaving the set unchanged.
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be added
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be added
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) < pinCodePoint(end)) {
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(range, 2, 0);
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (start == end) {
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(start);
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #define DEBUG_US_ADD
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
#ifdef DEBUG_US_ADD
#include <stdio.h>
/**
 * Debug helper: prints one code point to stdout.  Values up to
 * 0xFF are printed as a raw character, anything larger as U+XXXX.
 */
void dump(UChar32 c) {
    if (c > 0xFF) {
        printf("U+%04X", c);
        return;
    }
    printf("%c", (char)c);
}
/**
 * Debug helper: prints the first len entries of list to stdout as
 * a bracketed, comma-separated sequence.
 */
void dump(const UChar32* list, int32_t len) {
    printf("[");
    if (len > 0) {
        dump(list[0]);
    }
    for (int32_t k = 1; k < len; ++k) {
        printf(", ");
        dump(list[k]);
    }
    printf("]");
}
#endif
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified character to this set if it is not already
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present.  If this set already contains the specified character,
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged.
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 c) {
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // find smallest i such that c < list[i]
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if odd, then it is IN the set
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if even, then it is OUT of the set
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(pinCodePoint(c));
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // already in set?
882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((i & 1) != 0  || isFrozen() || isBogus()) return *this;
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // HIGH is 0x110000
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // assert(list[len-1] == HIGH);
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // empty = [HIGH]
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // [start_0, limit_0, start_1, limit_1, HIGH]
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //                             ^
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //                             list[i]
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // i == 0 means c is before the first range
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("Add of ");
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dump(c);
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(" found at %d", i);
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(": ");
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dump(list, len);
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(" => ");
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == list[i]-1) {
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // c is before start of next range
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i] = c;
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // if we touched the HIGH mark, then add a new one
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c == (UNICODESET_HIGH - 1)) {
910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UErrorCode status = U_ZERO_ERROR;
911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ensureCapacity(len+1, status);
912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (U_FAILURE(status)) {
913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return *this; // There is no way to report this error :-(
914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list[len++] = UNICODESET_HIGH;
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i > 0 && c == list[i-1]) {
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // collapse adjacent ranges
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // [..., start_k-1, c, c, limit_k, ..., HIGH]
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                     ^
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                     list[i]
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //for (int32_t k=i-1; k<len-2; ++k) {
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    list[k] = list[k+2];
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //}
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32* dst = list + i - 1;
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32* src = dst + 2;
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32* srclimit = list + len;
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while (src < srclimit) *(dst++) = *(src++);
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            len -= 2;
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else if (i > 0 && c == list[i-1]) {
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // c is after end of prior range
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i-1]++;
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // no need to check for collapse here
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // At this point we know the new char is not adjacent to
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // any existing ranges, and it is not 10FFFF.
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             ^
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             list[i]
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             ^
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             list[i]
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ensureCapacity(len+2, status);
957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U_FAILURE(status)) {
958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this; // There is no way to report this error :-(
959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //for (int32_t k=len-1; k>=i; --k) {
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    list[k+2] = list[k];
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //}
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32* src = list + len;
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32* dst = src + 2;
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32* srclimit = list + i;
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (src > srclimit) *(--dst) = *(--src);
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i] = c;
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i+1] = c+1;
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len += 2;
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dump(list, len);
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("\n");
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=1; i<len; ++i) {
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (list[i] <= list[i-1]) {
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Corrupt array!
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("ERROR: list has been corrupted\n");
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            exit(1);
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified multicharacter to this set if it is not already
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present.  If this set already contains the multicharacter,
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged.
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Thus "ch" => {"ch"}
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the source string
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(const UnicodeString& s) {
1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!strings->contains((void*) &s)) {
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _add(s);
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            releasePattern();
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add((UChar32)cp);
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the given string, in order, to 'strings'.  The given string
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must have been checked by the caller to not be empty and to not
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * already be in 'strings'.
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_add(const UnicodeString& s) {
1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString* t = new UnicodeString(s);
1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (t == NULL) { // Check for memory allocation error.
1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings->sortedInsert(t, compareUnicodeString, ec);
1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        delete t;
1033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a code point IF the string consists of a single one.
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise returns -1.
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param string to test
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (s.length() < 1) {
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s.length() > 2) return -1;
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s.length() == 1) return s.charAt(0);
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // at this point, len = 2
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 cp = s.char32At(0);
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp > 0xFFFF) { // is surrogate pair
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return cp;
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1;
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 cp;
1064103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cp = s.char32At(i);
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(cp);
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) {
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(s);
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retainAll(set);
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) {
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(s);
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    complementAll(set);
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(s);
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    removeAll(set);
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAllStrings() {
1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    strings->removeAllElements();
1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return *this;
1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given string
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) {
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set = new UnicodeSet();
1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (set != NULL) { // Check for memory allocation error.
1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        set->add(s);
1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return set;
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given characters
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) {
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set = new UnicodeSet();
1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (set != NULL) { // Check for memory allocation error.
1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        set->addAll(s);
1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return set;
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retain only the elements in this set that are contained in the
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range.  If <code>end > start</code> then an empty range is
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * retained, leaving the set empty.
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be retained
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be retained
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) {
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) <= pinCodePoint(end)) {
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(range, 2, 0);
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        clear();
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 c) {
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return retain(c, c);
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified range from this set if it is present.
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.  If <code>end > start</code> then an empty range is
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * removed, leaving the set unchanged.
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) {
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) <= pinCodePoint(end)) {
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(range, 2, 2);
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified character from this set if it is present.
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 c) {
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return remove(c, c);
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified string from this set if it is present.
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified character once the call
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
1204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        strings->removeElement((void*) &s);
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        releasePattern();
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove((UChar32)cp, (UChar32)cp);
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements the specified range in this set.  Any character in
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the range will be removed if it is in this set, or will be
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added if it is not in this set.  If <code>end > start</code>
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is xor'ed, leaving the set unchanged.
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) <= pinCodePoint(end)) {
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exclusiveOr(range, 2, 0);
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 c) {
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return complement(c, c);
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is equivalent to
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(void) {
1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (list[0] == UNICODESET_LOW) {
1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ensureBufferCapacity(len-1, status);
1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U_FAILURE(status)) {
1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcpy(buffer, list + 1, (len-1)*sizeof(UChar32));
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        --len;
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ensureBufferCapacity(len+1, status);
1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U_FAILURE(status)) {
1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcpy(buffer + 1, list, len*sizeof(UChar32));
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer[0] = UNICODESET_LOW;
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++len;
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement the specified string in this set.
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified string once the call
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the string to complement
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this object, for chaining
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (strings->contains((void*) &s)) {
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strings->removeElement((void*) &s);
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _add(s);
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        releasePattern();
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement((UChar32)cp, (UChar32)cp);
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds all of the elements in the specified set to this set if
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * they're not already present.  This operation effectively
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * modifies this set so that its value is the <i>union</i> of the two
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * sets.  The behavior of this operation is unspecified if the specified
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * collection is modified while the operation is in progress.
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set whose elements are to be added to this set.
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #add(char, char)
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ( c.len>0 && c.list!=NULL ) {
1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        add(c.list, c.len, 0);
1309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Add strings in order
1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ( c.strings!=NULL ) {
1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for (int32_t i=0; i<c.strings->size(); ++i) {
1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (!strings->contains((void*) s)) {
1316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                _add(*s);
1317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains only the elements in this set that are contained in the
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set.  In other words, removes from this set all of
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * its elements that are not contained in the specified set.  This
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operation effectively modifies this set so that its value is
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the <i>intersection</i> of the two sets.
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements this set will retain.
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
1333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retain(c.list, c.len, 0);
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings->retainAll(*c.strings);
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes from this set all of its elements that are contained in the
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set.  This operation effectively modifies this
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set so that its value is the <i>asymmetric set difference</i> of
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the two sets.
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be removed from
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *          this set.
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
1351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retain(c.list, c.len, 2);
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings->removeAll(*c.strings);
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements in this set all elements contained in the specified
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set.  Any character in the other set will be removed if it is
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in this set, or will be added if it is not in this set.
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be xor'ed from
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *          this set.
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
1368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exclusiveOr(c.list, c.len, 0);
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<c.strings->size(); ++i) {
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        void* e = c.strings->elementAt(i);
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!strings->removeElement(e)) {
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _add(*(const UnicodeString*)e);
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes all of the elements from this set.  This set will be
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * empty after this call returns.
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::clear(void) {
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isFrozen()) {
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (list != NULL) {
1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        list[0] = UNICODESET_HIGH;
1392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = 1;
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (strings != NULL) {
1396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        strings->removeAllElements();
1397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (list != NULL && strings != NULL) {
1399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Remove bogus
1400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fFlags = 0;
1401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the number of ranges contained in
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set.
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getRangeCount() const {
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return len/2;
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the first character in the
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set.
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeCount
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeStart(int32_t index) const {
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return list[index*2];
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the last character in the
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set.
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeEnd(int32_t index) const {
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return list[index*2 + 1] - 1;
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getStringCount() const {
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return strings->size();
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString* UnicodeSet::getString(int32_t index) const {
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (const UnicodeString*) strings->elementAt(index);
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reallocate this objects internal structures to take up the least
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * possible space, without changing this object's value.
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::compact() {
1448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Delete buffer first to defragment memory less.
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buffer != NULL) {
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(buffer);
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer = NULL;
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (len < capacity) {
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Make the capacity equal to len or 1.
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We don't want to realloc of 0 size.
1459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        int32_t newCapacity = len + (len == 0);
1460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity);
1461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (temp) {
1462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            list = temp;
1463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            capacity = newCapacity;
1464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // else what the heck happened?! We allocated less memory!
1466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Oh well. We'll keep our original array.
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bmpLength, length, destLength;
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec=U_ILLEGAL_ARGUMENT_ERROR;
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* count necessary 16-bit units */
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // assert(length>=0);
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (length==0) {
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* empty set */
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (destCapacity>0) {
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest=0;
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec=U_BUFFER_OVERFLOW_ERROR;
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 1;
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* now length>0 */
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (this->list[length-1]<=0xffff) {
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* all BMP */
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpLength=length;
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (this->list[0]>=0x10000) {
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* all supplementary */
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpLength=0;
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length*=2;
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* some BMP, some supplementary */
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length=bmpLength+2*(length-bmpLength);
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* length: number of 16-bit array units */
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (length>0x7fff) {
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* there are only 15 bits for the length in the first serialized word */
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec=U_INDEX_OUTOFBOUNDS_ERROR;
1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * total serialized length:
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * number of 16-bit array units (length) +
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 1 length unit (always) +
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 1 bmpLength unit (if there are supplementary values)
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    destLength=length+((length>bmpLength)?2:1);
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (destLength<=destCapacity) {
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar32 *p;
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i;
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *dest=(uint16_t)length;
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (length>bmpLength) {
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest|=0x8000;
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *++dest=(uint16_t)bmpLength;
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++dest;
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* write the BMP part of the array */
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        p=this->list;
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<bmpLength; ++i) {
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=(uint16_t)*p++;
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* write the supplementary part of the array */
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (; i<length; i+=2) {
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=(uint16_t)(*p>>16);
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=(uint16_t)*p++;
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec=U_BUFFER_OVERFLOW_ERROR;
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return destLength;
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Utility methods
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Allocate our strings vector and return TRUE if successful.
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::allocateStrings(UErrorCode &status) {
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1563103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    strings = new UVector(uprv_deleteUObject,
1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          uhash_compareUnicodeString, 1, status);
1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (strings == NULL) { // Check for memory allocation error.
1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
1567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return FALSE;
1568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete strings;
1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        strings = NULL;
1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) {
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (newLen <= capacity)
1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA));
1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (temp == NULL) {
1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = temp;
1587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    capacity = newLen + GROW_EXTRA;
1588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // else we keep the original contents on the memory failure.
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) {
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buffer != NULL && newLen <= bufferCapacity)
1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA));
1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (temp == NULL) {
1596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    buffer = temp;
1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bufferCapacity = newLen + GROW_EXTRA;
1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // else we keep the original contents on the memory failure.
1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Swap list and buffer.
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::swapBuffers(void) {
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // swap list and buffer
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32* temp = list;
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = buffer;
1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer = temp;
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = capacity;
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    capacity = bufferCapacity;
1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bufferCapacity = c;
1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::setToBogus() {
1620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    clear(); // Remove everything in the set.
1621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags = kIsBogus;
1622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Fundamental operators
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 max(UChar32 a, UChar32 b) {
1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (a > b) ? a : b;
1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0, 3 is normal: x xor y
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1, 2: x xor ~y == x === y
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) {
1636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureBufferCapacity(len + otherLen, status);
1641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0, j = 0, k = 0;
1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 a = list[i++];
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b;
1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (polarity == 1 || polarity == 2) {
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = UNICODESET_LOW;
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (other[j] == UNICODESET_LOW) { // skip base if already LOW
1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++j;
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j];
1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = other[j++];
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // simplest of all the routines
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // sort the values, discarding identicals!
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (a < b) {
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer[k++] = a;
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            a = list[i++];
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (b < a) {
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer[k++] = b;
1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j++];
1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (a != UNICODESET_HIGH) { // at this point, a == b
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // discard both values!
1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            a = list[i++];
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j++];
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else { // DONE!
1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer[k++] = UNICODESET_HIGH;
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            len = k;
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x union y
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x union ~y
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x union y
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x union ~y
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus() || other==NULL) {
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureBufferCapacity(len + otherLen, status);
1691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
1692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0, j = 0, k = 0;
1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 a = list[i++];
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b = other[j++];
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // change from xor is that we have to check overlapping pairs
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // polarity bit 1 means a is second, bit 2 means b is.
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (polarity) {
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 0: // both first; take lower if unequal
1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // take a
1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Back up over overlapping ranges in buffer[]
1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (k > 0 && a <= buffer[k-1]) {
1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Pick latter end value in buffer[] vs. list[]
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = max(list[i], buffer[--k]);
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // No overlap
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buffer[k++] = a;
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = list[i];
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                i++; // Common if/else code factored out
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // take b
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (k > 0 && b <= buffer[k-1]) {
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    b = max(other[j], buffer[--k]);
1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buffer[k++] = b;
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    b = other[j];
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                j++;
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, take a, drop b
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // This is symmetrical; it doesn't matter if
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // we backtrack with a or b. - liu
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (k > 0 && a <= buffer[k-1]) {
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = max(list[i], buffer[--k]);
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // No overlap
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buffer[k++] = a;
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = list[i];
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                i++;
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 3: // both second; take higher if unequal, and drop other
1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (b <= a) { // take a
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // take b
1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (b == UNICODESET_HIGH) goto loop_end;
1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            a = list[i++];
1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            polarity ^= 1;   // factored common code
1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j++];
1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            polarity ^= 2;
1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 1: // a second, b first; if b < a, overlap
1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // no overlap, take a
1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a; a = list[i++]; polarity ^= 1;
1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // OVERLAP, drop b
1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 2: // a first, b second; if a < b, overlap
1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (b < a) { // no overlap, take b
1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else  if (a < b) { // OVERLAP, drop a
1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end:
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer[k++] = UNICODESET_HIGH;    // terminate
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = k;
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x intersect y
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x intersect ~y == set-minus
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x intersect y
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x intersect ~y
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) {
1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureBufferCapacity(len + otherLen, status);
1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0, j = 0, k = 0;
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 a = list[i++];
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b = other[j++];
1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // change from xor is that we have to check overlapping pairs
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // polarity bit 1 means a is second, bit 2 means b is.
1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (polarity) {
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 0: // both first; drop the smaller
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // drop a
1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // drop b
1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, take one, drop other
1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 3: // both second; take lower if unequal
1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // take a
1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // take b
1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, take one, drop other
1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 1: // a second, b first;
1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // NO OVERLAP, drop a
1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // OVERLAP, take b
1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 2: // a first, b second; if a < b, overlap
1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (b < a) { // no overlap, drop b
1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else  if (a < b) { // OVERLAP, take a
1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end:
1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer[k++] = UNICODESET_HIGH;    // terminate
1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = k;
1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a
1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * string to the given <code>StringBuffer</code>.
1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) {
1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 cp;
1897103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) {
1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a
1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character to the given <code>StringBuffer</code>.
1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool
1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) {
1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // unprintable
1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ICU_Utility::escapeUnprintable(buf, c)) {
1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return;
1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Okay to let ':' pass through
1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (c) {
1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case SET_OPEN:
1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case SET_CLOSE:
1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case HYPHEN:
1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case COMPLEMENT:
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case INTERSECTION:
1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case BACKSLASH:
1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case OPEN_BRACE:
1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case CLOSE_BRACE:
1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case COLON:
1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case SymbolTable::SYMBOL_REF:
1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf.append(BACKSLASH);
1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Escape whitespace
1931b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (PatternProps::isWhiteSpace(c)) {
1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(BACKSLASH);
1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buf.append(c);
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append a string representation of this set to result.  This will be
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a cleaned version of the string passed to applyPattern(), if there
1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is one.  Otherwise it will be generated.
1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      UBool escapeUnprintable) const
1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat != NULL) {
1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i;
1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t backslashCount = 0;
1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<patLen; ) {
1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 c;
1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(pat, i, patLen, c);
1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If the unprintable character is preceded by an odd
1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // number of backslashes, then it has been escaped.
1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Before unescaping it, we delete the final
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // backslash.
1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((backslashCount % 2) == 1) {
1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    result.truncate(result.length() - 1);
1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ICU_Utility::escapeUnprintable(result, c);
1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                backslashCount = 0;
1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result.append(c);
1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c == BACKSLASH) {
1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++backslashCount;
1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    backslashCount = 0;
1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return result;
1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return _generatePattern(result, escapeUnprintable);
1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a string representation of this set.  If the result of
1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * calling this function is passed to a UnicodeSet constructor, it
1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will produce another set that is equal to this one.
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::toPattern(UnicodeString& result,
1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UBool escapeUnprintable) const
1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result.truncate(0);
1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return _toPattern(result, escapeUnprintable);
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate and append a string representation of this set to result.
1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This does not use this.pat, the cleaned up copy of the string
1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * passed to applyPattern().
1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                            UBool escapeUnprintable) const
1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result.append(SET_OPEN);
1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  // Check against the predefined categories.  We implicitly build
2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  // up ALL category sets the first time toPattern() is called.
2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      if (*this == getCategorySet(cat)) {
2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          result.append(COLON);
2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          result.append(CATEGORY_NAMES, cat*2, 2);
2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          return result.append(CATEGORY_CLOSE);
2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      }
2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  }
2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count = getRangeCount();
2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If the set contains at least 2 intervals and includes both
2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // MIN_VALUE and MAX_VALUE, then the inverse representation will
2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // be more economical.
2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (count > 1 &&
2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        getRangeStart(0) == MIN_VALUE &&
2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        getRangeEnd(count-1) == MAX_VALUE) {
2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Emit the inverse
2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.append(COMPLEMENT);
2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i = 1; i < count; ++i) {
2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 start = getRangeEnd(i-1)+1;
2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 end = getRangeStart(i)-1;
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _appendToPat(result, start, escapeUnprintable);
2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (start != end) {
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((start+1) != end) {
2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    result.append(HYPHEN);
2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(result, end, escapeUnprintable);
2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Default; emit the ranges as pairs
2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i = 0; i < count; ++i) {
2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 start = getRangeStart(i);
2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 end = getRangeEnd(i);
2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _appendToPat(result, start, escapeUnprintable);
2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (start != end) {
2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((start+1) != end) {
2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    result.append(HYPHEN);
2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(result, end, escapeUnprintable);
2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i<strings->size(); ++i) {
2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.append(OPEN_BRACE);
2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _appendToPat(result,
2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     *(const UnicodeString*) strings->elementAt(i),
2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     escapeUnprintable);
2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.append(CLOSE_BRACE);
2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result.append(SET_CLOSE);
2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Release existing cached pattern
2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::releasePattern() {
2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat) {
2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(pat);
2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pat = NULL;
2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        patLen = 0;
2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Set the new pattern to cache.
2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::setPattern(const UnicodeString& newPat) {
2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t newPatLen = newPat.length();
2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat) {
2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        patLen = newPatLen;
2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        newPat.extractBetween(0, patLen, pat);
2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pat[patLen] = 0;
2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // else we don't care if malloc failed. This was just a nice cache.
2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We can regenerate an equivalent pattern later when requested.
2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::freeze() {
2088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(!isFrozen() && !isBogus()) {
2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Do most of what compact() does before freezing because
2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // compact() will not work when the set is frozen.
2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Delete buffer first to defragment memory less.
2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (buffer != NULL) {
2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(buffer);
2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer = NULL;
2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (capacity > (len + GROW_EXTRA)) {
2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Make the capacity equal to len or 1.
2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We don't want to realloc of 0 size.
2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            capacity = len + (len == 0);
2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity);
2103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (list == NULL) { // Check for memory allocation error.
2104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                setToBogus();
2105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return this;
2106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Optimize contains() and span() and similar functions.
2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!strings->isEmpty()) {
2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) {
2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // All strings are irrelevant for span() etc. because
2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // all of each string's code points are contained in this set.
2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Do not check needsStringSpanUTF8() because UTF-8 has at most as
2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // many relevant strings as UTF-16.
2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete stringSpan;
2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                stringSpan = NULL;
2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (stringSpan == NULL) {
2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // No span-relevant strings: Optimize for code point spans.
2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            bmpSet=new BMPSet(list, len);
2125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (bmpSet == NULL) { // Check for memory allocation error.
2126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                setToBogus();
2127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return this;
2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length=u_strlen(s);
2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->span(s, length, spanCondition);
2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF16()) {
2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.span(s, length, spanCondition);
2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start=0, prev=0;
2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U16_NEXT(s, start, length, c);
2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=start)<length);
2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length=u_strlen(s);
2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->spanBack(s, length, spanCondition);
2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF16()) {
2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.spanBack(s, length, spanCondition);
2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev=length;
2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U16_PREV(s, 0, length, c);
2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=length)>0);
2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *s0=(const uint8_t *)s;
2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length=(int32_t)uprv_strlen(s);
2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF8()) {
2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start=0, prev=0;
2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
22378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        U8_NEXT_OR_FFFD(s, start, length, c);
2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=start)<length);
2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *s0=(const uint8_t *)s;
2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return bmpSet->spanBackUTF8(s0, length, spanCondition);
2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length=(int32_t)uprv_strlen(s);
2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF8()) {
2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev=length;
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
22758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        U8_PREV_OR_FFFD(s, 0, length, c);
2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=length)>0);
2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
2284