1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 1999-2011, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Date        Name        Description
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   10/20/99    alan        Creation.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ruleiter.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uhash.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "charstr.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bmpset.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unisetspan.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// UChar constants for the characters that have syntactic meaning in a set
// pattern. They are written as hexadecimal code units instead of character
// literals for EBCDIC compatibility, and as macros (rather than static
// constants) to reduce private static exports and memory access time.
// Listed in ascending code unit order.
#define INTERSECTION    ((UChar)0x0026) /*&*/
#define HYPHEN          ((UChar)0x002D) /*-*/
#define COLON           ((UChar)0x003A) /*:*/
#define EQUALS          ((UChar)0x003D) /*=*/
#define UPPER_N         ((UChar)0x004E) /*N*/
#define UPPER_P         ((UChar)0x0050) /*P*/
#define UPPER_U         ((UChar)0x0055) /*U*/
#define SET_OPEN        ((UChar)0x005B) /*[*/
#define BACKSLASH       ((UChar)0x005C) /*\*/
#define SET_CLOSE       ((UChar)0x005D) /*]*/
#define COMPLEMENT      ((UChar)0x005E) /*^*/
#define LOWER_P         ((UChar)0x0070) /*p*/
#define LOWER_U         ((UChar)0x0075) /*u*/
#define OPEN_BRACE      ((UChar)0x007B) /*{*/
#define CLOSE_BRACE     ((UChar)0x007D) /*}*/
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Exclusive upper bound: strictly greater than every valid code point
// (the largest code point is 0x10FFFF).
#define UNICODESET_HIGH 0x110000

// Inclusive lower bound: less than or equal to every valid code point.
#define UNICODESET_LOW 0

// Spare capacity reserved when the inversion list is first allocated.
// Must be >= 0.
#define START_EXTRA 16

// Spare capacity added whenever storage grows. Must be >= 0.
#define GROW_EXTRA START_EXTRA
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruSymbolTable::~SymbolTable() {}
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet)
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Modify the given UChar32 variable so that it is in range, by
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values < UNICODESET_LOW to UNICODESET_LOW, and
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1.
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It modifies its argument in-place and also returns it.
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 pinCodePoint(UChar32& c) {
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < UNICODESET_LOW) {
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = UNICODESET_LOW;
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (c > (UNICODESET_HIGH-1)) {
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = (UNICODESET_HIGH-1);
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return c;
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debugging
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// DO NOT DELETE THIS CODE.  This code is used to debug memory leaks.
// To enable the debugging, define the symbol DEBUG_MEM in the line
// below.  This will result in text being sent to stdout that looks
// like this:
//   DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
//   DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
// Each line lists a construction (ct) or destruction (dt) event, the
// object address, the number of outstanding objects after the event,
// and the pattern of the object in question.
// The address is printed with %p, so its exact spelling (leading "0x",
// digit count, letter case) depends on the C library in use.

// #define DEBUG_MEM

#ifdef DEBUG_MEM
#include <stdio.h>
static int32_t _dbgCount = 0;

/**
 * Writes one trace line to stdout.
 *
 * @param event       "ct" for construction or "dt" for destruction
 * @param set         the object the event refers to
 * @param outstanding number of live objects after the event
 */
static inline void _dbgTrace(const char* event, UnicodeSet* set, int32_t outstanding) {
    UnicodeString str;
    set->toPattern(str, TRUE);
    // Only the first 39 pattern characters are extracted into the
    // 40-byte buffer.
    char buf[40];
    str.extract(0, 39, buf, "");
    // A pointer must be printed with %p and passed as void*; handing it to
    // an integer conversion such as %X is undefined behaviour and drops
    // the upper half of the address on 64-bit platforms.
    printf("DEBUG UnicodeSet: %s %p; %d %s\n", event, (void*)set, (int)outstanding, buf);
}

// Records the construction of *set.
static inline void _dbgct(UnicodeSet* set) {
    _dbgTrace("ct", set, ++_dbgCount);
}

// Records the destruction of *set.
static inline void _dbgdt(UnicodeSet* set) {
    _dbgTrace("dt", set, --_dbgCount);
}

#else

// Tracing disabled: both hooks expand to nothing.
#define _dbgct(set)
#define _dbgdt(set)

#endif
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// UnicodeString in UVector support
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV cloneUnicodeString(UHashTok *dst, UHashTok *src) {
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer);
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) {
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString &a = *(const UnicodeString*)t1.pointer;
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeString &b = *(const UnicodeString*)t2.pointer;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return a.compare(b);
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors &c
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs an empty set.
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet() :
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[0] = UNICODESET_HIGH;
157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set containing the given range. If <code>end >
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start</code> then an empty set is created.
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(UChar32 start, UChar32 end) :
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0),
173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[0] = UNICODESET_HIGH;
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement(start, end);
185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set that is identical to the given UnicodeSet.
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o) :
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeFilter(o),
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0),
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bmpSet(0),
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(0), bufferCapacity(0),
200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *this = o;
211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy-construct as thawed.
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) :
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeFilter(o),
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(o.len + GROW_EXTRA), list(0),
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bmpSet(0),
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(0), bufferCapacity(0),
224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    allocateStrings(status);
229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(list!=NULL){
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // *this = o except for bmpSet and stringSpan
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len = o.len;
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcpy(list, o.list, len*sizeof(UChar32));
237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (strings != NULL && o.strings != NULL) {
238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            strings->assign(*o.strings, cloneUnicodeString, status);
239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else { // Invalid strings.
240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            setToBogus();
241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return;
242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (o.pat) {
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            setPattern(UnicodeString(o.pat, o.patLen));
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // If memory allocation failed, set to bogus state.
247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructs the set.
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::~UnicodeSet() {
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgdt(this); // first!
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_free(list);
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete bmpSet;
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buffer) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(buffer);
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete strings;
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete stringSpan;
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assigns this object to be a copy of another.
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) {
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (this == &o) {
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isFrozen()) {
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (o.isBogus()) {
279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *this;
281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureCapacity(o.len, ec);
284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(ec)) {
285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *this; // There is no way to report this error :-(
286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = o.len;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(list, o.list, len*sizeof(UChar32));
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (o.bmpSet == NULL) {
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpSet = NULL;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpSet = new BMPSet(*o.bmpSet, list, len);
293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (bmpSet == NULL) { // Check for memory allocation error.
294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            setToBogus();
295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (strings != NULL && o.strings != NULL) {
299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        strings->assign(*o.strings, cloneUnicodeString, ec);
300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    } else { // Invalid strings.
301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return *this;
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (o.stringSpan == NULL) {
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        stringSpan = NULL;
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings);
308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (stringSpan == NULL) { // Check for memory allocation error.
309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            setToBogus();
310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (o.pat) {
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        setPattern(UnicodeString(o.pat, o.patLen));
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a copy of this object.  All UnicodeMatcher objects have
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to support cloning in order to allow classes using
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UnicodeMatchers, such as Transliterator, to implement cloning.
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor* UnicodeSet::clone() const {
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new UnicodeSet(*this);
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::cloneAsThawed() const {
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new UnicodeSet(*this, TRUE);
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compares the specified object with this set for equality.  Returns
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>true</tt> if the two sets
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the same size, and every member of the specified set is
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in this set (or equivalently, every member of this set is
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in the specified set).
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param o set to be compared for equality with this set.
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if the specified set is equal to this set.
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::operator==(const UnicodeSet& o) const {
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (len != o.len) return FALSE;
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < len; ++i) {
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (list[i] != o.list[i]) return FALSE;
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (*strings != *o.strings) return FALSE;
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the hash code value for this set.
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the hash code value for this set.
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see Object#hashCode()
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::hashCode(void) const {
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result = len;
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < len; ++i) {
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result *= 1000003;
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result += list[i];
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Public API
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the number of elements in this set (its cardinality),
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note than the elements of a set may include both individual
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * codepoints and strings.
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the number of elements in this set (its cardinality).
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::size(void) const {
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = 0;
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count = getRangeCount();
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < count; ++i) {
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        n += getRangeEnd(i) - getRangeStart(i) + 1;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return n + strings->size();
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains no elements.
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains no elements.
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::isEmpty(void) const {
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return len == 1 && strings->size() == 0;
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains the given character.
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c character to be checked for containment
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 c) const {
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Set i to the index of the start item greater than ch
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We know we will terminate without length test!
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // LATER: for large sets, add binary search
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //int32_t i = -1;
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //for (;;) {
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    if (c < list[++i]) break;
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bmpSet != NULL) {
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return bmpSet->contains(c);
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (stringSpan != NULL) {
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->contains(c);
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(c);
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(i & 1); // return true if odd
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the smallest value i such that c < list[i].  Caller
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must ensure that c is a legal value or this method will enter
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an infinite loop.  This method performs a binary search.
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c a character in the range MIN_VALUE..MAX_VALUE
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the smallest integer i in the range 0..len-1,
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive, such that c < list[i]
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::findCodePoint(UChar32 c) const {
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Examples:
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                       findCodePoint(c)
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       set              list[]         c=0 1 3 4 7 8
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       ===              ==============   ===========
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       []               [110000]         0 0 0 0 0 0
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       [\u0000-\u0003]  [0, 4, 110000]   1 1 1 2 2 2
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       [\u0004-\u0007]  [4, 8, 110000]   0 0 0 1 1 2
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       [:Any:]          [0, 110000]      1 1 1 1 1 1
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Return the smallest i such that c < list[i].  Assume
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < list[0])
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // High runner test.  c is often after the last range, so an
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // initial check for this condition pays off.
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t lo = 0;
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t hi = len - 1;
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lo >= hi || c >= list[hi-1])
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return hi;
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // invariant: c >= list[lo]
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // invariant: c < list[hi]
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i = (lo + hi) >> 1;
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i == lo) {
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break; // Found!
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (c < list[i]) {
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            hi = i;
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lo = i;
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return hi;
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains every character
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range.
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //int32_t i = -1;
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //for (;;) {
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    if (start < list[++i]) break;
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(start);
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ((i & 1) != 0 && end < list[i]);
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains the given
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * multicharacter string.
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string to be checked for containment
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains the specified string
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(const UnicodeString& s) const {
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s.length() == 0) return FALSE;
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return strings->contains((void*) &s);
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return contains((UChar32) cp);
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters and strings
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set.
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeSet& c) const {
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The specified set is a subset if all of its pairs are contained in
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this set.  It's possible to code this more efficiently in terms of
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // direct manipulation of the inversion lists if the need arises.
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = c.getRangeCount();
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int i=0; i<n; ++i) {
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) {
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!strings->containsAll(*c.strings)) return FALSE;
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string.
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeString& s) const {
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) ==
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   s.length());
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range.
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const {
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //int32_t i = -1;
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //for (;;) {
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    if (start < list[++i]) break;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(start);
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ((i & 1) == 0 && end < list[i]);
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters and strings
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set.
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeSet& c) const {
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The specified set is a subset if all of its pairs are contained in
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // this set.  It's possible to code this more efficiently in terms of
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // direct manipulation of the inversion lists if the need arises.
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = c.getRangeCount();
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<n; ++i) {
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) {
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!strings->containsNone(*c.strings)) return FALSE;
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string.
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeString& s) const {
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) ==
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   s.length());
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains any character whose low byte
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indexing.
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::matchesIndexValue(uint8_t v) const {
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* The index value v, in the range [0,255], is contained in this set if
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * it is contained in any pair of this set.  Pairs either have the high
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * bytes equal, or unequal.  If the high bytes are equal, then we have
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * aaxx..aayy, where aa is the high byte.  Then v is contained if xx <=
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * v <= yy.  If the high bytes are unequal we have aaxx..bbyy, bb>aa.
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Then v is contained if xx <= v || v <= yy.  (This is identical to the
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * time zone month containment logic.)
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t rangeCount=getRangeCount();
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<rangeCount; ++i) {
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 low = getRangeStart(i);
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 high = getRangeEnd(i);
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((low & ~0xFF) == (high & ~0xFF)) {
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((low & 0xFF) <= v && v <= (high & 0xFF)) {
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return TRUE;
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) {
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return TRUE;
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (strings->size() != 0) {
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<strings->size(); ++i) {
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //if (s.length() == 0) {
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    // Empty strings match everything
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    return TRUE;
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //}
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // assert(s.length() != 0); // We enforce this elsewhere
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 c = s.char32At(0);
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((c & 0xFF) == v) {
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return TRUE;
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implementation of UnicodeMatcher::matches().  Always matches the
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * longest possible multichar string.
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUMatchDegree UnicodeSet::matches(const Replaceable& text,
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t& offset,
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 int32_t limit,
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 UBool incremental) {
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (offset == limit) {
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Strings, if any, have length != 0, so we don't worry
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // about them here.  If we ever allow zero-length strings
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // we much check for them here.
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (contains(U_ETHER)) {
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return incremental ? U_PARTIAL_MATCH : U_MATCH;
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return U_MISMATCH;
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (strings->size() != 0) { // try strings first
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // might separate forward and backward loops later
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // for now they are combined
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // TODO Improve efficiency of this, at least in the forward
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // direction, if not in both.  In the forward direction we
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // can assume the strings are sorted.
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t i;
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UBool forward = offset < limit;
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // firstChar is the leftmost char to match in the
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // forward direction or the rightmost char to match in
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // the reverse direction.
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar firstChar = text.charAt(offset);
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If there are multiple strings that can match we
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // return the longest match.
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t highWaterLength = 0;
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (i=0; i<strings->size(); ++i) {
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //if (trial.length() == 0) {
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //    return U_MATCH; // null-string always matches
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //}
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // assert(trial.length() != 0); // We ensure this elsewhere
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Strings are sorted, so we can optimize in the
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // forward direction.
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (forward && c > firstChar) break;
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c != firstChar) continue;
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t matchLen = matchRest(text, offset, limit, trial);
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (incremental) {
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t maxLen = forward ? limit-offset : offset-limit;
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (matchLen == maxLen) {
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // We have successfully matched but only up to limit.
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return U_PARTIAL_MATCH;
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (matchLen == trial.length()) {
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // We have successfully matched the whole string.
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (matchLen > highWaterLength) {
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        highWaterLength = matchLen;
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // In the forward direction we know strings
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // are sorted so we can bail early.
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (forward && matchLen < highWaterLength) {
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We've checked all strings without a partial match.
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // If we have full matches, return the longest one.
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (highWaterLength != 0) {
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                offset += forward ? highWaterLength : -highWaterLength;
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return U_MATCH;
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return UnicodeFilter::matches(text, offset, limit, incremental);
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the longest match for s in text at the given position.
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If limit > start then match forward from start+1 to limit
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * matching all characters except s.charAt(0).  If limit < start,
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go backward starting from start-1 matching all characters
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except s.charAt(s.length()-1).  This method assumes that the
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * first character, text.charAt(start), matches s, so it does not
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * check it.
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the text to match
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start the first character to match.  In the forward
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction, text.charAt(start) is matched against s.charAt(0).
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In the reverse direction, it is matched against
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.charAt(s.length()-1).
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param limit the limit offset for matching, either last+1 in
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the forward direction, or last-1 in the reverse direction,
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * where last is the index of the last character to match.
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return If part of s matches up to the limit, return |limit -
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start|.  If all of s matches before reaching the limit, return
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.length().  If there is a mismatch between s and text, return
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::matchRest(const Replaceable& text,
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              int32_t start, int32_t limit,
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const UnicodeString& s) {
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t maxLen;
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t slen = s.length();
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (start < limit) {
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        maxLen = limit - start;
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (maxLen > slen) maxLen = slen;
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i = 1; i < maxLen; ++i) {
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (text.charAt(start + i) != s.charAt(i)) return 0;
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        maxLen = start - limit;
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (maxLen > slen) maxLen = slen;
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        --slen; // <=> slen = s.length() - 1;
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i = 1; i < maxLen; ++i) {
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (text.charAt(start - i) != s.charAt(slen - i)) return 0;
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return maxLen;
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement of UnicodeMatcher
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const {
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    toUnionTo.addAll(*this);
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index of the given character within this set, where
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point.  If the character
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is not in this set, return -1.  The inverse of this method is
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>charAt()</code>.
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an index from 0..size()-1, or -1
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::indexOf(UChar32 c) const {
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c < MIN_VALUE || c > MAX_VALUE) {
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t n = 0;
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = list[i++];
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c < start) {
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1;
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 limit = list[i++];
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c < limit) {
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return n + c - start;
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        n += limit - start;
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the character at the given index within this set, where
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point.  If the index is
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * out of range, return (UChar32)-1.  The inverse of this method is
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>indexOf()</code>.
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an index from 0..size()-1
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the character at the given index, or (UChar32)-1.
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::charAt(int32_t index) const {
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (index >= 0) {
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // len2 is the largest even integer <= len, that is, it is len
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // for even values and len-1 for odd values.  With odd values
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // the last entry is UNICODESET_HIGH.
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t len2 = len & ~1;
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i=0; i < len2;) {
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 start = list[i++];
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t count = list[i++] - start;
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (index < count) {
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return (UChar32)(start + index);
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            index -= count;
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (UChar32)-1;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make this object represent the range <code>start - end</code>.
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If <code>end > start</code> then this object is set to an
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an empty range.
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character in the set, inclusive
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @rparam end last character in the set, inclusive
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clear();
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    complement(start, end);
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified range to this set if it is not already
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present.  If this set already contains the specified range,
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged.  If <code>end > start</code>
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is added, leaving the set unchanged.
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be added
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be added
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) {
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) < pinCodePoint(end)) {
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(range, 2, 0);
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (start == end) {
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(start);
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #define DEBUG_US_ADD
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
#ifdef DEBUG_US_ADD
#include <stdio.h>
/**
 * Debug helper: prints one code point to stdout.  Values up to 0xFF are
 * written as a raw character, everything else as U+XXXX.
 */
void dump(UChar32 c) {
    if (c <= 0xFF) {
        printf("%c", (char)c);
    } else {
        // %X takes an unsigned int; UChar32 is a signed 32-bit type in
        // ICU, so convert explicitly rather than rely on a mismatched
        // argument type.
        printf("U+%04X", (unsigned int)c);
    }
}
/**
 * Debug helper: prints the first len entries of list to stdout as
 * "[a, b, c]", formatting each entry with dump(UChar32).
 */
void dump(const UChar32* list, int32_t len) {
    printf("[");
    for (int32_t i=0; i<len; ++i) {
        if (i != 0) printf(", ");
        dump(list[i]);
    }
    printf("]");
}
#endif
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified character to this set if it is not already
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present.  If this set already contains the specified character,
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged.
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 c) {
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // find smallest i such that c < list[i]
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if odd, then it is IN the set
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // if even, then it is OUT of the set
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = findCodePoint(pinCodePoint(c));
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // already in set?
881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ((i & 1) != 0  || isFrozen() || isBogus()) return *this;
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // HIGH is 0x110000
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // assert(list[len-1] == HIGH);
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // empty = [HIGH]
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // [start_0, limit_0, start_1, limit_1, HIGH]
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //                             ^
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //                             list[i]
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // i == 0 means c is before the first range
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("Add of ");
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dump(c);
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(" found at %d", i);
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(": ");
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dump(list, len);
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf(" => ");
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == list[i]-1) {
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // c is before start of next range
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i] = c;
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // if we touched the HIGH mark, then add a new one
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (c == (UNICODESET_HIGH - 1)) {
909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            UErrorCode status = U_ZERO_ERROR;
910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            ensureCapacity(len+1, status);
911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (U_FAILURE(status)) {
912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return *this; // There is no way to report this error :-(
913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list[len++] = UNICODESET_HIGH;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i > 0 && c == list[i-1]) {
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // collapse adjacent ranges
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // [..., start_k-1, c, c, limit_k, ..., HIGH]
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                     ^
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //                     list[i]
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //for (int32_t k=i-1; k<len-2; ++k) {
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //    list[k] = list[k+2];
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //}
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32* dst = list + i - 1;
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32* src = dst + 2;
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32* srclimit = list + len;
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while (src < srclimit) *(dst++) = *(src++);
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            len -= 2;
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else if (i > 0 && c == list[i-1]) {
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // c is after end of prior range
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i-1]++;
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // no need to check for collapse here
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // At this point we know the new char is not adjacent to
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // any existing ranges, and it is not 10FFFF.
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             ^
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             list[i]
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             ^
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //                             list[i]
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UErrorCode status = U_ZERO_ERROR;
955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ensureCapacity(len+2, status);
956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U_FAILURE(status)) {
957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this; // There is no way to report this error :-(
958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //for (int32_t k=len-1; k>=i; --k) {
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //    list[k+2] = list[k];
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //}
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32* src = list + len;
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32* dst = src + 2;
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32* srclimit = list + i;
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (src > srclimit) *(--dst) = *(--src);
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i] = c;
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list[i+1] = c+1;
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        len += 2;
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dump(list, len);
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("\n");
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=1; i<len; ++i) {
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (list[i] <= list[i-1]) {
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Corrupt array!
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("ERROR: list has been corrupted\n");
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            exit(1);
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified multicharacter to this set if it is not already
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present.  If this set already contains the multicharacter,
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged.
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Thus "ch" => {"ch"}
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the source string
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(const UnicodeString& s) {
1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!strings->contains((void*) &s)) {
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _add(s);
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            releasePattern();
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add((UChar32)cp);
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the given string, in order, to 'strings'.  The given string
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must have been checked by the caller to not be empty and to not
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * already be in 'strings'.
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_add(const UnicodeString& s) {
1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString* t = new UnicodeString(s);
1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (t == NULL) { // Check for memory allocation error.
1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings->sortedInsert(t, compareUnicodeString, ec);
1029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        delete t;
1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a code point IF the string consists of a single one.
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise returns -1.
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param string to test
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //if (s.length() < 1) {
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //}
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s.length() > 2) return -1;
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s.length() == 1) return s.charAt(0);
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // at this point, len = 2
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 cp = s.char32At(0);
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp > 0xFFFF) { // is surrogate pair
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return cp;
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1;
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeString& s) {
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 cp;
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cp = s.char32At(i);
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add(cp);
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) {
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(s);
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retainAll(set);
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) {
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(s);
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    complementAll(set);
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character.
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) {
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet set;
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    set.addAll(s);
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    removeAll(set);
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAllStrings() {
1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    strings->removeAllElements();
1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return *this;
1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given string
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) {
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set = new UnicodeSet();
1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (set != NULL) { // Check for memory allocation error.
1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        set->add(s);
1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return set;
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given characters
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) {
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet *set = new UnicodeSet();
1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (set != NULL) { // Check for memory allocation error.
1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        set->addAll(s);
1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return set;
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retain only the elements in this set that are contained in the
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range.  If <code>end > start</code> then an empty range is
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * retained, leaving the set empty.
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be retained
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be retained
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set.
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) {
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) <= pinCodePoint(end)) {
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(range, 2, 0);
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        clear();
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 c) {
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return retain(c, c);
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified range from this set if it is present.
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.  If <code>end > start</code> then an empty range is
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * removed, leaving the set unchanged.
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) {
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) <= pinCodePoint(end)) {
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        retain(range, 2, 2);
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified character from this set if it is present.
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 c) {
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return remove(c, c);
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified string from this set if it is present.
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified character once the call
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
1203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        strings->removeElement((void*) &s);
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        releasePattern();
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        remove((UChar32)cp, (UChar32)cp);
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements the specified range in this set.  Any character in
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the range will be removed if it is in this set, or will be
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added if it is not in this set.  If <code>end > start</code>
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is xor'ed, leaving the set unchanged.
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set.
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) {
1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pinCodePoint(start) <= pinCodePoint(end)) {
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 range[3] = { start, end+1, UNICODESET_HIGH };
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exclusiveOr(range, 2, 0);
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 c) {
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return complement(c, c);
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is equivalent to
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(void) {
1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (list[0] == UNICODESET_LOW) {
1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ensureBufferCapacity(len-1, status);
1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U_FAILURE(status)) {
1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcpy(buffer, list + 1, (len-1)*sizeof(UChar32));
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        --len;
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ensureBufferCapacity(len+1, status);
1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (U_FAILURE(status)) {
1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            return *this;
1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_memcpy(buffer + 1, list, len*sizeof(UChar32));
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer[0] = UNICODESET_LOW;
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++len;
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement the specified string in this set.
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified string once the call
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns.
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the string to complement
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this object, for chaining
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (s.length() == 0 || isFrozen() || isBogus()) return *this;
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t cp = getSingleCP(s);
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (cp < 0) {
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (strings->contains((void*) &s)) {
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            strings->removeElement((void*) &s);
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _add(s);
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        releasePattern();
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement((UChar32)cp, (UChar32)cp);
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds all of the elements in the specified set to this set if
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * they're not already present.  This operation effectively
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * modifies this set so that its value is the <i>union</i> of the two
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * sets.  The behavior of this operation is unspecified if the specified
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * collection is modified while the operation is in progress.
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set whose elements are to be added to this set.
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #add(char, char)
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) {
1306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ( c.len>0 && c.list!=NULL ) {
1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        add(c.list, c.len, 0);
1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Add strings in order
1311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if ( c.strings!=NULL ) {
1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        for (int32_t i=0; i<c.strings->size(); ++i) {
1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i);
1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (!strings->contains((void*) s)) {
1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                _add(*s);
1316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains only the elements in this set that are contained in the
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set.  In other words, removes from this set all of
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * its elements that are not contained in the specified set.  This
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operation effectively modifies this set so that its value is
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the <i>intersection</i> of the two sets.
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements this set will retain.
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) {
1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retain(c.list, c.len, 0);
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings->retainAll(*c.strings);
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes from this set all of its elements that are contained in the
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set.  This operation effectively modifies this
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set so that its value is the <i>asymmetric set difference</i> of
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the two sets.
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be removed from
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *          this set.
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) {
1350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    retain(c.list, c.len, 2);
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings->removeAll(*c.strings);
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements in this set all elements contained in the specified
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set.  Any character in the other set will be removed if it is
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in this set, or will be added if it is not in this set.
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be xor'ed from
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *          this set.
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) {
1367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exclusiveOr(c.list, c.len, 0);
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i=0; i<c.strings->size(); ++i) {
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        void* e = c.strings->elementAt(i);
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!strings->removeElement(e)) {
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _add(*(const UnicodeString*)e);
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes all of the elements from this set.  This set will be
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * empty after this call returns.
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::clear(void) {
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isFrozen()) {
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (list != NULL) {
1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        list[0] = UNICODESET_HIGH;
1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = 1;
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (strings != NULL) {
1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        strings->removeAllElements();
1396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (list != NULL && strings != NULL) {
1398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Remove bogus
1399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        fFlags = 0;
1400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the number of ranges contained in
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set.
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getRangeCount() const {
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return len/2;
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the first character in the
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set.
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeCount
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeStart(int32_t index) const {
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return list[index*2];
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the last character in the
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set.
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeEnd(int32_t index) const {
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return list[index*2 + 1] - 1;
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getStringCount() const {
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return strings->size();
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString* UnicodeSet::getString(int32_t index) const {
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (const UnicodeString*) strings->elementAt(index);
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reallocate this objects internal structures to take up the least
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * possible space, without changing this object's value.
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::compact() {
1447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Delete buffer first to defragment memory less.
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buffer != NULL) {
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(buffer);
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer = NULL;
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (len < capacity) {
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Make the capacity equal to len or 1.
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We don't want to realloc of 0 size.
1458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        int32_t newCapacity = len + (len == 0);
1459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity);
1460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (temp) {
1461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            list = temp;
1462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            capacity = newCapacity;
1463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
1464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // else what the heck happened?! We allocated less memory!
1465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Oh well. We'll keep our original array.
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const {
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t bmpLength, length, destLength;
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec=U_ILLEGAL_ARGUMENT_ERROR;
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* count necessary 16-bit units */
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // assert(length>=0);
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (length==0) {
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* empty set */
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (destCapacity>0) {
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest=0;
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec=U_BUFFER_OVERFLOW_ERROR;
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 1;
1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* now length>0 */
1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (this->list[length-1]<=0xffff) {
1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* all BMP */
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpLength=length;
1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (this->list[0]>=0x10000) {
1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* all supplementary */
1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bmpLength=0;
1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length*=2;
1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* some BMP, some supplementary */
1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {}
1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length=bmpLength+2*(length-bmpLength);
1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* length: number of 16-bit array units */
1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (length>0x7fff) {
1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* there are only 15 bits for the length in the first serialized word */
1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec=U_INDEX_OUTOFBOUNDS_ERROR;
1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * total serialized length:
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * number of 16-bit array units (length) +
1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 1 length unit (always) +
1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * 1 bmpLength unit (if there are supplementary values)
1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    destLength=length+((length>bmpLength)?2:1);
1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (destLength<=destCapacity) {
1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar32 *p;
1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i;
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *dest=(uint16_t)length;
1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (length>bmpLength) {
1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest|=0x8000;
1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *++dest=(uint16_t)bmpLength;
1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++dest;
1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* write the BMP part of the array */
1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        p=this->list;
1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<bmpLength; ++i) {
1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=(uint16_t)*p++;
1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* write the supplementary part of the array */
1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (; i<length; i+=2) {
1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=(uint16_t)(*p>>16);
1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *dest++=(uint16_t)*p++;
1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec=U_BUFFER_OVERFLOW_ERROR;
1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return destLength;
1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Utility methods
1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Allocate our strings vector and return TRUE if successful.
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::allocateStrings(UErrorCode &status) {
1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    strings = new UVector(uhash_deleteUnicodeString,
1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                          uhash_compareUnicodeString, 1, status);
1564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (strings == NULL) { // Check for memory allocation error.
1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return FALSE;
1567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete strings;
1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        strings = NULL;
1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) {
1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (newLen <= capacity)
1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA));
1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (temp == NULL) {
1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = temp;
1586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    capacity = newLen + GROW_EXTRA;
1587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // else we keep the original contents on the memory failure.
1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) {
1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buffer != NULL && newLen <= bufferCapacity)
1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA));
1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (temp == NULL) {
1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
1596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        setToBogus();
1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    buffer = temp;
1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bufferCapacity = newLen + GROW_EXTRA;
1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // else we keep the original contents on the memory failure.
1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Swap list and buffer.
1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::swapBuffers(void) {
1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // swap list and buffer
1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32* temp = list;
1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list = buffer;
1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer = temp;
1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t c = capacity;
1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    capacity = bufferCapacity;
1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bufferCapacity = c;
1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::setToBogus() {
1619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    clear(); // Remove everything in the set.
1620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags = kIsBogus;
1621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}
1622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Fundamental operators
1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 max(UChar32 a, UChar32 b) {
1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (a > b) ? a : b;
1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0, 3 is normal: x xor y
1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1, 2: x xor ~y == x === y
1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) {
1635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureBufferCapacity(len + otherLen, status);
1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
1641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0, j = 0, k = 0;
1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 a = list[i++];
1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b;
1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (polarity == 1 || polarity == 2) {
1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = UNICODESET_LOW;
1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (other[j] == UNICODESET_LOW) { // skip base if already LOW
1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++j;
1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j];
1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        b = other[j++];
1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // simplest of all the routines
1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // sort the values, discarding identicals!
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (a < b) {
1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer[k++] = a;
1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            a = list[i++];
1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (b < a) {
1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer[k++] = b;
1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j++];
1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (a != UNICODESET_HIGH) { // at this point, a == b
1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // discard both values!
1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            a = list[i++];
1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j++];
1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else { // DONE!
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer[k++] = UNICODESET_HIGH;
1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            len = k;
1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x union y
1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x union ~y
1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x union y
1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x union ~y
1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) {
1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus() || other==NULL) {
1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureBufferCapacity(len + otherLen, status);
1690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
1691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0, j = 0, k = 0;
1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 a = list[i++];
1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b = other[j++];
1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // change from xor is that we have to check overlapping pairs
1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // polarity bit 1 means a is second, bit 2 means b is.
1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (polarity) {
1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 0: // both first; take lower if unequal
1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // take a
1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Back up over overlapping ranges in buffer[]
1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (k > 0 && a <= buffer[k-1]) {
1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Pick latter end value in buffer[] vs. list[]
1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = max(list[i], buffer[--k]);
1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // No overlap
1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buffer[k++] = a;
1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = list[i];
1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                i++; // Common if/else code factored out
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // take b
1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (k > 0 && b <= buffer[k-1]) {
1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    b = max(other[j], buffer[--k]);
1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buffer[k++] = b;
1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    b = other[j];
1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                j++;
1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, take a, drop b
1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // This is symmetrical; it doesn't matter if
1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // we backtrack with a or b. - liu
1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (k > 0 && a <= buffer[k-1]) {
1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = max(list[i], buffer[--k]);
1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // No overlap
1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buffer[k++] = a;
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    a = list[i];
1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                i++;
1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 3: // both second; take higher if unequal, and drop other
1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (b <= a) { // take a
1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // take b
1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (b == UNICODESET_HIGH) goto loop_end;
1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            a = list[i++];
1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            polarity ^= 1;   // factored common code
1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            b = other[j++];
1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            polarity ^= 2;
1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 1: // a second, b first; if b < a, overlap
1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // no overlap, take a
1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a; a = list[i++]; polarity ^= 1;
1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // OVERLAP, drop b
1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 2: // a first, b second; if a < b, overlap
1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (b < a) { // no overlap, take b
1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else  if (a < b) { // OVERLAP, drop a
1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end:
1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer[k++] = UNICODESET_HIGH;    // terminate
1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = k;
1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x intersect y
1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x intersect ~y == set-minus
1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x intersect y
1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x intersect ~y
1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) {
1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    ensureBufferCapacity(len + otherLen, status);
1803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (U_FAILURE(status)) {
1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        return;
1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = 0, j = 0, k = 0;
1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 a = list[i++];
1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 b = other[j++];
1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // change from xor is that we have to check overlapping pairs
1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // polarity bit 1 means a is second, bit 2 means b is.
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (polarity) {
1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 0: // both first; drop the smaller
1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // drop a
1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // drop b
1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, take one, drop other
1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 3: // both second; take lower if unequal
1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // take a
1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // take b
1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, take one, drop other
1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 1: // a second, b first;
1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (a < b) { // NO OVERLAP, drop a
1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (b < a) { // OVERLAP, take b
1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = b;
1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          case 2: // a first, b second; if a < b, overlap
1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (b < a) { // no overlap, drop b
1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else  if (a < b) { // OVERLAP, take a
1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buffer[k++] = a;
1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else { // a == b, drop both!
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (a == UNICODESET_HIGH) goto loop_end;
1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                a = list[i++];
1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 1;
1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                b = other[j++];
1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                polarity ^= 2;
1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end:
1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer[k++] = UNICODESET_HIGH;    // terminate
1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len = k;
1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    swapBuffers();
1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a
1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * string to the given <code>StringBuffer</code>.
1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool
1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) {
1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 cp;
1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) {
1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _appendToPat(buf, cp = s.char32At(i), escapeUnprintable);
1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a
1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character to the given <code>StringBuffer</code>.
1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool
1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) {
1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // unprintable
1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ICU_Utility::escapeUnprintable(buf, c)) {
1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return;
1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Okay to let ':' pass through
1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch (c) {
1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case SET_OPEN:
1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case SET_CLOSE:
1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case HYPHEN:
1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case COMPLEMENT:
1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case INTERSECTION:
1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case BACKSLASH:
1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case OPEN_BRACE:
1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case CLOSE_BRACE:
1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case COLON:
1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case SymbolTable::SYMBOL_REF:
1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf.append(BACKSLASH);
1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    default:
1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Escape whitespace
1930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if (PatternProps::isWhiteSpace(c)) {
1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(BACKSLASH);
1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buf.append(c);
1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append a string representation of this set to result.  This will be
1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a cleaned version of the string passed to applyPattern(), if there
1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is one.  Otherwise it will be generated.
1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      UBool escapeUnprintable) const
1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat != NULL) {
1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t i;
1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t backslashCount = 0;
1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (i=0; i<patLen; ) {
1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 c;
1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U16_NEXT(pat, i, patLen, c);
1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (escapeUnprintable && ICU_Utility::isUnprintable(c)) {
1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // If the unprintable character is preceded by an odd
1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // number of backslashes, then it has been escaped.
1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Before unescaping it, we delete the final
1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // backslash.
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((backslashCount % 2) == 1) {
1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    result.truncate(result.length() - 1);
1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ICU_Utility::escapeUnprintable(result, c);
1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                backslashCount = 0;
1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                result.append(c);
1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (c == BACKSLASH) {
1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++backslashCount;
1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    backslashCount = 0;
1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return result;
1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return _generatePattern(result, escapeUnprintable);
1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a string representation of this set.  If the result of
1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * calling this function is passed to a UnicodeSet constructor, it
1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will produce another set that is equal to this one.
1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::toPattern(UnicodeString& result,
1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UBool escapeUnprintable) const
1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result.truncate(0);
1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return _toPattern(result, escapeUnprintable);
1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate and append a string representation of this set to result.
1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This does not use this.pat, the cleaned up copy of the string
1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * passed to applyPattern().
1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                            UBool escapeUnprintable) const
1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result.append(SET_OPEN);
1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  // Check against the predefined categories.  We implicitly build
2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  // up ALL category sets the first time toPattern() is called.
2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      if (*this == getCategorySet(cat)) {
2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          result.append(COLON);
2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          result.append(CATEGORY_NAMES, cat*2, 2);
2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//          return result.append(CATEGORY_CLOSE);
2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//      }
2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  }
2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t count = getRangeCount();
2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If the set contains at least 2 intervals and includes both
2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // MIN_VALUE and MAX_VALUE, then the inverse representation will
2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // be more economical.
2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (count > 1 &&
2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        getRangeStart(0) == MIN_VALUE &&
2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        getRangeEnd(count-1) == MAX_VALUE) {
2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Emit the inverse
2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.append(COMPLEMENT);
2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i = 1; i < count; ++i) {
2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 start = getRangeEnd(i-1)+1;
2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 end = getRangeStart(i)-1;
2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _appendToPat(result, start, escapeUnprintable);
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (start != end) {
2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((start+1) != end) {
2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    result.append(HYPHEN);
2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(result, end, escapeUnprintable);
2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Default; emit the ranges as pairs
2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (int32_t i = 0; i < count; ++i) {
2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 start = getRangeStart(i);
2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 end = getRangeEnd(i);
2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _appendToPat(result, start, escapeUnprintable);
2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (start != end) {
2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((start+1) != end) {
2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    result.append(HYPHEN);
2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(result, end, escapeUnprintable);
2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i<strings->size(); ++i) {
2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.append(OPEN_BRACE);
2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _appendToPat(result,
2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     *(const UnicodeString*) strings->elementAt(i),
2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     escapeUnprintable);
2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.append(CLOSE_BRACE);
2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result.append(SET_CLOSE);
2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Release existing cached pattern
2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::releasePattern() {
2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat) {
2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(pat);
2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pat = NULL;
2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        patLen = 0;
2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Set the new pattern to cache.
2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::setPattern(const UnicodeString& newPat) {
2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    releasePattern();
2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t newPatLen = newPat.length();
2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar));
2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pat) {
2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        patLen = newPatLen;
2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        newPat.extractBetween(0, patLen, pat);
2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pat[patLen] = 0;
2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // else we don't care if malloc failed. This was just a nice cache.
2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // We can regenerate an equivalent pattern later when requested.
2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::freeze() {
2087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if(!isFrozen() && !isBogus()) {
2088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Do most of what compact() does before freezing because
2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // compact() will not work when the set is frozen.
2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA).
2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Delete buffer first to defragment memory less.
2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (buffer != NULL) {
2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_free(buffer);
2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buffer = NULL;
2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (capacity > (len + GROW_EXTRA)) {
2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Make the capacity equal to len or 1.
2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // We don't want to realloc of 0 size.
2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            capacity = len + (len == 0);
2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity);
2102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (list == NULL) { // Check for memory allocation error.
2103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                setToBogus();
2104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                return this;
2105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Optimize contains() and span() and similar functions.
2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!strings->isEmpty()) {
2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) {
2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // All strings are irrelevant for span() etc. because
2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // all of each string's code points are contained in this set.
2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Do not check needsStringSpanUTF8() because UTF-8 has at most as
2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // many relevant strings as UTF-16.
2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete stringSpan;
2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                stringSpan = NULL;
2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (stringSpan == NULL) {
2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // No span-relevant strings: Optimize for code point spans.
2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            bmpSet=new BMPSet(list, len);
2124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (bmpSet == NULL) { // Check for memory allocation error.
2125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                setToBogus();
2126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return this;
2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s);
2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length=u_strlen(s);
2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->span(s, length, spanCondition);
2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED :
2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF16()) {
2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.span(s, length, spanCondition);
2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start=0, prev=0;
2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U16_NEXT(s, start, length, c);
2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=start)<length);
2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s);
2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        length=u_strlen(s);
2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->spanBack(s, length, spanCondition);
2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED :
2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF16()) {
2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.spanBack(s, length, spanCondition);
2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev=length;
2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U16_PREV(s, 0, length, c);
2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=length)>0);
2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *s0=(const uint8_t *)s;
2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0);
2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length=(int32_t)uprv_strlen(s);
2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition);
2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED :
2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF8()) {
2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition);
2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start=0, prev=0;
2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U8_NEXT(s, start, length, c);
2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c<0) {
2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=0xfffd;
2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=start)<length);
2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length>0 && bmpSet!=NULL) {
2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *s0=(const uint8_t *)s;
2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return bmpSet->spanBackUTF8(s0, length, spanCondition);
2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length<0) {
225350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        length=(int32_t)uprv_strlen(s);
2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(length==0) {
2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return 0;
2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(stringSpan!=NULL) {
2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition);
2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(!strings->isEmpty()) {
2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ?
2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED :
2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSetStringSpan strSpan(*this, *strings, which);
2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(strSpan.needsStringSpanUTF8()) {
2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition);
2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        spanCondition=USET_SPAN_CONTAINED;  // Pin to 0/1 values.
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c;
2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t prev=length;
2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U8_PREV(s, 0, length, c);
2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(c<0) {
2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=0xfffd;
2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(spanCondition!=contains(c)) {
2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while((prev=length)>0);
2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return prev;
2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
2289