1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 1999-2011, International Business Machines
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   file name:  uniset_props.cpp
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   encoding:   US-ASCII
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   tab size:   8 (not used)
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   indentation:4
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created on: 2004aug25
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   created by: Markus W. Scherer
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Character property dependent functions moved here from uniset.cpp
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uscript.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uset.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/locid.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/brkiter.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uset_imp.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ruleiter.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_cmn.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uprops.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "propname.h"
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucase.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ubidi_props.h"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uinvchar.h"
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uprops.h"
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "charstr.h"
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "mutex.h"
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h"
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h"
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h"
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Number of elements in a statically sized array, as an int32_t.
// Only meaningful for true arrays (not pointers). The whole expansion is
// parenthesized so that the cast cannot bind to operators written next to
// the macro at the use site.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// initial storage. Must be >= 0
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// *** same as in uniset.cpp ! ***
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define START_EXTRA 16
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Define UChar constants using hex for EBCDIC compatibility
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used #define to reduce private static exports and memory access time.
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SET_OPEN        ((UChar)0x005B) /*[*/
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SET_CLOSE       ((UChar)0x005D) /*]*/
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define HYPHEN          ((UChar)0x002D) /*-*/
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define COMPLEMENT      ((UChar)0x005E) /*^*/
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define COLON           ((UChar)0x003A) /*:*/
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BACKSLASH       ((UChar)0x005C) /*\*/
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define INTERSECTION    ((UChar)0x0026) /*&*/
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_U         ((UChar)0x0055) /*U*/
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LOWER_U         ((UChar)0x0075) /*u*/
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define OPEN_BRACE      ((UChar)123)    /*{*/
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CLOSE_BRACE     ((UChar)125)    /*}*/
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_P         ((UChar)0x0050) /*P*/
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LOWER_P         ((UChar)0x0070) /*p*/
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_N         ((UChar)78)     /*N*/
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define EQUALS          ((UChar)0x003D) /*=*/
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar POSIX_OPEN[]  = { SET_OPEN,COLON,0 };  // "[:"
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 };  // ":]"
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar PERL_OPEN[]   = { BACKSLASH,LOWER_P,0 }; // "\\p"
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar PERL_CLOSE[]  = { CLOSE_BRACE,0 };    // "}"
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar NAME_OPEN[]   = { BACKSLASH,UPPER_N,0 };  // "\\N"
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Special property set IDs
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char ANY[]   = "ANY";   // [\u0000-\U0010FFFF]
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char ASCII[] = "ASCII"; // [\u0000-\u007F]
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char ASSIGNED[] = "Assigned"; // [:^Cn:]
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Unicode name property alias
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NAME_PROP "na"
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NAME_PROP_LENGTH 2
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Delimiter string used in patterns to close a category reference:
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ":]".  Example: "[:Lu:]".
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Cached sets ------------------------------------------------------------- ***
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_BEGIN
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic UBool U_CALLCONV uset_cleanup();
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CDECL_END
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Not a TriStateSingletonWrapper because we think the UnicodeSet constructor
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// can only fail with an out-of-memory error
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// if we have a correct pattern and the properties data is hardcoded and always available.
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass UnicodeSetSingleton : public SimpleSingletonWrapper<UnicodeSet> {
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeSetSingleton(SimpleSingleton &s, const char *pattern) :
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            SimpleSingletonWrapper<UnicodeSet>(s), fPattern(pattern) {}
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeSet *getInstance(UErrorCode &errorCode) {
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return SimpleSingletonWrapper<UnicodeSet>::getInstance(createInstance, fPattern, errorCode);
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate:
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static void *createInstance(const void *context, UErrorCode &errorCode) {
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString pattern((const char *)context, -1, US_INV);
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeSet *set=new UnicodeSet(pattern, errorCode);
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(set==NULL) {
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            errorCode=U_MEMORY_ALLOCATION_ERROR;
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return NULL;
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        set->freeze();
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return set;
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const char *fPattern;
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoSTATIC_SIMPLE_SINGLETON(uni32Singleton);
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Inclusions list
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// USetAdder implementation
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Does not use uset.h to reduce code dependencies
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_set_add(USet *set, UChar32 c) {
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ((UnicodeSet *)set)->add(c);
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_set_addRange(USet *set, UChar32 start, UChar32 end) {
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ((UnicodeSet *)set)->add(start, end);
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_set_addString(USet *set, const UChar *str, int32_t length) {
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length));
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Cleanup function for UnicodeSet
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV uset_cleanup(void) {
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (INCLUSIONS[i] != NULL) {
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete INCLUSIONS[i];
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            INCLUSIONS[i] = NULL;
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeSetSingleton(uni32Singleton, NULL).deleteInstance();
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru/*
176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruReduce excessive reallocation, and make it easier to detect initialization
177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruproblems.
178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUsually you don't see smaller sets than this for Unicode 5.0.
179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru*/
180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define DEFAULT_INCLUSION_CAPACITY 3072
181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru
182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruconst UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool needInit;
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UMTX_CHECK(NULL, (INCLUSIONS[src] == NULL), needInit);
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (needInit) {
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet* incl = new UnicodeSet();
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        USetAdder sa = {
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (USet *)incl,
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _set_add,
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _set_addRange,
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            _set_addString,
192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            NULL, // don't need remove()
193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            NULL // don't need removeRange()
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        };
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (incl != NULL) {
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, status);
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch(src) {
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UPROPS_SRC_CHAR:
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uchar_addPropertyStarts(&sa, &status);
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UPROPS_SRC_PROPSVEC:
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                upropsvec_addPropertyStarts(&sa, &status);
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UPROPS_SRC_CHAR_AND_PROPSVEC:
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uchar_addPropertyStarts(&sa, &status);
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                upropsvec_addPropertyStarts(&sa, &status);
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
20927f654740f2a26ad62a5c155af9199af9e69b889claireho            case UPROPS_SRC_CASE_AND_NORM: {
21027f654740f2a26ad62a5c155af9199af9e69b889claireho                const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
21127f654740f2a26ad62a5c155af9199af9e69b889claireho                if(U_SUCCESS(status)) {
21227f654740f2a26ad62a5c155af9199af9e69b889claireho                    impl->addPropertyStarts(&sa, status);
21327f654740f2a26ad62a5c155af9199af9e69b889claireho                }
21427f654740f2a26ad62a5c155af9199af9e69b889claireho                ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status);
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
21627f654740f2a26ad62a5c155af9199af9e69b889claireho            }
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            case UPROPS_SRC_NFC: {
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U_SUCCESS(status)) {
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    impl->addPropertyStarts(&sa, status);
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            case UPROPS_SRC_NFKC: {
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status);
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U_SUCCESS(status)) {
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    impl->addPropertyStarts(&sa, status);
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            case UPROPS_SRC_NFKC_CF: {
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status);
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(U_SUCCESS(status)) {
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    impl->addPropertyStarts(&sa, status);
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
23827f654740f2a26ad62a5c155af9199af9e69b889claireho            case UPROPS_SRC_NFC_CANON_ITER: {
23927f654740f2a26ad62a5c155af9199af9e69b889claireho                const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status);
24027f654740f2a26ad62a5c155af9199af9e69b889claireho                if(U_SUCCESS(status)) {
24127f654740f2a26ad62a5c155af9199af9e69b889claireho                    impl->addCanonIterPropertyStarts(&sa, status);
24227f654740f2a26ad62a5c155af9199af9e69b889claireho                }
24327f654740f2a26ad62a5c155af9199af9e69b889claireho                break;
24427f654740f2a26ad62a5c155af9199af9e69b889claireho            }
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UPROPS_SRC_CASE:
24727f654740f2a26ad62a5c155af9199af9e69b889claireho                ucase_addPropertyStarts(ucase_getSingleton(), &sa, &status);
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UPROPS_SRC_BIDI:
25027f654740f2a26ad62a5c155af9199af9e69b889claireho                ubidi_addPropertyStarts(ubidi_getSingleton(), &sa, &status);
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            default:
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                status = U_INTERNAL_PROGRAM_ERROR;
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_SUCCESS(status)) {
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Compact for caching
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                incl->compact();
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                umtx_lock(NULL);
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (INCLUSIONS[src] == NULL) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    INCLUSIONS[src] = incl;
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    incl = NULL;
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup);
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                umtx_unlock(NULL);
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete incl;
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return INCLUSIONS[src];
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho// Cache some sets for other services -------------------------------------- ***
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CFUNC UnicodeSet *
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehouniset_getUnicode32Instance(UErrorCode &errorCode) {
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return UnicodeSetSingleton(uni32Singleton, "[:age=3.2:]").getInstance(errorCode);
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// helper functions for matching of pattern syntax pieces ------------------ ***
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// these functions are parallel to the PERL_OPEN etc. strings above
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// using these functions is not only faster than UnicodeString::compare() and
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// caseCompare(), but they also make UnicodeSet work for simple patterns when
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// no Unicode properties data is available - when caseCompare() fails
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruisPerlOpen(const UnicodeString &pattern, int32_t pos) {
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c;
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static inline UBool
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruisPerlClose(const UnicodeString &pattern, int32_t pos) {
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pattern.charAt(pos)==CLOSE_BRACE;
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruisNameOpen(const UnicodeString &pattern, int32_t pos) {
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruisPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*static inline UBool
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruisPOSIXClose(const UnicodeString &pattern, int32_t pos) {
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}*/
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// TODO memory debugging provided inside uniset.cpp
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// could be made available here but probably obsolete with use of modern
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// memory leak checker tools
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _dbgct(me)
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors &c
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set from the given pattern, optionally ignoring
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * white space.  See the class description for the syntax of the
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern language.
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pattern a string specifying what characters are in the set
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeString& pattern,
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UErrorCode& status) :
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(status)){
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* test for NULL */
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(list == NULL) {
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }else{
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            allocateStrings(status);
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set from the given pattern, optionally ignoring
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * white space.  See the class description for the syntax of the
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern language.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pattern a string specifying what characters are in the set
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param options bitmask for options to apply to the pattern.
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeString& pattern,
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       uint32_t options,
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       const SymbolTable* symbols,
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UErrorCode& status) :
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(status)){
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* test for NULL */
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(list == NULL) {
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }else{
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            allocateStrings(status);
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            applyPattern(pattern, options, symbols, status);
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       uint32_t options,
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       const SymbolTable* symbols,
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UErrorCode& status) :
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0),
383c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL),
384c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    fFlags(0)
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_SUCCESS(status)){
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity);
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* test for NULL */
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(list == NULL) {
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status = U_MEMORY_ALLOCATION_ERROR;
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }else{
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            allocateStrings(status);
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            applyPattern(pattern, pos, options, symbols, status);
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    _dbgct(this);
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Public API
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UErrorCode& status) {
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Modifies this set to represent the set specified by the given
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern, optionally ignoring white space.  See the class
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * description for the syntax of the pattern language.
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pattern a string specifying what characters are in the set
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param options bitmask for options to apply to the pattern.
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE.
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     uint32_t options,
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const SymbolTable* symbols,
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UErrorCode& status) {
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status) || isFrozen()) {
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ParsePosition pos(0);
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    applyPattern(pattern, pos, options, symbols, status);
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) return *this;
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i = pos.getIndex();
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (options & USET_IGNORE_SPACE) {
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Skip over trailing whitespace
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ICU_Utility::skipWhitespace(pattern, i, TRUE);
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (i != pattern.length()) {
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_ILLEGAL_ARGUMENT_ERROR;
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              ParsePosition& pos,
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint32_t options,
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const SymbolTable* symbols,
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              UErrorCode& status) {
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status) || isFrozen()) {
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Need to build the pattern in a temporary string because
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // _applyPattern calls add() etc., which set pat to empty.
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString rebuiltPat;
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RuleCharacterIterator chars(pattern, symbols, pos);
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    applyPattern(chars, symbols, rebuiltPat, options, status);
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) return *this;
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (chars.inVariable()) {
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // syntaxError(chars, "Extra chars in variable value");
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_MALFORMED_SET;
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    setPattern(rebuiltPat);
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return true if the given position, in the given pattern, appears
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to be the start of a UnicodeSet pattern.
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) {
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ((pos+1) < pattern.length() &&
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pattern.charAt(pos) == (UChar)91/*[*/) ||
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        resemblesPropertyPattern(pattern, pos);
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Pattern parsing
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * A small all-inline class to manage a UnicodeSet pointer.  Add
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operator->() etc. as needed.
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UnicodeSetPointer {
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* p;
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inline UnicodeSetPointer() : p(0) {}
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inline ~UnicodeSetPointer() { delete p; }
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inline UnicodeSet* pointer() { return p; }
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inline UBool allocate() {
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (p == 0) {
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            p = new UnicodeSet();
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return p != 0;
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse the pattern from the given RuleCharacterIterator.  The
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * iterator is advanced over the parsed pattern.
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param chars iterator over the pattern characters.  Upon return
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * it will be advanced to the first character after the parsed
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern, or the end of the iteration if all characters are
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parsed.
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param symbols symbol table to use to parse and dereference
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * variables, or null if none.
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param rebuiltPat the pattern that was parsed, rebuilt or
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * copied from the input pattern, as appropriate.
 * @param options a bit mask of zero or more of the following:
 * USET_IGNORE_SPACE, USET_CASE_INSENSITIVE.
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::applyPattern(RuleCharacterIterator& chars,
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              const SymbolTable* symbols,
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              UnicodeString& rebuiltPat,
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              uint32_t options,
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                              UErrorCode& ec) {
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) return;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Syntax characters: [ ] ^ - & { }
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Recognized special forms for chars, sets: c-c s-s s&s
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                   RuleCharacterIterator::PARSE_ESCAPES;
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((options & USET_IGNORE_SPACE) != 0) {
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        opts |= RuleCharacterIterator::SKIP_WHITESPACE;
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString patLocal, buf;
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool usePat = FALSE;
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSetPointer scratch;
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RuleCharacterIterator::Pos backup;
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // mode: 0=before [, 1=between [...], 2=after ]
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // lastItem: 0=none, 1=char, 2=set
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int8_t lastItem = 0, mode = 0;
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 lastChar = 0;
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar op = 0;
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool invert = FALSE;
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clear();
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (mode != 2 && !chars.atEnd()) {
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_ASSERT((lastItem == 0 && op == 0) ||
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    op == INTERSECTION /*'&'*/)));
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c = 0;
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UBool literal = FALSE;
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeSet* nested = 0; // alias - do not delete
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // -------- Check for property pattern
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int8_t setMode = 0;
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (resemblesPropertyPattern(chars, opts)) {
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            setMode = 2;
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // -------- Parse '[' of opening delimiter OR nested set.
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // If there is a nested set, use `setMode' to define how
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // the set should be parsed.  If the '[' is part of the
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // opening delimiter for this pattern, parse special
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // strings "[", "[^", "[-", and "[^-".  Check for stand-in
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // characters representing a nested set in the symbol
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // table.
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Prepare to backup if necessary
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            chars.getPos(backup);
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c = chars.next(opts, literal, ec);
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) return;
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c == 0x5B /*'['*/ && !literal) {
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (mode == 1) {
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    chars.setPos(backup); // backup
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    setMode = 1;
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Handle opening '[' delimiter
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    mode = 1;
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    patLocal.append((UChar) 0x5B /*'['*/);
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    chars.getPos(backup); // prepare to backup
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    c = chars.next(opts, literal, ec);
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_FAILURE(ec)) return;
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (c == 0x5E /*'^'*/ && !literal) {
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        invert = TRUE;
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        patLocal.append((UChar) 0x5E /*'^'*/);
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chars.getPos(backup); // prepare to backup
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        c = chars.next(opts, literal, ec);
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (U_FAILURE(ec)) return;
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Fall through to handle special leading '-';
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // otherwise restart loop for nested [], \p{}, etc.
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (c == HYPHEN /*'-'*/) {
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        literal = TRUE;
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Fall through to handle literal '-' below
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chars.setPos(backup); // backup
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (symbols != 0) {
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const UnicodeFunctor *m = symbols->lookupMatcher(c);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (m != 0) {
60627f654740f2a26ad62a5c155af9199af9e69b889claireho                    const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m);
60727f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (ms == NULL) {
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ec = U_MALFORMED_SET;
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // casting away const, but `nested' won't be modified
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // (important not to modify stored set)
61327f654740f2a26ad62a5c155af9199af9e69b889claireho                    nested = const_cast<UnicodeSet*>(ms);
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    setMode = 3;
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // -------- Handle a nested set.  This either is inline in
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // the pattern or represented by a stand-in that has
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // previously been parsed and was looked up in the symbol
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // table.
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (setMode != 0) {
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (lastItem == 1) {
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (op != 0) {
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // syntaxError(chars, "Char expected after operator");
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ec = U_MALFORMED_SET;
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                add(lastChar, lastChar);
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(patLocal, lastChar, FALSE);
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lastItem = 0;
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                op = 0;
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                patLocal.append(op);
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (nested == 0) {
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // lazy allocation
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!scratch.allocate()) {
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ec = U_MEMORY_ALLOCATION_ERROR;
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                nested = scratch.pointer();
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch (setMode) {
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 1:
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                nested->applyPattern(chars, symbols, patLocal, options, ec);
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 2:
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                chars.skipIgnored(opts);
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                nested->applyPropertyPattern(chars, patLocal, ec);
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(ec)) return;
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 3: // `nested' already parsed
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                nested->_toPattern(patLocal, FALSE);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            usePat = TRUE;
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (mode == 0) {
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Entire pattern is a category; leave parse loop
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *this = *nested;
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                mode = 2;
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch (op) {
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case HYPHEN: /*'-'*/
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                removeAll(*nested);
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case INTERSECTION: /*'&'*/
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                retainAll(*nested);
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 0:
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                addAll(*nested);
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            op = 0;
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lastItem = 2;
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (mode == 0) {
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // syntaxError(chars, "Missing '['");
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec = U_MALFORMED_SET;
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return;
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // -------- Parse special (syntax) characters.  If the
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // current character is not special, or if it is escaped,
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // then fall through and handle it below.
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!literal) {
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch (c) {
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 0x5D /*']'*/:
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (lastItem == 1) {
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    add(lastChar, lastChar);
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    _appendToPat(patLocal, lastChar, FALSE);
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Treat final trailing '-' as a literal
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (op == HYPHEN /*'-'*/) {
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    add(op, op);
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    patLocal.append(op);
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if (op == INTERSECTION /*'&'*/) {
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // syntaxError(chars, "Trailing '&'");
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ec = U_MALFORMED_SET;
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                patLocal.append((UChar) 0x5D /*']'*/);
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                mode = 2;
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case HYPHEN /*'-'*/:
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (op == 0) {
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (lastItem != 0) {
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        op = (UChar) c;
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Treat final trailing '-' as a literal
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        add(c, c);
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        c = chars.next(opts, literal, ec);
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (U_FAILURE(ec)) return;
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (c == 0x5D /*']'*/ && !literal) {
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            patLocal.append(HYPHEN_RIGHT_BRACE);
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            mode = 2;
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            continue;
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // syntaxError(chars, "'-' not after char or set");
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ec = U_MALFORMED_SET;
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return;
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case INTERSECTION /*'&'*/:
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (lastItem == 2 && op == 0) {
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    op = (UChar) c;
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // syntaxError(chars, "'&' not after set");
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ec = U_MALFORMED_SET;
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return;
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 0x5E /*'^'*/:
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // syntaxError(chars, "'^' not after '['");
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ec = U_MALFORMED_SET;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return;
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case 0x7B /*'{'*/:
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (op != 0) {
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // syntaxError(chars, "Missing operand after operator");
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ec = U_MALFORMED_SET;
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (lastItem == 1) {
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    add(lastChar, lastChar);
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    _appendToPat(patLocal, lastChar, FALSE);
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lastItem = 0;
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf.truncate(0);
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                {
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UBool ok = FALSE;
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    while (!chars.atEnd()) {
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        c = chars.next(opts, literal, ec);
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (U_FAILURE(ec)) return;
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (c == 0x7D /*'}'*/ && !literal) {
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ok = TRUE;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        buf.append(c);
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (buf.length() < 1 || !ok) {
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // syntaxError(chars, "Invalid multicharacter string");
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ec = U_MALFORMED_SET;
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return;
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // We have new string. Add it to set and continue;
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // we don't need to drop through to the further
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // processing
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                add(buf);
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                patLocal.append((UChar) 0x7B /*'{'*/);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(patLocal, buf, FALSE);
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                patLocal.append((UChar) 0x7D /*'}'*/);
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case SymbolTable::SYMBOL_REF:
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                //         symbols  nosymbols
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // [a-$]   error    error (ambiguous)
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // [a$]    anchor   anchor
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // [a-$x]  var "x"* literal '$'
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // [a-$.]  error    literal '$'
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // *We won't get here in the case of var "x"
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                {
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    chars.getPos(backup);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    c = chars.next(opts, literal, ec);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_FAILURE(ec)) return;
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UBool anchor = (c == 0x5D /*']'*/ && !literal);
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (symbols == 0 && !anchor) {
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        c = SymbolTable::SYMBOL_REF;
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chars.setPos(backup);
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break; // literal '$'
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (anchor && op == 0) {
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (lastItem == 1) {
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            add(lastChar, lastChar);
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            _appendToPat(patLocal, lastChar, FALSE);
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        add(U_ETHER);
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        usePat = TRUE;
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        patLocal.append((UChar) SymbolTable::SYMBOL_REF);
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        patLocal.append((UChar) 0x5D /*']'*/);
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        mode = 2;
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // syntaxError(chars, "Unquoted '$'");
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ec = U_MALFORMED_SET;
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            default:
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // -------- Parse literal characters.  This includes both
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // escaped chars ("\u4E01") and non-syntax characters
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // ("a").
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (lastItem) {
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0:
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lastItem = 1;
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lastChar = c;
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 1:
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (op == HYPHEN /*'-'*/) {
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (lastChar >= c) {
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Don't allow redundant (a-a) or empty (b-a) ranges;
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // these are most likely typos.
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // syntaxError(chars, "Invalid range");
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ec = U_MALFORMED_SET;
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return;
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                add(lastChar, c);
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(patLocal, lastChar, FALSE);
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                patLocal.append(op);
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(patLocal, c, FALSE);
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lastItem = 0;
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                op = 0;
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                add(lastChar, lastChar);
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _appendToPat(patLocal, lastChar, FALSE);
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lastChar = c;
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 2:
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (op != 0) {
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // syntaxError(chars, "Set expected after operator");
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ec = U_MALFORMED_SET;
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return;
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lastChar = c;
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            lastItem = 1;
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (mode != 2) {
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // syntaxError(chars, "Missing ']'");
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec = U_MALFORMED_SET;
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    chars.skipIgnored(opts);
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /**
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Handle global flags (invert, case insensitivity).  If this
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * pattern should be compiled case-insensitive, then we need
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * to close over case BEFORE COMPLEMENTING.  This makes
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * patterns like /[^abc]/i work.
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((options & USET_CASE_INSENSITIVE) != 0) {
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        closeOver(USET_CASE_INSENSITIVE);
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else if ((options & USET_ADD_CASE_MAPPINGS) != 0) {
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        closeOver(USET_ADD_CASE_MAPPINGS);
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (invert) {
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement();
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Use the rebuilt pattern (patLocal) only if necessary.  Prefer the
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // generated pattern.
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (usePat) {
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rebuiltPat.append(patLocal);
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        _generatePattern(rebuiltPat, FALSE);
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
899c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isBogus() && U_SUCCESS(ec)) {
900c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // We likely ran out of memory. AHHH!
901c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
902c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Property set implementation
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool numericValueFilter(UChar32 ch, void* context) {
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return u_getNumericValue(ch) == *(double*)context;
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool generalCategoryMaskFilter(UChar32 ch, void* context) {
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t value = *(int32_t*)context;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (U_GET_GC_MASK((UChar32) ch) & value) != 0;
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool versionFilter(UChar32 ch, void* context) {
91927f654740f2a26ad62a5c155af9199af9e69b889claireho    static const UVersionInfo none = { 0, 0, 0, 0 };
92027f654740f2a26ad62a5c155af9199af9e69b889claireho    UVersionInfo v;
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_charAge(ch, v);
92227f654740f2a26ad62a5c155af9199af9e69b889claireho    UVersionInfo* version = (UVersionInfo*)context;
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0;
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct {
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UProperty prop;
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t value;
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} IntPropertyContext;
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool intPropertyFilter(UChar32 ch, void* context) {
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    IntPropertyContext* c = (IntPropertyContext*)context;
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value;
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
93627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool scriptExtensionsFilter(UChar32 ch, void* context) {
93727f654740f2a26ad62a5c155af9199af9e69b889claireho    return uscript_hasScript(ch, *(UScriptCode*)context);
93827f654740f2a26ad62a5c155af9199af9e69b889claireho}
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generic filter-based scanning code for UCD property UnicodeSets.
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::applyFilter(UnicodeSet::Filter filter,
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                             void* context,
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                             int32_t src,
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                             UErrorCode &status) {
94727f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(status)) return;
94827f654740f2a26ad62a5c155af9199af9e69b889claireho
94927f654740f2a26ad62a5c155af9199af9e69b889claireho    // Logically, walk through all Unicode characters, noting the start
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // and end of each range for which filter.contain(c) is
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // true.  Add each range to a set.
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
95327f654740f2a26ad62a5c155af9199af9e69b889claireho    // To improve performance, use an inclusions set which
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // encodes information about character ranges that are known
95527f654740f2a26ad62a5c155af9199af9e69b889claireho    // to have identical properties.
95627f654740f2a26ad62a5c155af9199af9e69b889claireho    // getInclusions(src) contains exactly the first characters of
95727f654740f2a26ad62a5c155af9199af9e69b889claireho    // same-value ranges for the given properties "source".
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UnicodeSet* inclusions = getInclusions(src, status);
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clear();
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 startHasProperty = -1;
966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    int32_t limitRange = inclusions->getRangeCount();
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int j=0; j<limitRange; ++j) {
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // get current range
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 start = inclusions->getRangeStart(j);
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 end = inclusions->getRangeEnd(j);
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // for all the code points in the range, process
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (UChar32 ch = start; ch <= end; ++ch) {
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // only add to this UnicodeSet on inflection points --
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // where the hasProperty value changes to false
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((*filter)(ch, context)) {
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (startHasProperty < 0) {
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    startHasProperty = ch;
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (startHasProperty >= 0) {
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                add(startHasProperty, ch-1);
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                startHasProperty = -1;
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (startHasProperty >= 0) {
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        add((UChar32)startHasProperty, (UChar32)0x10FFFF);
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isBogus() && U_SUCCESS(status)) {
991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // We likely ran out of memory. AHHH!
992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Note: we use ' ' in compiler code page */
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t j = 0;
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char ch;
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    --dstCapacity; /* make room for term. zero */
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while ((ch = *src++) != 0) {
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) {
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (j >= dstCapacity) return FALSE;
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        dst[j++] = ch;
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (j > 0 && dst[j-1] == ' ') --j;
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    dst[j] = 0;
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Property set API
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define FAIL(ec) {ec=U_ILLEGAL_ARGUMENT_ERROR; return *this;}
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet&
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec) || isFrozen()) return *this;
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
102527f654740f2a26ad62a5c155af9199af9e69b889claireho    } else if (prop == UCHAR_SCRIPT_EXTENSIONS) {
102627f654740f2a26ad62a5c155af9199af9e69b889claireho        UScriptCode script = (UScriptCode)value;
102727f654740f2a26ad62a5c155af9199af9e69b889claireho        applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec);
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        IntPropertyContext c = {prop, value};
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet&
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::applyPropertyAlias(const UnicodeString& prop,
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               const UnicodeString& value,
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               UErrorCode& ec) {
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec) || isFrozen()) return *this;
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // prop and value used to be converted to char * using the default
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // converter instead of the invariant conversion.
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This should not be necessary because all Unicode property and value
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // names use only invariant characters.
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If there are any variant characters, then we won't find them anyway.
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Checking first avoids assertion failures in the conversion.
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) ||
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        !uprv_isInvariantUString(value.getBuffer(), value.length())
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        FAIL(ec);
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
105227f654740f2a26ad62a5c155af9199af9e69b889claireho    CharString pname, vname;
105327f654740f2a26ad62a5c155af9199af9e69b889claireho    pname.appendInvariantChars(prop, ec);
105427f654740f2a26ad62a5c155af9199af9e69b889claireho    vname.appendInvariantChars(value, ec);
105527f654740f2a26ad62a5c155af9199af9e69b889claireho    if (U_FAILURE(ec)) return *this;
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UProperty p;
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t v;
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool mustNotBeEmpty = FALSE, invert = FALSE;
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (value.length() > 0) {
106227f654740f2a26ad62a5c155af9199af9e69b889claireho        p = u_getPropertyEnum(pname.data());
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (p == UCHAR_INVALID_CODE) FAIL(ec);
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Treat gc as gcm
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (p == UCHAR_GENERAL_CATEGORY) {
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            p = UCHAR_GENERAL_CATEGORY_MASK;
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) ||
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) ||
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) {
107327f654740f2a26ad62a5c155af9199af9e69b889claireho            v = u_getPropertyValueEnum(p, vname.data());
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (v == UCHAR_INVALID_CODE) {
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Handle numeric CCC
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (p == UCHAR_CANONICAL_COMBINING_CLASS ||
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS ||
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) {
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    char* end;
108027f654740f2a26ad62a5c155af9199af9e69b889claireho                    double value = uprv_strtod(vname.data(), &end);
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    v = (int32_t) value;
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (v != value || v < 0 || *end != 0) {
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // non-integral or negative value, or trailing junk
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        FAIL(ec);
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // If the resultant set is empty then the numeric value
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // was invalid.
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    mustNotBeEmpty = TRUE;
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    FAIL(ec);
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        else {
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            switch (p) {
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UCHAR_NUMERIC_VALUE:
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                {
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    char* end;
110127f654740f2a26ad62a5c155af9199af9e69b889claireho                    double value = uprv_strtod(vname.data(), &end);
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (*end != 0) {
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        FAIL(ec);
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec);
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return *this;
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UCHAR_NAME:
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UCHAR_UNICODE_1_NAME:
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                {
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Must munge name, since u_charFromName() does not do
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // 'loose' matching.
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength
111527f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UCharNameChoice choice = (p == UCHAR_NAME) ?
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U_EXTENDED_CHAR_NAME : U_UNICODE_10_CHAR_NAME;
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar32 ch = u_charFromName(choice, buf, &ec);
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_SUCCESS(ec)) {
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        clear();
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        add(ch);
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return *this;
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        FAIL(ec);
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            case UCHAR_AGE:
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                {
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // Must munge name, since u_versionFromString() does not do
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // 'loose' matching.
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    char buf[128];
113327f654740f2a26ad62a5c155af9199af9e69b889claireho                    if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec);
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UVersionInfo version;
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_versionFromString(version, buf);
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec);
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return *this;
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
114027f654740f2a26ad62a5c155af9199af9e69b889claireho            case UCHAR_SCRIPT_EXTENSIONS:
114127f654740f2a26ad62a5c155af9199af9e69b889claireho                v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data());
114227f654740f2a26ad62a5c155af9199af9e69b889claireho                if (v == UCHAR_INVALID_CODE) {
114327f654740f2a26ad62a5c155af9199af9e69b889claireho                    FAIL(ec);
114427f654740f2a26ad62a5c155af9199af9e69b889claireho                }
114527f654740f2a26ad62a5c155af9199af9e69b889claireho                // fall through to calling applyIntPropertyValue()
114627f654740f2a26ad62a5c155af9199af9e69b889claireho                break;
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            default:
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // p is a non-binary, non-enumerated property that we
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // don't support (yet).
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                FAIL(ec);
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // value is empty.  Interpret as General Category, Script, or
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Binary property.
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        p = UCHAR_GENERAL_CATEGORY_MASK;
115927f654740f2a26ad62a5c155af9199af9e69b889claireho        v = u_getPropertyValueEnum(p, pname.data());
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (v == UCHAR_INVALID_CODE) {
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            p = UCHAR_SCRIPT;
116227f654740f2a26ad62a5c155af9199af9e69b889claireho            v = u_getPropertyValueEnum(p, pname.data());
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (v == UCHAR_INVALID_CODE) {
116427f654740f2a26ad62a5c155af9199af9e69b889claireho                p = u_getPropertyEnum(pname.data());
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) {
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    v = 1;
116727f654740f2a26ad62a5c155af9199af9e69b889claireho                } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) {
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set(MIN_VALUE, MAX_VALUE);
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return *this;
117027f654740f2a26ad62a5c155af9199af9e69b889claireho                } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) {
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    set(0, 0x7F);
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return *this;
117327f654740f2a26ad62a5c155af9199af9e69b889claireho                } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) {
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // [:Assigned:]=[:^Cn:]
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    p = UCHAR_GENERAL_CATEGORY_MASK;
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    v = U_GC_CN_MASK;
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    invert = TRUE;
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    FAIL(ec);
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
118427f654740f2a26ad62a5c155af9199af9e69b889claireho
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    applyIntPropertyValue(p, v, ec);
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(invert) {
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        complement();
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(ec) && (mustNotBeEmpty && isEmpty())) {
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // mustNotBeEmpty is set to true if an empty set indicates
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // invalid input.
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec = U_ILLEGAL_ARGUMENT_ERROR;
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isBogus() && U_SUCCESS(ec)) {
1197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // We likely ran out of memory. AHHH!
1198c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
1199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Property set patterns
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return true if the given position, in the given pattern, appears
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to be the start of a property set pattern.
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern,
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           int32_t pos) {
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Patterns are at least 5 characters long
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((pos+5) > pattern.length()) {
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Look for an opening [:, [:^, \p, or \P
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos);
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return true if the given iterator appears to point at a
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * property pattern.  Regardless of the result, return with the
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * iterator unchanged.
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param chars iterator over the pattern characters.  Upon return
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * it will be unchanged.
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param iterOpts RuleCharacterIterator options
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           int32_t iterOpts) {
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // NOTE: literal will always be FALSE, because we don't parse escapes.
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool result = FALSE, literal;
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES;
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RuleCharacterIterator::Pos pos;
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    chars.getPos(pos);
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 c = chars.next(iterOpts, literal, ec);
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                               literal, ec);
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    chars.setPos(pos);
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result && U_SUCCESS(ec);
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse the given property pattern at the given parse position.
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                             ParsePosition& ppos,
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                             UErrorCode &ec) {
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t pos = ppos.getIndex();
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool posix = FALSE; // true for [:pat:], false for \p{pat} \P{pat} \N{pat}
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool isName = FALSE; // true for \N{pat}, o/w false
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool invert = FALSE;
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) return *this;
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Minimum length is 5 characters, e.g. \p{L}
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if ((pos+5) > pattern.length()) {
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        FAIL(ec);
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // On entry, ppos should point to one of the following locations:
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Look for an opening [:, [:^, \p, or \P
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (isPOSIXOpen(pattern, pos)) {
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        posix = TRUE;
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos += 2;
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = ICU_Utility::skipWhitespace(pattern, pos);
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++pos;
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            invert = TRUE;
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar c = pattern.charAt(pos+1);
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        invert = (c == UPPER_P);
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        isName = (c == UPPER_N);
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos += 2;
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = ICU_Utility::skipWhitespace(pattern, pos);
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Syntax error; "\p" or "\P" not followed by "{"
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            FAIL(ec);
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Open delimiter not seen
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        FAIL(ec);
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Look for the matching close delimiter, either :] or }
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t close = pattern.indexOf(posix ? POSIX_CLOSE : PERL_CLOSE, pos);
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (close < 0) {
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Syntax error; close delimiter missing
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        FAIL(ec);
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Look for an '=' sign.  If this is present, we will parse a
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // medium \p{gc=Cf} or long \p{GeneralCategory=Format}
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // pattern.
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t equals = pattern.indexOf(EQUALS, pos);
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString propName, valueName;
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (equals >= 0 && equals < close && !isName) {
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Equals seen; parse medium/long pattern
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.extractBetween(pos, equals, propName);
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.extractBetween(equals+1, close, valueName);
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else {
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Handle case where no '=' is seen, and \N{}
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pattern.extractBetween(pos, close, propName);
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Handle \N{name}
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (isName) {
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This is a little inefficient since it means we have to
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // parse NAME_PROP back to UCHAR_NAME even though we already
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // know it's UCHAR_NAME.  If we refactor the API to
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // support args of (UProperty, char*) then we can remove
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // NAME_PROP and make this a little more efficient.
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            valueName = propName;
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV);
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    applyPropertyAlias(propName, valueName, ec);
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(ec)) {
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (invert) {
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            complement();
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Move to the limit position after the close delimiter if the
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // parse succeeded.
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ppos.setIndex(close + (posix ? 2 : 1));
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a property pattern.
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param chars iterator over the pattern characters.  Upon return
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * it will be advanced to the first character after the parsed
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern, or the end of the iteration if all characters are
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parsed.
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param rebuiltPat the pattern that was parsed, rebuilt or
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * copied from the input pattern, as appropriate.
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars,
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      UnicodeString& rebuiltPat,
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      UErrorCode& ec) {
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) return;
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString pattern;
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    chars.lookahead(pattern);
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ParsePosition pos(0);
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    applyPropertyPattern(pattern, pos, ec);
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) return;
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pos.getIndex() == 0) {
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // syntaxError(chars, "Invalid property pattern");
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ec = U_MALFORMED_SET;
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    chars.jumpahead(pos.getIndex());
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rebuiltPat.append(pattern, 0, pos.getIndex());
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Case folding API
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// add the result of a full case mapping to the set
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// use str as a temporary string to avoid constructing one
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruaddCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) {
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(result >= 0) {
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(result > UCASE_MAX_STRING_LENGTH) {
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // add a single-code point case mapping
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            set.add(result);
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // add a string case mapping from full with length result
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.setTo((UBool)FALSE, full, result);
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            set.add(str);
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // result < 0: the code point mapped to itself, no need to add it
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // see ucase.h
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::closeOver(int32_t attribute) {
1392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (isFrozen() || isBogus()) {
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return *this;
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) {
139627f654740f2a26ad62a5c155af9199af9e69b889claireho        const UCaseProps *csp = ucase_getSingleton();
139727f654740f2a26ad62a5c155af9199af9e69b889claireho        {
1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet foldSet(*this);
1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str;
1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            USetAdder sa = {
140127f654740f2a26ad62a5c155af9199af9e69b889claireho                foldSet.toUSet(),
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _set_add,
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _set_addRange,
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                _set_addString,
1405c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                NULL, // don't need remove()
1406c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                NULL // don't need removeRange()
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            };
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // start with input set to guarantee inclusion
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // USET_CASE: remove strings because the strings will actually be reduced (folded);
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            //            therefore, start with no strings and add only those needed
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (attribute & USET_CASE_INSENSITIVE) {
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                foldSet.strings->removeAllElements();
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t n = getRangeCount();
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 result;
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UChar *full;
1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t locCache = 0;
1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (int32_t i=0; i<n; ++i) {
1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar32 start = getRangeStart(i);
1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar32 end   = getRangeEnd(i);
1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (attribute & USET_CASE_INSENSITIVE) {
1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // full case closure
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (UChar32 cp=start; cp<=end; ++cp) {
1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ucase_addCaseClosure(csp, cp, &sa);
1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // add case mappings
1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // (does not add long s for regular s, or Kelvin for k, for example)
1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (UChar32 cp=start; cp<=end; ++cp) {
1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache);
1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        addCaseMapping(foldSet, result, full, str);
1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache);
1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        addCaseMapping(foldSet, result, full, str);
1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache);
1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        addCaseMapping(foldSet, result, full, str);
1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        result = ucase_toFullFolding(csp, cp, &full, 0);
1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        addCaseMapping(foldSet, result, full, str);
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (strings != NULL && strings->size() > 0) {
1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (attribute & USET_CASE_INSENSITIVE) {
1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (int32_t j=0; j<strings->size(); ++j) {
1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str = *(const UnicodeString *) strings->elementAt(j);
1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.foldCase();
1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) {
1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            foldSet.add(str); // does not map to code points: add the folded string itself
1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    Locale root("");
1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION
146027f654740f2a26ad62a5c155af9199af9e69b889claireho                    UErrorCode status = U_ZERO_ERROR;
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    BreakIterator *bi = BreakIterator::createWordInstance(root, status);
1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_SUCCESS(status)) {
146327f654740f2a26ad62a5c155af9199af9e69b889claireho#endif
1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        const UnicodeString *pStr;
1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        for (int32_t j=0; j<strings->size(); ++j) {
1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            pStr = (const UnicodeString *) strings->elementAt(j);
1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (str = *pStr).toLower(root);
1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            foldSet.add(str);
1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION
1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (str = *pStr).toTitle(bi, root);
1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            foldSet.add(str);
1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (str = *pStr).toUpper(root);
1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            foldSet.add(str);
1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (str = *pStr).foldCase();
1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            foldSet.add(str);
1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION
148027f654740f2a26ad62a5c155af9199af9e69b889claireho                    }
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    delete bi;
1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *this = foldSet;
1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return *this;
1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
1492