1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 38393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 1999-2012, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 10/20/99 alan Creation. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h" 14103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uniset.h" 15103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf8.h" 16103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ruleiter.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h" 21103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uelement.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "charstr.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bmpset.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unisetspan.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Define UChar constants using hex for EBCDIC compatibility 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used #define to reduce private static exports and memory access time. 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SET_OPEN ((UChar)0x005B) /*[*/ 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SET_CLOSE ((UChar)0x005D) /*]*/ 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define HYPHEN ((UChar)0x002D) /*-*/ 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define COMPLEMENT ((UChar)0x005E) /*^*/ 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define COLON ((UChar)0x003A) /*:*/ 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BACKSLASH ((UChar)0x005C) /*\*/ 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define INTERSECTION ((UChar)0x0026) /*&*/ 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_U ((UChar)0x0055) /*U*/ 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LOWER_U ((UChar)0x0075) /*u*/ 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define OPEN_BRACE ((UChar)123) /*{*/ 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CLOSE_BRACE ((UChar)125) /*}*/ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_P ((UChar)0x0050) /*P*/ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LOWER_P ((UChar)0x0070) /*p*/ 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_N ((UChar)78) /*N*/ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define EQUALS ((UChar)0x003D) /*=*/ 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// HIGH_VALUE > all valid values. 110000 for codepoints 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UNICODESET_HIGH 0x0110000 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// LOW <= all valid values. ZERO for codepoints 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UNICODESET_LOW 0x000000 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// initial storage. Must be >= 0 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define START_EXTRA 16 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// extra amount for growth. Must be >= 0 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define GROW_EXTRA START_EXTRA 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruSymbolTable::~SymbolTable() {} 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet) 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Modify the given UChar32 variable so that it is in range, by 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values < UNICODESET_LOW to UNICODESET_LOW, and 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1. 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It modifies its argument in-place and also returns it. 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 pinCodePoint(UChar32& c) { 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < UNICODESET_LOW) { 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = UNICODESET_LOW; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (c > (UNICODESET_HIGH-1)) { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = (UNICODESET_HIGH-1); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debugging 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// DO NOT DELETE THIS CODE. This code is used to debug memory leaks. 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To enable the debugging, define the symbol DEBUG_MEM in the line 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// below. This will result in text being sent to stdout that looks 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// like this: 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85- 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85- 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Each line lists a construction (ct) or destruction (dt) event, the 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// object address, the number of outstanding objects after the event, 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and the pattern of the object in question. 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #define DEBUG_MEM 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_MEM 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t _dbgCount = 0; 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void _dbgct(UnicodeSet* set) { 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(str, TRUE); 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf[40]; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.extract(0, 39, buf, ""); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf); 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void _dbgdt(UnicodeSet* set) { 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(str, TRUE); 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf[40]; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.extract(0, 39, buf, ""); 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _dbgct(set) 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _dbgdt(set) 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// UnicodeString in UVector support 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) { 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &a = *(const UnicodeString*)t1.pointer; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &b = *(const UnicodeString*)t2.pointer; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return a.compare(b); 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors &c 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs an empty set. 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet() : 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0), 147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[0] = UNICODESET_HIGH; 158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 160c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set containing the given range. If <code>end > 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start</code> then an empty set is created. 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(UChar32 start, UChar32 end) : 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0), 174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[0] = UNICODESET_HIGH; 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(start, end); 186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set that is identical to the given UnicodeSet. 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o) : 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter(o), 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0), 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet(0), 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(0), bufferCapacity(0), 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 208c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *this = o; 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy-construct as thawed. 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter(o), 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(0), capacity(o.len + GROW_EXTRA), list(0), 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet(0), 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(0), bufferCapacity(0), 225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 232c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // *this = o except for bmpSet and stringSpan 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = o.len; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(list, o.list, len*sizeof(UChar32)); 238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings != NULL && o.strings != NULL) { 239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->assign(*o.strings, cloneUnicodeString, status); 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // Invalid strings. 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 243c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.pat) { 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setPattern(UnicodeString(o.pat, o.patLen)); 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructs the set. 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::~UnicodeSet() { 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgdt(this); // first! 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(list); 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bmpSet; 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer) { 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buffer); 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete strings; 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete stringSpan; 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assigns this object to be a copy of another. 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this == &o) { 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (isFrozen()) { 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (o.isBogus()) { 280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureCapacity(o.len, ec); 285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(ec)) { 286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; // There is no way to report this error :-( 287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = o.len; 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(list, o.list, len*sizeof(UChar32)); 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.bmpSet == NULL) { 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet = NULL; 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet = new BMPSet(*o.bmpSet, list, len); 294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (bmpSet == NULL) { // Check for memory allocation error. 295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings != NULL && o.strings != NULL) { 300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->assign(*o.strings, cloneUnicodeString, ec); 301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // Invalid strings. 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.stringSpan == NULL) { 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = NULL; 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); 309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (stringSpan == NULL) { // Check for memory allocation error. 310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.pat) { 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setPattern(UnicodeString(o.pat, o.patLen)); 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a copy of this object. All UnicodeMatcher objects have 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to support cloning in order to allow classes using 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UnicodeMatchers, such as Transliterator, to implement cloning. 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor* UnicodeSet::clone() const { 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new UnicodeSet(*this); 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::cloneAsThawed() const { 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new UnicodeSet(*this, TRUE); 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compares the specified object with this set for equality. Returns 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>true</tt> if the two sets 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the same size, and every member of the specified set is 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in this set (or equivalently, every member of this set is 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in the specified set). 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param o set to be compared for equality with this set. 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if the specified set is equal to this set. 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::operator==(const UnicodeSet& o) const { 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len != o.len) return FALSE; 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < len; ++i) { 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list[i] != o.list[i]) return FALSE; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*strings != *o.strings) return FALSE; 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the hash code value for this set. 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the hash code value for this set. 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see Object#hashCode() 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::hashCode(void) const { 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = len; 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < len; ++i) { 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result *= 1000003; 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result += list[i]; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Public API 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the number of elements in this set (its cardinality), 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note than the elements of a set may include both individual 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * codepoints and strings. 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the number of elements in this set (its cardinality). 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::size(void) const { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = 0; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = getRangeCount(); 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < count; ++i) { 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n += getRangeEnd(i) - getRangeStart(i) + 1; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return n + strings->size(); 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains no elements. 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains no elements. 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::isEmpty(void) const { 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return len == 1 && strings->size() == 0; 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains the given character. 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c character to be checked for containment 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 c) const { 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set i to the index of the start item greater than ch 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We know we will terminate without length test! 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LATER: for large sets, add binary search 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t i = -1; 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (;;) { 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (c < list[++i]) break; 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bmpSet != NULL) { 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return bmpSet->contains(c); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stringSpan != NULL) { 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->contains(c); 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(c); 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(i & 1); // return true if odd 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the smallest value i such that c < list[i]. Caller 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must ensure that c is a legal value or this method will enter 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an infinite loop. This method performs a binary search. 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c a character in the range MIN_VALUE..MAX_VALUE 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the smallest integer i in the range 0..len-1, 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive, such that c < list[i] 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::findCodePoint(UChar32 c) const { 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Examples: 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru findCodePoint(c) 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set list[] c=0 1 3 4 7 8 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru === ============== =========== 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [] [110000] 0 0 0 0 0 0 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [:Any:] [0, 110000] 1 1 1 1 1 1 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return the smallest i such that c < list[i]. Assume 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[len - 1] == HIGH and that c is legal (0..HIGH-1). 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < list[0]) 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // High runner test. c is often after the last range, so an 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // initial check for this condition pays off. 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lo = 0; 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t hi = len - 1; 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lo >= hi || c >= list[hi-1]) 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return hi; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invariant: c >= list[lo] 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invariant: c < list[hi] 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = (lo + hi) >> 1; 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i == lo) { 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // Found! 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (c < list[i]) { 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hi = i; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lo = i; 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return hi; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains every character 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range. 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 start, UChar32 end) const { 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t i = -1; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (;;) { 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (start < list[++i]) break; 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(start); 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ((i & 1) != 0 && end < list[i]); 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains the given 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * multicharacter string. 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string to be checked for containment 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains the specified string 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(const UnicodeString& s) const { 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.length() == 0) return FALSE; 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strings->contains((void*) &s); 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return contains((UChar32) cp); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters and strings 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set. 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeSet& c) const { 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The specified set is a subset if all of its pairs are contained in 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this set. It's possible to code this more efficiently in terms of 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direct manipulation of the inversion lists if the need arises. 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = c.getRangeCount(); 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i=0; i<n; ++i) { 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) { 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->containsAll(*c.strings)) return FALSE; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string. 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeString& s) const { 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) == 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.length()); 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range. 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t i = -1; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (;;) { 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (start < list[++i]) break; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(start); 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ((i & 1) == 0 && end < list[i]); 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters and strings 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set. 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeSet& c) const { 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The specified set is a subset if all of its pairs are contained in 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this set. It's possible to code this more efficiently in terms of 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direct manipulation of the inversion lists if the need arises. 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = c.getRangeCount(); 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<n; ++i) { 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) { 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->containsNone(*c.strings)) return FALSE; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string. 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeString& s) const { 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) == 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.length()); 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains any character whose low byte 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indexing. 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::matchesIndexValue(uint8_t v) const { 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* The index value v, in the range [0,255], is contained in this set if 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * it is contained in any pair of this set. Pairs either have the high 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bytes equal, or unequal. If the high bytes are equal, then we have 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * aaxx..aayy, where aa is the high byte. Then v is contained if xx <= 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * v <= yy. If the high bytes are unequal we have aaxx..bbyy, bb>aa. 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Then v is contained if xx <= v || v <= yy. (This is identical to the 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * time zone month containment logic.) 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rangeCount=getRangeCount(); 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<rangeCount; ++i) { 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 low = getRangeStart(i); 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 high = getRangeEnd(i); 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((low & ~0xFF) == (high & ~0xFF)) { 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((low & 0xFF) <= v && v <= (high & 0xFF)) { 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) { 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strings->size() != 0) { 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<strings->size(); ++i) { 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (s.length() == 0) { 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // // Empty strings match everything 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return TRUE; 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(s.length() != 0); // We enforce this elsewhere 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = s.char32At(0); 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((c & 0xFF) == v) { 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implementation of UnicodeMatcher::matches(). Always matches the 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * longest possible multichar string. 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUMatchDegree UnicodeSet::matches(const Replaceable& text, 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t& offset, 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit, 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool incremental) { 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset == limit) { 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings, if any, have length != 0, so we don't worry 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // about them here. If we ever allow zero-length strings 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we much check for them here. 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (contains(U_ETHER)) { 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return incremental ? U_PARTIAL_MATCH : U_MATCH; 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_MISMATCH; 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strings->size() != 0) { // try strings first 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // might separate forward and backward loops later 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for now they are combined 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO Improve efficiency of this, at least in the forward 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direction, if not in both. In the forward direction we 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // can assume the strings are sorted. 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool forward = offset < limit; 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // firstChar is the leftmost char to match in the 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // forward direction or the rightmost char to match in 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the reverse direction. 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar firstChar = text.charAt(offset); 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there are multiple strings that can match we 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return the longest match. 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t highWaterLength = 0; 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<strings->size(); ++i) { 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (trial.length() == 0) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return U_MATCH; // null-string always matches 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(trial.length() != 0); // We ensure this elsewhere 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = trial.charAt(forward ? 0 : trial.length() - 1); 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings are sorted, so we can optimize in the 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // forward direction. 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forward && c > firstChar) break; 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != firstChar) continue; 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchLen = matchRest(text, offset, limit, trial); 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (incremental) { 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxLen = forward ? limit-offset : offset-limit; 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchLen == maxLen) { 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have successfully matched but only up to limit. 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_PARTIAL_MATCH; 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchLen == trial.length()) { 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have successfully matched the whole string. 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchLen > highWaterLength) { 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru highWaterLength = matchLen; 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In the forward direction we know strings 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are sorted so we can bail early. 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forward && matchLen < highWaterLength) { 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've checked all strings without a partial match. 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we have full matches, return the longest one. 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (highWaterLength != 0) { 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset += forward ? highWaterLength : -highWaterLength; 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_MATCH; 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UnicodeFilter::matches(text, offset, limit, incremental); 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the longest match for s in text at the given position. 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If limit > start then match forward from start+1 to limit 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * matching all characters except s.charAt(0). If limit < start, 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go backward starting from start-1 matching all characters 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except s.charAt(s.length()-1). This method assumes that the 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * first character, text.charAt(start), matches s, so it does not 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * check it. 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the text to match 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start the first character to match. In the forward 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction, text.charAt(start) is matched against s.charAt(0). 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In the reverse direction, it is matched against 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.charAt(s.length()-1). 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param limit the limit offset for matching, either last+1 in 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the forward direction, or last-1 in the reverse direction, 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * where last is the index of the last character to match. 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return If part of s matches up to the limit, return |limit - 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start|. If all of s matches before reaching the limit, return 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.length(). If there is a mismatch between s and text, return 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::matchRest(const Replaceable& text, 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, int32_t limit, 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& s) { 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxLen; 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t slen = s.length(); 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start < limit) { 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxLen = limit - start; 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxLen > slen) maxLen = slen; 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 1; i < maxLen; ++i) { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (text.charAt(start + i) != s.charAt(i)) return 0; 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxLen = start - limit; 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxLen > slen) maxLen = slen; 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --slen; // <=> slen = s.length() - 1; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 1; i < maxLen; ++i) { 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (text.charAt(start - i) != s.charAt(slen - i)) return 0; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxLen; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement of UnicodeMatcher 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toUnionTo.addAll(*this); 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index of the given character within this set, where 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point. If the character 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is not in this set, return -1. The inverse of this method is 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>charAt()</code>. 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an index from 0..size()-1, or -1 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::indexOf(UChar32 c) const { 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < MIN_VALUE || c > MAX_VALUE) { 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0; 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = 0; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = list[i++]; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < start) { 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 limit = list[i++]; 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < limit) { 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return n + c - start; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n += limit - start; 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the character at the given index within this set, where 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point. If the index is 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * out of range, return (UChar32)-1. The inverse of this method is 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>indexOf()</code>. 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an index from 0..size()-1 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the character at the given index, or (UChar32)-1. 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::charAt(int32_t index) const { 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index >= 0) { 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // len2 is the largest even integer <= len, that is, it is len 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for even values and len-1 for odd values. With odd values 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the last entry is UNICODESET_HIGH. 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len2 = len & ~1; 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i < len2;) { 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = list[i++]; 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = list[i++] - start; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index < count) { 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar32)(start + index); 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index -= count; 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar32)-1; 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make this object represent the range <code>start - end</code>. 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If <code>end > start</code> then this object is set to an 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an empty range. 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character in the set, inclusive 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @rparam end last character in the set, inclusive 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) { 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clear(); 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(start, end); 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified range to this set if it is not already 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present. If this set already contains the specified range, 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged. If <code>end > start</code> 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is added, leaving the set unchanged. 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be added 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set. 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be added 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set. 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) { 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) < pinCodePoint(end)) { 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(range, 2, 0); 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (start == end) { 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(start); 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #define DEBUG_US_ADD 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid dump(UChar32 c) { 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c <= 0xFF) { 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%c", (char)c); 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("U+%04X", c); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid dump(const UChar32* list, int32_t len) { 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("["); 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<len; ++i) { 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 0) printf(", "); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(list[i]); 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("]"); 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified character to this set if it is not already 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present. If this set already contains the specified character, 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged. 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 c) { 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find smallest i such that c < list[i] 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if odd, then it is IN the set 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if even, then it is OUT of the set 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(pinCodePoint(c)); 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // already in set? 882c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((i & 1) != 0 || isFrozen() || isBogus()) return *this; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // HIGH is 0x110000 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(list[len-1] == HIGH); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // empty = [HIGH] 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [start_0, limit_0, start_1, limit_1, HIGH] 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // i == 0 means c is before the first range 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("Add of "); 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(c); 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" found at %d", i); 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(": "); 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(list, len); 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" => "); 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == list[i]-1) { 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // c is before start of next range 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i] = c; 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we touched the HIGH mark, then add a new one 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == (UNICODESET_HIGH - 1)) { 910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureCapacity(len+1, status); 912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; // There is no way to report this error :-( 914c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[len++] = UNICODESET_HIGH; 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0 && c == list[i-1]) { 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // collapse adjacent ranges 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, c, c, limit_k, ..., HIGH] 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (int32_t k=i-1; k<len-2; ++k) { 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[k] = list[k+2]; 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* dst = list + i - 1; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* src = dst + 2; 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* srclimit = list + len; 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (src < srclimit) *(dst++) = *(src++); 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len -= 2; 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (i > 0 && c == list[i-1]) { 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // c is after end of prior range 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i-1]++; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // no need to check for collapse here 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At this point we know the new char is not adjacent to 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // any existing ranges, and it is not 10FFFF. 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureCapacity(len+2, status); 957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; // There is no way to report this error :-( 959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (int32_t k=len-1; k>=i; --k) { 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[k+2] = list[k]; 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* src = list + len; 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* dst = src + 2; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* srclimit = list + i; 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (src > srclimit) *(--dst) = *(--src); 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i] = c; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i+1] = c+1; 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len += 2; 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(list, len); 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<len; ++i) { 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list[i] <= list[i-1]) { 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Corrupt array! 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("ERROR: list has been corrupted\n"); 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(1); 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified multicharacter to this set if it is not already 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present. If this set already contains the multicharacter, 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged. 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Thus "ch" => {"ch"} 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the source string 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(const UnicodeString& s) { 1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (s.length() == 0 || isFrozen() || isBogus()) return *this; 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->contains((void*) &s)) { 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _add(s); 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add((UChar32)cp); 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the given string, in order, to 'strings'. The given string 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must have been checked by the caller to not be empty and to not 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * already be in 'strings'. 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_add(const UnicodeString& s) { 1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString* t = new UnicodeString(s); 1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t == NULL) { // Check for memory allocation error. 1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1027c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->sortedInsert(t, compareUnicodeString, ec); 1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(ec)) { 1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete t; 1033c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a code point IF the string consists of a single one. 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise returns -1. 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param string to test 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getSingleCP(const UnicodeString& s) { 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (s.length() < 1) { 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.length() > 2) return -1; 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.length() == 1) return s.charAt(0); 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at this point, len = 2 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp = s.char32At(0); 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp > 0xFFFF) { // is surrogate pair 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return cp; 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeString& s) { 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 1064103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp = s.char32At(i); 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(cp); 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) { 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(s); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retainAll(set); 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) { 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(s); 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complementAll(set); 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) { 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(s); 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru removeAll(set); 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAllStrings() { 1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->removeAllElements(); 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from a multicharacter string. Thus "ch" => {"ch"} 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given string 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) { 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = new UnicodeSet(); 1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (set != NULL) { // Check for memory allocation error. 1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru set->add(s); 1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return set; 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given characters 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) { 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = new UnicodeSet(); 1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (set != NULL) { // Check for memory allocation error. 1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru set->addAll(s); 1140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return set; 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retain only the elements in this set that are contained in the 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range. If <code>end > start</code> then an empty range is 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * retained, leaving the set empty. 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be retained 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set. 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be retained 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set. 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) { 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) <= pinCodePoint(end)) { 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(range, 2, 0); 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clear(); 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 c) { 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retain(c, c); 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified range from this set if it is present. 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. If <code>end > start</code> then an empty range is 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * removed, leaving the set unchanged. 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) { 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) <= pinCodePoint(end)) { 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(range, 2, 2); 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified character from this set if it is present. 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 c) { 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return remove(c, c); 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified string from this set if it is present. 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified character once the call 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(const UnicodeString& s) { 1204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (s.length() == 0 || isFrozen() || isBogus()) return *this; 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->removeElement((void*) &s); 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove((UChar32)cp, (UChar32)cp); 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements the specified range in this set. Any character in 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the range will be removed if it is in this set, or will be 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added if it is not in this set. If <code>end > start</code> 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is xor'ed, leaving the set unchanged. 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { 1227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) <= pinCodePoint(end)) { 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exclusiveOr(range, 2, 0); 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 c) { 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return complement(c, c); 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is equivalent to 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>complement(MIN_VALUE, MAX_VALUE)</code>. 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(void) { 1247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1250c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list[0] == UNICODESET_LOW) { 1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len-1, status); 1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 1255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buffer, list + 1, (len-1)*sizeof(UChar32)); 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --len; 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len+1, status); 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 1262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buffer + 1, list, len*sizeof(UChar32)); 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[0] = UNICODESET_LOW; 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++len; 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement the specified string in this set. 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified string once the call 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the string to complement 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this object, for chaining 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(const UnicodeString& s) { 1281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (s.length() == 0 || isFrozen() || isBogus()) return *this; 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strings->contains((void*) &s)) { 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->removeElement((void*) &s); 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _add(s); 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement((UChar32)cp, (UChar32)cp); 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds all of the elements in the specified set to this set if 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * they're not already present. This operation effectively 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * modifies this set so that its value is the <i>union</i> of the two 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * sets. The behavior of this operation is unspecified if the specified 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * collection is modified while the operation is in progress. 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set whose elements are to be added to this set. 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #add(char, char) 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) { 1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( c.len>0 && c.list!=NULL ) { 1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru add(c.list, c.len, 0); 1309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add strings in order 1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( c.strings!=NULL ) { 1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (int32_t i=0; i<c.strings->size(); ++i) { 1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i); 1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!strings->contains((void*) s)) { 1316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru _add(*s); 1317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains only the elements in this set that are contained in the 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set. In other words, removes from this set all of 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * its elements that are not contained in the specified set. This 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operation effectively modifies this set so that its value is 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the <i>intersection</i> of the two sets. 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements this set will retain. 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) { 1333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(c.list, c.len, 0); 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->retainAll(*c.strings); 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes from this set all of its elements that are contained in the 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set. This operation effectively modifies this 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set so that its value is the <i>asymmetric set difference</i> of 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the two sets. 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be removed from 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set. 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) { 1351c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(c.list, c.len, 2); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->removeAll(*c.strings); 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements in this set all elements contained in the specified 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set. Any character in the other set will be removed if it is 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in this set, or will be added if it is not in this set. 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be xor'ed from 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set. 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { 1368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exclusiveOr(c.list, c.len, 0); 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<c.strings->size(); ++i) { 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void* e = c.strings->elementAt(i); 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->removeElement(e)) { 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _add(*(const UnicodeString*)e); 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes all of the elements from this set. This set will be 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * empty after this call returns. 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::clear(void) { 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (isFrozen()) { 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list != NULL) { 1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[0] = UNICODESET_HIGH; 1392c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = 1; 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings != NULL) { 1396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->removeAllElements(); 1397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list != NULL && strings != NULL) { 1399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Remove bogus 1400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags = 0; 1401c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the number of ranges contained in 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set. 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getRangeCount() const { 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return len/2; 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the first character in the 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set. 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeCount 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeStart(int32_t index) const { 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return list[index*2]; 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the last character in the 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set. 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeEnd(int32_t index) const { 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return list[index*2 + 1] - 1; 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getStringCount() const { 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strings->size(); 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString* UnicodeSet::getString(int32_t index) const { 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (const UnicodeString*) strings->elementAt(index); 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reallocate this objects internal structures to take up the least 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * possible space, without changing this object's value. 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::compact() { 1448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete buffer first to defragment memory less. 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer != NULL) { 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buffer); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer = NULL; 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len < capacity) { 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make the capacity equal to len or 1. 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We don't want to realloc of 0 size. 1459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t newCapacity = len + (len == 0); 1460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity); 1461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temp) { 1462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list = temp; 1463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = newCapacity; 1464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else what the heck happened?! We allocated less memory! 1466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Oh well. We'll keep our original array. 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const { 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bmpLength, length, destLength; 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destCapacity<0 || (destCapacity>0 && dest==NULL)) { 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_ILLEGAL_ARGUMENT_ERROR; 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count necessary 16-bit units */ 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(length>=0); 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length==0) { 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* empty set */ 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destCapacity>0) { 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest=0; 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_BUFFER_OVERFLOW_ERROR; 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now length>0 */ 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this->list[length-1]<=0xffff) { 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* all BMP */ 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpLength=length; 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (this->list[0]>=0x10000) { 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* all supplementary */ 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpLength=0; 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length*=2; 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* some BMP, some supplementary */ 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {} 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=bmpLength+2*(length-bmpLength); 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* length: number of 16-bit array units */ 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length>0x7fff) { 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* there are only 15 bits for the length in the first serialized word */ 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_INDEX_OUTOFBOUNDS_ERROR; 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * total serialized length: 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of 16-bit array units (length) + 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1 length unit (always) + 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1 bmpLength unit (if there are supplementary values) 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=length+((length>bmpLength)?2:1); 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destLength<=destCapacity) { 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 *p; 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest=(uint16_t)length; 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length>bmpLength) { 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest|=0x8000; 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *++dest=(uint16_t)bmpLength; 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++dest; 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the BMP part of the array */ 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p=this->list; 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<bmpLength; ++i) { 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=(uint16_t)*p++; 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the supplementary part of the array */ 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (; i<length; i+=2) { 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=(uint16_t)(*p>>16); 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=(uint16_t)*p++; 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_BUFFER_OVERFLOW_ERROR; 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destLength; 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Utility methods 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Allocate our strings vector and return TRUE if successful. 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::allocateStrings(UErrorCode &status) { 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1563103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius strings = new UVector(uprv_deleteUObject, 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uhash_compareUnicodeString, 1, status); 1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings == NULL) { // Check for memory allocation error. 1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 1567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 1568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete strings; 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings = NULL; 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) { 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newLen <= capacity) 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA)); 1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temp == NULL) { 1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = temp; 1587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = newLen + GROW_EXTRA; 1588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else we keep the original contents on the memory failure. 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) { 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer != NULL && newLen <= bufferCapacity) 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA)); 1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temp == NULL) { 1596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buffer = temp; 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferCapacity = newLen + GROW_EXTRA; 1602c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else we keep the original contents on the memory failure. 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Swap list and buffer. 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::swapBuffers(void) { 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // swap list and buffer 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* temp = list; 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = buffer; 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer = temp; 1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = capacity; 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru capacity = bufferCapacity; 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferCapacity = c; 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::setToBogus() { 1620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru clear(); // Remove everything in the set. 1621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags = kIsBogus; 1622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1623c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Fundamental operators 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 max(UChar32 a, UChar32 b) { 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (a > b) ? a : b; 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0, 3 is normal: x xor y 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1, 2: x xor ~y == x === y 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { 1636c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len + otherLen, status); 1641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1644c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0, j = 0, k = 0; 1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = list[i++]; 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b; 1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (polarity == 1 || polarity == 2) { 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = UNICODESET_LOW; 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (other[j] == UNICODESET_LOW) { // skip base if already LOW 1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++j; 1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j]; 1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // simplest of all the routines 1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // sort the values, discarding identicals! 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (a != UNICODESET_HIGH) { // at this point, a == b 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // discard both values! 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // DONE! 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = UNICODESET_HIGH; 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = k; 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x union y 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x union ~y 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x union y 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x union ~y 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { 1686c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus() || other==NULL) { 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len + otherLen, status); 1691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1694c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0, j = 0, k = 0; 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = list[i++]; 1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b = other[j++]; 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // change from xor is that we have to check overlapping pairs 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // polarity bit 1 means a is second, bit 2 means b is. 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (polarity) { 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: // both first; take lower if unequal 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // take a 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Back up over overlapping ranges in buffer[] 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (k > 0 && a <= buffer[k-1]) { 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pick latter end value in buffer[] vs. list[] 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = max(list[i], buffer[--k]); 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No overlap 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i]; 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i++; // Common if/else code factored out 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // take b 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (k > 0 && b <= buffer[k-1]) { 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = max(other[j], buffer[--k]); 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j]; 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j++; 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, take a, drop b 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is symmetrical; it doesn't matter if 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we backtrack with a or b. - liu 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (k > 0 && a <= buffer[k-1]) { 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = max(list[i], buffer[--k]); 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No overlap 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i]; 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i++; 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: // both second; take higher if unequal, and drop other 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b <= a) { // take a 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // take b 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b == UNICODESET_HIGH) goto loop_end; 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; // factored common code 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: // a second, b first; if b < a, overlap 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // no overlap, take a 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; a = list[i++]; polarity ^= 1; 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // OVERLAP, drop b 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: // a first, b second; if a < b, overlap 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b < a) { // no overlap, take b 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (a < b) { // OVERLAP, drop a 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end: 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = UNICODESET_HIGH; // terminate 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = k; 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x intersect y 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x intersect ~y == set-minus 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x intersect y 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x intersect ~y 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { 1799c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len + otherLen, status); 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1807c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0, j = 0, k = 0; 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = list[i++]; 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b = other[j++]; 1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // change from xor is that we have to check overlapping pairs 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // polarity bit 1 means a is second, bit 2 means b is. 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (polarity) { 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: // both first; drop the smaller 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // drop a 1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // drop b 1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, take one, drop other 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: // both second; take lower if unequal 1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // take a 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // take b 1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, take one, drop other 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: // a second, b first; 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // NO OVERLAP, drop a 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // OVERLAP, take b 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: // a first, b second; if a < b, overlap 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b < a) { // no overlap, drop b 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (a < b) { // OVERLAP, take a 1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end: 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = UNICODESET_HIGH; // terminate 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = k; 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a 1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * string to the given <code>StringBuffer</code>. 1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool 1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) { 1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 1897103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { 1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(buf, cp = s.char32At(i), escapeUnprintable); 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a 1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character to the given <code>StringBuffer</code>. 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool 1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) { 1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything 1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unprintable 1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ICU_Utility::escapeUnprintable(buf, c)) { 1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Okay to let ':' pass through 1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (c) { 1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case SET_OPEN: 1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case SET_CLOSE: 1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case HYPHEN: 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case COMPLEMENT: 1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case INTERSECTION: 1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case BACKSLASH: 1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case OPEN_BRACE: 1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case CLOSE_BRACE: 1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case COLON: 1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case SymbolTable::SYMBOL_REF: 1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(BACKSLASH); 1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Escape whitespace 1931b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (PatternProps::isWhiteSpace(c)) { 1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(BACKSLASH); 1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(c); 1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append a string representation of this set to result. This will be 1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a cleaned version of the string passed to applyPattern(), if there 1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is one. Otherwise it will be generated. 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_toPattern(UnicodeString& result, 1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const 1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat != NULL) { 1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t backslashCount = 0; 1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<patLen; ) { 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(pat, i, patLen, c); 1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { 1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the unprintable character is preceded by an odd 1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number of backslashes, then it has been escaped. 1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Before unescaping it, we delete the final 1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // backslash. 1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((backslashCount % 2) == 1) { 1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(result.length() - 1); 1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::escapeUnprintable(result, c); 1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backslashCount = 0; 1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(c); 1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == BACKSLASH) { 1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++backslashCount; 1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backslashCount = 0; 1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _generatePattern(result, escapeUnprintable); 1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a string representation of this set. If the result of 1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * calling this function is passed to a UnicodeSet constructor, it 1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will produce another set that is equal to this one. 1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::toPattern(UnicodeString& result, 1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const 1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _toPattern(result, escapeUnprintable); 1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate and append a string representation of this set to result. 1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This does not use this.pat, the cleaned up copy of the string 1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * passed to applyPattern(). 1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_generatePattern(UnicodeString& result, 1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const 1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(SET_OPEN); 1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// // Check against the predefined categories. We implicitly build 2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// // up ALL category sets the first time toPattern() is called. 2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) { 2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// if (*this == getCategorySet(cat)) { 2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// result.append(COLON); 2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// result.append(CATEGORY_NAMES, cat*2, 2); 2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// return result.append(CATEGORY_CLOSE); 2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// } 2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// } 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = getRangeCount(); 2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the set contains at least 2 intervals and includes both 2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // MIN_VALUE and MAX_VALUE, then the inverse representation will 2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be more economical. 2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count > 1 && 2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru getRangeStart(0) == MIN_VALUE && 2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru getRangeEnd(count-1) == MAX_VALUE) { 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Emit the inverse 2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(COMPLEMENT); 2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 1; i < count; ++i) { 2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = getRangeEnd(i-1)+1; 2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = getRangeStart(i)-1; 2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, start, escapeUnprintable); 2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start != end) { 2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((start+1) != end) { 2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(HYPHEN); 2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, end, escapeUnprintable); 2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Default; emit the ranges as pairs 2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < count; ++i) { 2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = getRangeStart(i); 2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = getRangeEnd(i); 2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, start, escapeUnprintable); 2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start != end) { 2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((start+1) != end) { 2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(HYPHEN); 2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, end, escapeUnprintable); 2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i<strings->size(); ++i) { 2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(OPEN_BRACE); 2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, 2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(const UnicodeString*) strings->elementAt(i), 2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escapeUnprintable); 2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(CLOSE_BRACE); 2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result.append(SET_CLOSE); 2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Release existing cached pattern 2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::releasePattern() { 2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat) { 2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(pat); 2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = NULL; 2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patLen = 0; 2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Set the new pattern to cache. 2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::setPattern(const UnicodeString& newPat) { 2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t newPatLen = newPat.length(); 2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar)); 2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat) { 2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patLen = newPatLen; 2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newPat.extractBetween(0, patLen, pat); 2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat[patLen] = 0; 2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // else we don't care if malloc failed. This was just a nice cache. 2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We can regenerate an equivalent pattern later when requested. 2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::freeze() { 2088c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrozen() && !isBogus()) { 2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do most of what compact() does before freezing because 2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compact() will not work when the set is frozen. 2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA). 2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete buffer first to defragment memory less. 2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer != NULL) { 2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buffer); 2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer = NULL; 2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (capacity > (len + GROW_EXTRA)) { 2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make the capacity equal to len or 1. 2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We don't want to realloc of 0 size. 2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru capacity = len + (len == 0); 2102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity); 2103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list == NULL) { // Check for memory allocation error. 2104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 2105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return this; 2106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Optimize contains() and span() and similar functions. 2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->isEmpty()) { 2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL); 2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) { 2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // All strings are irrelevant for span() etc. because 2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all of each string's code points are contained in this set. 2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do not check needsStringSpanUTF8() because UTF-8 has at most as 2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // many relevant strings as UTF-16. 2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().) 2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete stringSpan; 2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = NULL; 2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stringSpan == NULL) { 2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No span-relevant strings: Optimize for code point spans. 2124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet=new BMPSet(list, len); 2125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (bmpSet == NULL) { // Check for memory allocation error. 2126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 2127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return this; 2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { 2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s); 2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=u_strlen(s); 2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->span(s, length, spanCondition); 2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED : 2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF16_CONTAINED; 2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF16()) { 2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.span(s, length, spanCondition); 2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start=0, prev=0; 2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(s, start, length, c); 2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=start)<length); 2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { 2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s); 2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=u_strlen(s); 2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->spanBack(s, length, spanCondition); 2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED : 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF16_CONTAINED; 2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF16()) { 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.spanBack(s, length, spanCondition); 2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV(s, 0, length, c); 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { 2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s0=(const uint8_t *)s; 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0); 2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 221350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=(int32_t)uprv_strlen(s); 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition); 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED : 2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF8_CONTAINED; 2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF8()) { 2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition); 2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start=0, prev=0; 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 22378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_NEXT_OR_FFFD(s, start, length, c); 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=start)<length); 2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { 2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s0=(const uint8_t *)s; 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return bmpSet->spanBackUTF8(s0, length, spanCondition); 2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 225150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=(int32_t)uprv_strlen(s); 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition); 2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED : 2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF8_CONTAINED; 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF8()) { 2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition); 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 22758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius U8_PREV_OR_FFFD(s, 0, length, c); 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 2284