1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 1999-2011, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 10/20/99 alan Creation. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/symtable.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ruleiter.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uhash.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "charstr.h" 
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "bmpset.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unisetspan.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Define UChar constants using hex for EBCDIC compatibility 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Used #define to reduce private static exports and memory access time. 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SET_OPEN ((UChar)0x005B) /*[*/ 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define SET_CLOSE ((UChar)0x005D) /*]*/ 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define HYPHEN ((UChar)0x002D) /*-*/ 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define COMPLEMENT ((UChar)0x005E) /*^*/ 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define COLON ((UChar)0x003A) /*:*/ 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define BACKSLASH ((UChar)0x005C) /*\*/ 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define INTERSECTION ((UChar)0x0026) /*&*/ 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_U ((UChar)0x0055) /*U*/ 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LOWER_U ((UChar)0x0075) /*u*/ 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define OPEN_BRACE ((UChar)123) /*{*/ 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CLOSE_BRACE ((UChar)125) /*}*/ 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_P ((UChar)0x0050) /*P*/ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define 
LOWER_P ((UChar)0x0070) /*p*/ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UPPER_N ((UChar)78) /*N*/ 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define EQUALS ((UChar)0x003D) /*=*/ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// HIGH_VALUE > all valid values. 110000 for codepoints 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UNICODESET_HIGH 0x0110000 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// LOW <= all valid values. ZERO for codepoints 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UNICODESET_LOW 0x000000 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// initial storage. Must be >= 0 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define START_EXTRA 16 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// extra amount for growth. 
Must be >= 0 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define GROW_EXTRA START_EXTRA 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruSymbolTable::~SymbolTable() {} 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet) 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Modify the given UChar32 variable so that it is in range, by 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values < UNICODESET_LOW to UNICODESET_LOW, and 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1. 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It modifies its argument in-place and also returns it. 
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 pinCodePoint(UChar32& c) { 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < UNICODESET_LOW) { 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = UNICODESET_LOW; 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (c > (UNICODESET_HIGH-1)) { 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = (UNICODESET_HIGH-1); 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Debugging 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// DO NOT DELETE THIS CODE. This code is used to debug memory leaks. 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To enable the debugging, define the symbol DEBUG_MEM in the line 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// below. 
This will result in text being sent to stdout that looks 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// like this: 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85- 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85- 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Each line lists a construction (ct) or destruction (dt) event, the 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// object address, the number of outstanding objects after the event, 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and the pattern of the object in question. 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #define DEBUG_MEM 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_MEM 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t _dbgCount = 0; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void _dbgct(UnicodeSet* set) { 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(str, TRUE); 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf[40]; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.extract(0, 39, buf, ""); 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline void _dbgdt(UnicodeSet* set) { 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set->toPattern(str, TRUE); 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buf[40]; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.extract(0, 39, buf, ""); 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf); 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _dbgct(set) 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define _dbgdt(set) 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// UnicodeString in UVector support 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV cloneUnicodeString(UHashTok *dst, UHashTok *src) { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int8_t U_CALLCONV compareUnicodeString(UHashTok t1, UHashTok t2) { 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &a = *(const UnicodeString*)t1.pointer; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString &b = *(const UnicodeString*)t2.pointer; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return a.compare(b); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors &c 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs an empty set. 
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet() : 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0), 146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[0] = UNICODESET_HIGH; 157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set containing the given range. 
If <code>end > 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start</code> then an empty set is created. 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(UChar32 start, UChar32 end) : 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(1), capacity(1 + START_EXTRA), list(0), bmpSet(0), buffer(0), 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 178c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 179c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 180c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[0] = UNICODESET_HIGH; 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement(start, end); 185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 
186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Constructs a set that is identical to the given UnicodeSet. 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o) : 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter(o), 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(0), capacity(o.isFrozen() ? o.len : o.len + GROW_EXTRA), list(0), 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet(0), 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(0), bufferCapacity(0), 200c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 201c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 206c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 207c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *this = o; 211c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 212c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 213c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Copy-construct as thawed. 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter(o), 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len(0), capacity(o.len + GROW_EXTRA), list(0), 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet(0), 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(0), bufferCapacity(0), 224c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags(0) 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru allocateStrings(status); 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 231c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(list!=NULL){ 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // *this = o except for bmpSet and stringSpan 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = o.len; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(list, o.list, len*sizeof(UChar32)); 237c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings != NULL && o.strings != NULL) { 238c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->assign(*o.strings, cloneUnicodeString, status); 239c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // Invalid strings. 240c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 241c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 242c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.pat) { 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setPattern(UnicodeString(o.pat, o.patLen)); 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // If memory allocation failed, set to bogus state. 247c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 248c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgct(this); 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructs the set. 
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet::~UnicodeSet() { 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _dbgdt(this); // first! 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(list); 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete bmpSet; 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer) { 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buffer); 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete strings; 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete stringSpan; 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assigns this object to be a copy of another. 
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this == &o) { 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (isFrozen()) { 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (o.isBogus()) { 279c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 281c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 282c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 283c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureCapacity(o.len, ec); 284c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(ec)) { 285c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; // There is no way to report this error :-( 286c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = o.len; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(list, o.list, len*sizeof(UChar32)); 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.bmpSet == NULL) { 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet = NULL; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet = new BMPSet(*o.bmpSet, list, len); 293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (bmpSet == NULL) { 
// Check for memory allocation error. 294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings != NULL && o.strings != NULL) { 299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->assign(*o.strings, cloneUnicodeString, ec); 300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { // Invalid strings. 301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.stringSpan == NULL) { 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = NULL; 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (stringSpan == NULL) { // Check for memory allocation error. 
309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (o.pat) { 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setPattern(UnicodeString(o.pat, o.patLen)); 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a copy of this object. All UnicodeMatcher objects have 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to support cloning in order to allow classes using 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UnicodeMatchers, such as Transliterator, to implement cloning. 
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor* UnicodeSet::clone() const { 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new UnicodeSet(*this); 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::cloneAsThawed() const { 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new UnicodeSet(*this, TRUE); 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compares the specified object with this set for equality. Returns 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>true</tt> if the two sets 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the same size, and every member of the specified set is 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in this set (or equivalently, every member of this set is 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contained in the specified set). 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param o set to be compared for equality with this set. 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if the specified set is equal to this set. 
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::operator==(const UnicodeSet& o) const { 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len != o.len) return FALSE; 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < len; ++i) { 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list[i] != o.list[i]) return FALSE; 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*strings != *o.strings) return FALSE; 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the hash code value for this set. 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the hash code value for this set. 
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see Object#hashCode() 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::hashCode(void) const { 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result = len; 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < len; ++i) { 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result *= 1000003; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result += list[i]; 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Public API 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the number of elements in this set (its cardinality), 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note than the elements of a set may include both individual 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * codepoints and strings. 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the number of elements in this set (its cardinality). 
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::size(void) const { 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = 0; 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = getRangeCount(); 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < count; ++i) { 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n += getRangeEnd(i) - getRangeStart(i) + 1; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return n + strings->size(); 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains no elements. 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains no elements. 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::isEmpty(void) const { 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return len == 1 && strings->size() == 0; 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains the given character. 
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c character to be checked for containment 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 c) const { 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Set i to the index of the start item greater than ch 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We know we will terminate without length test! 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // LATER: for large sets, add binary search 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t i = -1; 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (;;) { 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (c < list[++i]) break; 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bmpSet != NULL) { 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return bmpSet->contains(c); 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stringSpan != NULL) { 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->contains(c); 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(c); 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(i & 1); // return true 
if odd 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the smallest value i such that c < list[i]. Caller 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must ensure that c is a legal value or this method will enter 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an infinite loop. This method performs a binary search. 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c a character in the range MIN_VALUE..MAX_VALUE 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the smallest integer i in the range 0..len-1, 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive, such that c < list[i] 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::findCodePoint(UChar32 c) const { 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Examples: 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru findCodePoint(c) 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set list[] c=0 1 3 4 7 8 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru === ============== =========== 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [] [110000] 0 0 0 0 0 0 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru [:Any:] [0, 110000] 1 1 1 1 1 1 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Return the smallest i such that c < list[i]. Assume 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[len - 1] == HIGH and that c is legal (0..HIGH-1). 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < list[0]) 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // High runner test. c is often after the last range, so an 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // initial check for this condition pays off. 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t lo = 0; 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t hi = len - 1; 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lo >= hi || c >= list[hi-1]) 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return hi; 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invariant: c >= list[lo] 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invariant: c < list[hi] 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = (lo + hi) >> 1; 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i == lo) { 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // Found! 
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (c < list[i]) { 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hi = i; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lo = i; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return hi; 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains every character 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range. 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(UChar32 start, UChar32 end) const { 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t i = -1; 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (;;) { 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (start < list[++i]) break; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(start); 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ((i & 1) != 0 && end < list[i]); 
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains the given 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * multicharacter string. 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string to be checked for containment 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return <tt>true</tt> if this set contains the specified string 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::contains(const UnicodeString& s) const { 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.length() == 0) return FALSE; 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strings->contains((void*) &s); 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return contains((UChar32) cp); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters and strings 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set. 
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeSet& c) const { 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The specified set is a subset if all of its pairs are contained in 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this set. It's possible to code this more efficiently in terms of 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direct manipulation of the inversion lists if the need arises. 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = c.getRangeCount(); 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i=0; i<n; ++i) { 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) { 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->containsAll(*c.strings)) return FALSE; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains all the characters 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string. 
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsAll(const UnicodeString& s) const { 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) == 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.length()); 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given range. 
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of the range 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of the range 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const { 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //int32_t i = -1; 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (;;) { 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if (start < list[++i]) break; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(start); 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ((i & 1) == 0 && end < list[i]); 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters and strings 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given set. 
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set to be checked for containment 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeSet& c) const { 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The specified set is a subset if all of its pairs are contained in 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // this set. It's possible to code this more efficiently in terms of 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direct manipulation of the inversion lists if the need arises. 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = c.getRangeCount(); 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<n; ++i) { 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) { 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->containsNone(*c.strings)) return FALSE; 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns true if this set contains none of the characters 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the given string. 
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string containing characters to be checked for containment 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test condition is met 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::containsNone(const UnicodeString& s) const { 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) == 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.length()); 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns <tt>true</tt> if this set contains any character whose low byte 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indexing. 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::matchesIndexValue(uint8_t v) const { 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* The index value v, in the range [0,255], is contained in this set if 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * it is contained in any pair of this set. Pairs either have the high 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bytes equal, or unequal. If the high bytes are equal, then we have 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * aaxx..aayy, where aa is the high byte. Then v is contained if xx <= 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * v <= yy. 
If the high bytes are unequal we have aaxx..bbyy, bb>aa. 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Then v is contained if xx <= v || v <= yy. (This is identical to the 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * time zone month containment logic.) 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rangeCount=getRangeCount(); 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<rangeCount; ++i) { 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 low = getRangeStart(i); 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 high = getRangeEnd(i); 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((low & ~0xFF) == (high & ~0xFF)) { 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((low & 0xFF) <= v && v <= (high & 0xFF)) { 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) { 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strings->size() != 0) { 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<strings->size(); ++i) { 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (s.length() == 0) { 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // // Empty strings match 
everything 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return TRUE; 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(s.length() != 0); // We enforce this elsewhere 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = s.char32At(0); 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((c & 0xFF) == v) { 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implementation of UnicodeMatcher::matches(). Always matches the 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * longest possible multichar string. 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUMatchDegree UnicodeSet::matches(const Replaceable& text, 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t& offset, 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit, 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool incremental) { 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (offset == limit) { 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings, if any, have length != 0, so we don't worry 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // about them here. 
If we ever allow zero-length strings 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we much check for them here. 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (contains(U_ETHER)) { 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return incremental ? U_PARTIAL_MATCH : U_MATCH; 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_MISMATCH; 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strings->size() != 0) { // try strings first 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // might separate forward and backward loops later 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for now they are combined 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO Improve efficiency of this, at least in the forward 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direction, if not in both. In the forward direction we 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // can assume the strings are sorted. 
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool forward = offset < limit; 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // firstChar is the leftmost char to match in the 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // forward direction or the rightmost char to match in 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the reverse direction. 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar firstChar = text.charAt(offset); 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there are multiple strings that can match we 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return the longest match. 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t highWaterLength = 0; 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<strings->size(); ++i) { 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (trial.length() == 0) { 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return U_MATCH; // null-string always matches 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(trial.length() != 0); // We ensure this elsewhere 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = trial.charAt(forward ? 
0 : trial.length() - 1); 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Strings are sorted, so we can optimize in the 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // forward direction. 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forward && c > firstChar) break; 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c != firstChar) continue; 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t matchLen = matchRest(text, offset, limit, trial); 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (incremental) { 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxLen = forward ? limit-offset : offset-limit; 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchLen == maxLen) { 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have successfully matched but only up to limit. 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_PARTIAL_MATCH; 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchLen == trial.length()) { 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have successfully matched the whole string. 
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (matchLen > highWaterLength) { 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru highWaterLength = matchLen; 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In the forward direction we know strings 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are sorted so we can bail early. 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forward && matchLen < highWaterLength) { 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We've checked all strings without a partial match. 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we have full matches, return the longest one. 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (highWaterLength != 0) { 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset += forward ? 
highWaterLength : -highWaterLength; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_MATCH; 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UnicodeFilter::matches(text, offset, limit, incremental); 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the longest match for s in text at the given position. 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If limit > start then match forward from start+1 to limit 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * matching all characters except s.charAt(0). If limit < start, 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go backward starting from start-1 matching all characters 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except s.charAt(s.length()-1). This method assumes that the 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * first character, text.charAt(start), matches s, so it does not 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * check it. 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the text to match 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start the first character to match. In the forward 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction, text.charAt(start) is matched against s.charAt(0). 
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In the reverse direction, it is matched against 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.charAt(s.length()-1). 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param limit the limit offset for matching, either last+1 in 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the forward direction, or last-1 in the reverse direction, 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * where last is the index of the last character to match. 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return If part of s matches up to the limit, return |limit - 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * start|. If all of s matches before reaching the limit, return 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * s.length(). If there is a mismatch between s and text, return 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::matchRest(const Replaceable& text, 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, int32_t limit, 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& s) { 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxLen; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t slen = s.length(); 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start < limit) { 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxLen = limit - start; 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxLen > slen) maxLen = slen; 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 1; i < maxLen; ++i) { 
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (text.charAt(start + i) != s.charAt(i)) return 0; 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxLen = start - limit; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxLen > slen) maxLen = slen; 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --slen; // <=> slen = s.length() - 1; 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 1; i < maxLen; ++i) { 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (text.charAt(start - i) != s.charAt(slen - i)) return 0; 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxLen; 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement of UnicodeMatcher 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toUnionTo.addAll(*this); 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the index of the given character within this set, where 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point. 
If the character 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is not in this set, return -1. The inverse of this method is 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>charAt()</code>. 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an index from 0..size()-1, or -1 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::indexOf(UChar32 c) const { 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < MIN_VALUE || c > MAX_VALUE) { 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = 0; 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = list[i++]; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < start) { 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 limit = list[i++]; 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c < limit) { 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return n + c - start; 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n += limit - start; 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the character at the given index within this set, where 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the set is ordered by ascending code point. If the index is 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * out of range, return (UChar32)-1. The inverse of this method is 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>indexOf()</code>. 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an index from 0..size()-1 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the character at the given index, or (UChar32)-1. 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::charAt(int32_t index) const { 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index >= 0) { 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // len2 is the largest even integer <= len, that is, it is len 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for even values and len-1 for odd values. With odd values 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the last entry is UNICODESET_HIGH. 
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len2 = len & ~1; 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i < len2;) { 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = list[i++]; 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = list[i++] - start; 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index < count) { 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar32)(start + index); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index -= count; 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UChar32)-1; 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make this object represent the range <code>start - end</code>. 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If <code>end > start</code> then this object is set to an 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an empty range. 
 *
 * @param start first character in the set, inclusive
 * @param end last character in the set, inclusive
 * @return this object, for chaining
 */
UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) {
    // Empty the set first, then complement the range to switch it on.
    clear();
    complement(start, end);
    return *this;
}

/**
 * Adds the specified range to this set if it is not already
 * present.  If this set already contains the specified range,
 * the call leaves this set unchanged.  If <code>start > end</code>
 * then an empty range is added, leaving the set unchanged.
 *
 * @param start first character, inclusive, of range to be added
 * to this set.
 * @param end last character, inclusive, of range to be added
 * to this set.
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) { 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) < pinCodePoint(end)) { 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(range, 2, 0); 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (start == end) { 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(start); 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// #define DEBUG_US_ADD 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid dump(UChar32 c) { 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c <= 0xFF) { 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%c", (char)c); 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("U+%04X", c); 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid dump(const UChar32* list, int32_t len) { 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("["); 
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<len; ++i) { 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 0) printf(", "); 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(list[i]); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("]"); 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified character to this set if it is not already 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present. If this set already contains the specified character, 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged. 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(UChar32 c) { 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find smallest i such that c < list[i] 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if odd, then it is IN the set 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if even, then it is OUT of the set 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = findCodePoint(pinCodePoint(c)); 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // already in set? 
881c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ((i & 1) != 0 || isFrozen() || isBogus()) return *this; 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // HIGH is 0x110000 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(list[len-1] == HIGH); 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // empty = [HIGH] 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [start_0, limit_0, start_1, limit_1, HIGH] 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // i == 0 means c is before the first range 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("Add of "); 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(c); 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" found at %d", i); 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(": "); 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(list, len); 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" => "); 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == list[i]-1) { 
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // c is before start of next range 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i] = c; 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if we touched the HIGH mark, then add a new one 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == (UNICODESET_HIGH - 1)) { 909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 910c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureCapacity(len+1, status); 911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; // There is no way to report this error :-( 913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[len++] = UNICODESET_HIGH; 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0 && c == list[i-1]) { 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // collapse adjacent ranges 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, c, c, limit_k, ..., HIGH] 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (int32_t k=i-1; k<len-2; ++k) { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[k] = list[k+2]; 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* dst = list + i - 1; 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* src = dst 
+ 2; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* srclimit = list + len; 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (src < srclimit) *(dst++) = *(src++); 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len -= 2; 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (i > 0 && c == list[i-1]) { 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // c is after end of prior range 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i-1]++; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // no need to check for collapse here 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At this point we know the new char is not adjacent to 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // any existing ranges, and it is not 10FFFF. 
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ^ 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[i] 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureCapacity(len+2, status); 956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; // There is no way to report this error :-( 958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //for (int32_t k=len-1; k>=i; --k) { 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // list[k+2] = list[k]; 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* src = list + len; 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* dst = src + 2; 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* srclimit = list + i; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (src > srclimit) *(--dst) = *(--src); 
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i] = c; 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list[i+1] = c+1; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len += 2; 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef DEBUG_US_ADD 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dump(list, len); 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("\n"); 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=1; i<len; ++i) { 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list[i] <= list[i-1]) { 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Corrupt array! 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("ERROR: list has been corrupted\n"); 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(1); 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the specified multicharacter to this set if it is not already 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * present. 
If this set already contains the multicharacter, 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the call leaves this set unchanged. 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Thus "ch" => {"ch"} 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the source string 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::add(const UnicodeString& s) { 1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (s.length() == 0 || isFrozen() || isBogus()) return *this; 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->contains((void*) &s)) { 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _add(s); 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add((UChar32)cp); 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds the given string, in order, to 'strings'. 
The given string 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * must have been checked by the caller to not be empty and to not 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * already be in 'strings'. 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_add(const UnicodeString& s) { 1019c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString* t = new UnicodeString(s); 1023c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t == NULL) { // Check for memory allocation error. 1024c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1025c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1026c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->sortedInsert(t, compareUnicodeString, ec); 1029c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(ec)) { 1030c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1031c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete t; 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a code point IF the string consists of a single one. 
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise returns -1. 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param string to test 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getSingleCP(const UnicodeString& s) { 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if (s.length() < 1) { 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.length() > 2) return -1; 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.length() == 1) return s.charAt(0); 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at this point, len = 2 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp = s.char32At(0); 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp > 0xFFFF) { // is surrogate pair 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return cp; 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeString& s) { 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) { 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp = s.char32At(i); 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru add(cp); 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) { 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(s); 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retainAll(set); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) { 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(s); 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complementAll(set); 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this set already any particular character, it has no effect on that character. 
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) { 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set; 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru set.addAll(s); 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru removeAll(set); 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAllStrings() { 1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->removeAllElements(); 1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from a multicharacter string. 
Thus "ch" => {"ch"} 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given string 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) { 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = new UnicodeSet(); 1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (set != NULL) { // Check for memory allocation error. 1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru set->add(s); 1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return set; 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Makes a set from each of the characters in the string. 
Thus "ch" => {"c", "h"} 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created set containing the given characters 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) { 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = new UnicodeSet(); 1137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (set != NULL) { // Check for memory allocation error. 1138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru set->addAll(s); 1139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return set; 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retain only the elements in this set that are contained in the 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range. If <code>end > start</code> then an empty range is 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * retained, leaving the set empty. 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be retained 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set. 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be retained 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this set. 
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) { 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) <= pinCodePoint(end)) { 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(range, 2, 0); 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clear(); 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retain(UChar32 c) { 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return retain(c, c); 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified range from this set if it is present. 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. If <code>end > start</code> then an empty range is 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * removed, leaving the set unchanged. 
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) { 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) <= pinCodePoint(end)) { 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(range, 2, 2); 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified character from this set if it is present. 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified range once the call 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. 
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(UChar32 c) { 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return remove(c, c); 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes the specified string from this set if it is present. 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified character once the call 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param the source string 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the modified set, for chaining 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::remove(const UnicodeString& s) { 1203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (s.length() == 0 || isFrozen() || isBogus()) return *this; 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->removeElement((void*) &s); 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remove((UChar32)cp, (UChar32)cp); 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements the specified range in this set. Any character in 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the range will be removed if it is in this set, or will be 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added if it is not in this set. If <code>end > start</code> 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then an empty range is xor'ed, leaving the set unchanged. 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start first character, inclusive, of range to be removed 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param end last character, inclusive, of range to be removed 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from this set. 
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pinCodePoint(start) <= pinCodePoint(end)) { 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exclusiveOr(range, 2, 0); 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(UChar32 c) { 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return complement(c, c); 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is equivalent to 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>complement(MIN_VALUE, MAX_VALUE)</code>. 
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(void) { 1246c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1249c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list[0] == UNICODESET_LOW) { 1251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len-1, status); 1252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buffer, list + 1, (len-1)*sizeof(UChar32)); 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --len; 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len+1, status); 1259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return *this; 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buffer + 1, list, len*sizeof(UChar32)); 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[0] = UNICODESET_LOW; 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++len; 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complement the specified string in this set. 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The set will not contain the specified string once the call 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns. 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s the string to complement 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return this object, for chaining 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complement(const UnicodeString& s) { 1280c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (s.length() == 0 || isFrozen() || isBogus()) return *this; 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t cp = getSingleCP(s); 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cp < 0) { 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strings->contains((void*) &s)) { 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->removeElement((void*) &s); 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _add(s); 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru complement((UChar32)cp, (UChar32)cp); 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Adds all of the elements in the specified set to this set if 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * they're not already present. This operation effectively 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * modifies this set so that its value is the <i>union</i> of the two 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * sets. The behavior of this operation is unspecified if the specified 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * collection is modified while the operation is in progress. 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set whose elements are to be added to this set. 
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #add(char, char) 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) { 1306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( c.len>0 && c.list!=NULL ) { 1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru add(c.list, c.len, 0); 1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add strings in order 1311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if ( c.strings!=NULL ) { 1312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (int32_t i=0; i<c.strings->size(); ++i) { 1313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i); 1314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!strings->contains((void*) s)) { 1315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru _add(*s); 1316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retains only the elements in this set that are contained in the 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set. 
In other words, removes from this set all of 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * its elements that are not contained in the specified set. This 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * operation effectively modifies this set so that its value is 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the <i>intersection</i> of the two sets. 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements this set will retain. 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) { 1332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(c.list, c.len, 0); 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->retainAll(*c.strings); 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes from this set all of its elements that are contained in the 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified set. This operation effectively modifies this 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set so that its value is the <i>asymmetric set difference</i> of 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the two sets. 
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be removed from 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set. 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) { 1350c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru retain(c.list, c.len, 2); 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings->removeAll(*c.strings); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Complements in this set all elements contained in the specified 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set. Any character in the other set will be removed if it is 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in this set, or will be added if it is not in this set. 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param c set that defines which elements will be xor'ed from 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set. 
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { 1367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exclusiveOr(c.list, c.len, 0); 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<c.strings->size(); ++i) { 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void* e = c.strings->elementAt(i); 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->removeElement(e)) { 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _add(*(const UnicodeString*)e); 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Removes all of the elements from this set. This set will be 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * empty after this call returns. 
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::clear(void) { 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (isFrozen()) { 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list != NULL) { 1390c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list[0] = UNICODESET_HIGH; 1391c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = 1; 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1394c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings != NULL) { 1395c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru strings->removeAllElements(); 1396c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1397c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list != NULL && strings != NULL) { 1398c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Remove bogus 1399c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags = 0; 1400c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the number of ranges contained in 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this set. 
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getRangeCount() const { 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return len/2; 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the first character in the 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set. 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeCount 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeStart(int32_t index) const { 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return list[index*2]; 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Iteration method that returns the last character in the 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * specified range of this set. 
1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeStart 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getRangeEnd 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 UnicodeSet::getRangeEnd(int32_t index) const { 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return list[index*2 + 1] - 1; 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::getStringCount() const { 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strings->size(); 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString* UnicodeSet::getString(int32_t index) const { 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (const UnicodeString*) strings->elementAt(index); 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reallocate this objects internal structures to take up the least 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * possible space, without changing this object's value. 
1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& UnicodeSet::compact() { 1447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete buffer first to defragment memory less. 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer != NULL) { 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buffer); 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer = NULL; 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len < capacity) { 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make the capacity equal to len or 1. 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We don't want to realloc of 0 size. 1458c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t newCapacity = len + (len == 0); 1459c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * newCapacity); 1460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temp) { 1461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru list = temp; 1462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = newCapacity; 1463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else what the heck happened?! We allocated less memory! 1465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Oh well. We'll keep our original array. 
1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const { 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t bmpLength, length, destLength; 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destCapacity<0 || (destCapacity>0 && dest==NULL)) { 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_ILLEGAL_ARGUMENT_ERROR; 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count necessary 16-bit units */ 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(length>=0); 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length==0) { 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* empty set */ 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destCapacity>0) { 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest=0; 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
} else { 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_BUFFER_OVERFLOW_ERROR; 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now length>0 */ 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (this->list[length-1]<=0xffff) { 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* all BMP */ 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpLength=length; 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (this->list[0]>=0x10000) { 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* all supplementary */ 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpLength=0; 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length*=2; 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* some BMP, some supplementary */ 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {} 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=bmpLength+2*(length-bmpLength); 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* length: number of 16-bit array units */ 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length>0x7fff) { 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* there are only 15 bits for the length in the first serialized word */ 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru ec=U_INDEX_OUTOFBOUNDS_ERROR; 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * total serialized length: 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * number of 16-bit array units (length) + 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1 length unit (always) + 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1 bmpLength unit (if there are supplementary values) 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=length+((length>bmpLength)?2:1); 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (destLength<=destCapacity) { 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 *p; 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest=(uint16_t)length; 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (length>bmpLength) { 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest|=0x8000; 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *++dest=(uint16_t)bmpLength; 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++dest; 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the BMP part of the array */ 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p=this->list; 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
for (i=0; i<bmpLength; ++i) { 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=(uint16_t)*p++; 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the supplementary part of the array */ 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (; i<length; i+=2) { 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=(uint16_t)(*p>>16); 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *dest++=(uint16_t)*p++; 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec=U_BUFFER_OVERFLOW_ERROR; 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destLength; 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Utility methods 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Allocate our strings vector and return TRUE if successful. 
1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool UnicodeSet::allocateStrings(UErrorCode &status) { 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings = new UVector(uhash_deleteUnicodeString, 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uhash_compareUnicodeString, 1, status); 1564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (strings == NULL) { // Check for memory allocation error. 1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 1567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete strings; 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings = NULL; 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) { 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newLen <= capacity) 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32* temp = (UChar32*) 
uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA)); 1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temp == NULL) { 1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = temp; 1586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = newLen + GROW_EXTRA; 1587c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // else we keep the original contents on the memory failure. 1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::ensureBufferCapacity(int32_t newLen, UErrorCode& ec) { 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer != NULL && newLen <= bufferCapacity) 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar32* temp = (UChar32*) uprv_realloc(buffer, sizeof(UChar32) * (newLen + GROW_EXTRA)); 1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temp == NULL) { 1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 1596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru buffer = temp; 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferCapacity = newLen + GROW_EXTRA; 1601c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
// else we keep the original contents on the memory failure. 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Swap list and buffer. 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::swapBuffers(void) { 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // swap list and buffer 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32* temp = list; 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = buffer; 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer = temp; 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t c = capacity; 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru capacity = bufferCapacity; 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferCapacity = c; 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1618c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeSet::setToBogus() { 1619c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru clear(); // Remove everything in the set. 
1620c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fFlags = kIsBogus; 1621c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 1622c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Implementation: Fundamental operators 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar32 max(UChar32 a, UChar32 b) { 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (a > b) ? a : b; 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0, 3 is normal: x xor y 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1, 2: x xor ~y == x === y 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { 1635c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1638c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1639c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len + otherLen, status); 1640c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1641c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 
1642c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1643c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0, j = 0, k = 0; 1645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = list[i++]; 1646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b; 1647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (polarity == 1 || polarity == 2) { 1648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = UNICODESET_LOW; 1649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (other[j] == UNICODESET_LOW) { // skip base if already LOW 1650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++j; 1651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j]; 1652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // simplest of all the routines 1657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // sort the values, discarding identicals! 
1658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { 1660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { 1663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (a != UNICODESET_HIGH) { // at this point, a == b 1666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // discard both values! 1667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // DONE! 
1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = UNICODESET_HIGH; 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = k; 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x union y 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x union ~y 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x union y 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x union ~y 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { 1685c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus() || other==NULL) { 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1688c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1689c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len + otherLen, status); 1690c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1691c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1692c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1693c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0, j = 0, k = 0; 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = list[i++]; 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b = other[j++]; 1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // change from xor is that we have to check overlapping pairs 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // polarity bit 1 means a is second, bit 2 means b is. 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (polarity) { 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: // both first; take lower if unequal 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // take a 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Back up over overlapping ranges in buffer[] 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (k > 0 && a <= buffer[k-1]) { 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Pick latter end value in buffer[] vs. 
list[] 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = max(list[i], buffer[--k]); 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No overlap 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i]; 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i++; // Common if/else code factored out 1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // take b 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (k > 0 && b <= buffer[k-1]) { 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = max(other[j], buffer[--k]); 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j]; 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru j++; 1722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, take a, drop b 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is symmetrical; it doesn't matter if 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we backtrack with a or b. 
- liu 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (k > 0 && a <= buffer[k-1]) { 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = max(list[i], buffer[--k]); 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No overlap 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i]; 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i++; 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: // both second; take higher if unequal, and drop other 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b <= a) { // take a 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // take b 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b == UNICODESET_HIGH) goto loop_end; 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; // factored common code 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
b = other[j++]; 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: // a second, b first; if b < a, overlap 1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // no overlap, take a 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; a = list[i++]; polarity ^= 1; 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // OVERLAP, drop b 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: // a first, b second; if a < b, overlap 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b < a) { // no overlap, take b 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (a < b) { // OVERLAP, drop a 
1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end: 1786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = UNICODESET_HIGH; // terminate 1787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = k; 1788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 0 is normal: x intersect y 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 2: x intersect ~y == set-minus 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 1: ~x intersect y 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// polarity = 3: ~x intersect ~y 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { 1798c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (isFrozen() || isBogus()) { 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1801c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1802c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ensureBufferCapacity(len + otherLen, status); 1803c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(status)) { 1804c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 1805c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1806c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0, j = 0, k = 0; 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 a = list[i++]; 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 b = other[j++]; 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // change from xor is that we have to check overlapping pairs 1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // polarity bit 1 means a is second, bit 2 means b is. 
1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (polarity) { 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: // both first; drop the smaller 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // drop a 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // drop b 1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, take one, drop other 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: // both second; take lower if unequal 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // take a 1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } else if (b < a) { // take b 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, take one, drop other 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: // a second, b first; 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a < b) { // NO OVERLAP, drop a 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (b < a) { // OVERLAP, take b 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = b; 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 
1857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: // a first, b second; if a < b, overlap 1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (b < a) { // no overlap, drop b 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (a < b) { // OVERLAP, take a 1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = a; 1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // a == b, drop both! 
1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (a == UNICODESET_HIGH) goto loop_end; 1874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = list[i++]; 1875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 1; 1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b = other[j++]; 1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru polarity ^= 2; 1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru loop_end: 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer[k++] = UNICODESET_HIGH; // terminate 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = k; 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru swapBuffers(); 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a 1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * string to the given <code>StringBuffer</code>. 
1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool 1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) { 1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < s.length(); i += UTF_CHAR_LENGTH(cp)) { 1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(buf, cp = s.char32At(i), escapeUnprintable); 1898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append the <code>toPattern()</code> representation of a 1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character to the given <code>StringBuffer</code>. 
1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool 1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruescapeUnprintable) { 1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { 1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unprintable 1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ICU_Utility::escapeUnprintable(buf, c)) { 1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Okay to let ':' pass through 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (c) { 1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case SET_OPEN: 1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case SET_CLOSE: 1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case HYPHEN: 1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case COMPLEMENT: 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case INTERSECTION: 1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case BACKSLASH: 1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case OPEN_BRACE: 1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case CLOSE_BRACE: 1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case COLON: 1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case SymbolTable::SYMBOL_REF: 1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(BACKSLASH); 
1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Escape whitespace 1930b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if (PatternProps::isWhiteSpace(c)) { 1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(BACKSLASH); 1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(c); 1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Append a string representation of this set to result. This will be 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a cleaned version of the string passed to applyPattern(), if there 1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is one. Otherwise it will be generated. 
1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_toPattern(UnicodeString& result, 1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const 1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat != NULL) { 1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t backslashCount = 0; 1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<patLen; ) { 1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(pat, i, patLen, c); 1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { 1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the unprintable character is preceded by an odd 1954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // number of backslashes, then it has been escaped. 1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Before unescaping it, we delete the final 1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // backslash. 
1957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((backslashCount % 2) == 1) { 1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(result.length() - 1); 1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::escapeUnprintable(result, c); 1961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backslashCount = 0; 1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(c); 1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == BACKSLASH) { 1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++backslashCount; 1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backslashCount = 0; 1968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _generatePattern(result, escapeUnprintable); 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a string representation of this set. 
If the result of 1979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * calling this function is passed to a UnicodeSet constructor, it 1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will produce another set that is equal to this one. 1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::toPattern(UnicodeString& result, 1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const 1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _toPattern(result, escapeUnprintable); 1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generate and append a string representation of this set to result. 1991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This does not use this.pat, the cleaned up copy of the string 1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * passed to applyPattern(). 1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& UnicodeSet::_generatePattern(UnicodeString& result, 1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const 1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(SET_OPEN); 1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// // Check against the predefined categories. 
We implicitly build 2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// // up ALL category sets the first time toPattern() is called. 2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) { 2002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// if (*this == getCategorySet(cat)) { 2003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// result.append(COLON); 2004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// result.append(CATEGORY_NAMES, cat*2, 2); 2005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// return result.append(CATEGORY_CLOSE); 2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// } 2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// } 2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count = getRangeCount(); 2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the set contains at least 2 intervals and includes both 2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // MIN_VALUE and MAX_VALUE, then the inverse representation will 2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be more economical. 
2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (count > 1 && 2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru getRangeStart(0) == MIN_VALUE && 2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru getRangeEnd(count-1) == MAX_VALUE) { 2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Emit the inverse 2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(COMPLEMENT); 2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 1; i < count; ++i) { 2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = getRangeEnd(i-1)+1; 2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = getRangeStart(i)-1; 2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, start, escapeUnprintable); 2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start != end) { 2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((start+1) != end) { 2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(HYPHEN); 2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, end, escapeUnprintable); 2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Default; emit the ranges as pairs 2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < count; ++i) { 
2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start = getRangeStart(i); 2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 end = getRangeEnd(i); 2039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, start, escapeUnprintable); 2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start != end) { 2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((start+1) != end) { 2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(HYPHEN); 2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, end, escapeUnprintable); 2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i<strings->size(); ++i) { 2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(OPEN_BRACE); 2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _appendToPat(result, 2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(const UnicodeString*) strings->elementAt(i), 2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru escapeUnprintable); 2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(CLOSE_BRACE); 2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result.append(SET_CLOSE); 2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 
Release existing cached pattern 2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::releasePattern() { 2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat) { 2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(pat); 2065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = NULL; 2066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patLen = 0; 2067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 2071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Set the new pattern to cache. 2072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 2073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeSet::setPattern(const UnicodeString& newPat) { 2074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru releasePattern(); 2075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t newPatLen = newPat.length(); 2076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar)); 2077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pat) { 2078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru patLen = newPatLen; 2079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newPat.extractBetween(0, patLen, pat); 2080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pat[patLen] = 0; 2081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // else we don't care if malloc failed. This was just a nice cache. 
2083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We can regenerate an equivalent pattern later when requested. 2084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFunctor *UnicodeSet::freeze() { 2087c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!isFrozen() && !isBogus()) { 2088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do most of what compact() does before freezing because 2089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compact() will not work when the set is frozen. 2090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Small modification: Don't shrink if the savings would be tiny (<=GROW_EXTRA). 2091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete buffer first to defragment memory less. 2093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buffer != NULL) { 2094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buffer); 2095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer = NULL; 2096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (capacity > (len + GROW_EXTRA)) { 2098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make the capacity equal to len or 1. 2099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We don't want to realloc of 0 size. 2100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru capacity = len + (len == 0); 2101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list = (UChar32*) uprv_realloc(list, sizeof(UChar32) * capacity); 2102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (list == NULL) { // Check for memory allocation error. 
2103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 2104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return this; 2105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Optimize contains() and span() and similar functions. 2109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strings->isEmpty()) { 2110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL); 2111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stringSpan != NULL && !stringSpan->needsStringSpanUTF16()) { 2112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // All strings are irrelevant for span() etc. because 2113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all of each string's code points are contained in this set. 2114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do not check needsStringSpanUTF8() because UTF-8 has at most as 2115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // many relevant strings as UTF-16. 2116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().) 2117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete stringSpan; 2118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stringSpan = NULL; 2119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (stringSpan == NULL) { 2122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No span-relevant strings: Optimize for code point spans. 
2123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bmpSet=new BMPSet(list, len); 2124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (bmpSet == NULL) { // Check for memory allocation error. 2125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setToBogus(); 2126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return this; 2130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { 2133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s); 2135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=u_strlen(s); 2138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->span(s, length, spanCondition); 2144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= 
spanCondition==USET_SPAN_NOT_CONTAINED ? 2146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED : 2147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF16_CONTAINED; 2148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF16()) { 2150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.span(s, length, spanCondition); 2151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
2156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start=0, prev=0; 2160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(s, start, length, c); 2162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=start)<length); 2166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { 2170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s); 2172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 2174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=u_strlen(s); 2175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 
2180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->spanBack(s, length, spanCondition); 2181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED : 2184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF16_CONTAINED; 2185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF16()) { 2187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.spanBack(s, length, spanCondition); 2188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
2193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV(s, 0, length, c); 2199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { 2207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s0=(const uint8_t *)s; 2209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0); 2210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 221250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=(int32_t)uprv_strlen(s); 2213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition); 2219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED : 2222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::FWD_UTF8_CONTAINED; 2223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF8()) { 2225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition); 2226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
2231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start=0, prev=0; 2235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT(s, start, length, c); 2237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 2239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=start)<length); 2244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { 2248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0 && bmpSet!=NULL) { 2249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s0=(const uint8_t *)s; 2250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return bmpSet->spanBackUTF8(s0, length, spanCondition); 2251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 225350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho length=(int32_t)uprv_strlen(s); 2254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
2255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==0) { 2256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 2257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stringSpan!=NULL) { 2259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition); 2260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!strings->isEmpty()) { 2261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? 2262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED : 2263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan::BACK_UTF8_CONTAINED; 2264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetStringSpan strSpan(*this, *strings, which); 2265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(strSpan.needsStringSpanUTF8()) { 2266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition); 2267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=USET_SPAN_NOT_CONTAINED) { 2271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. 
2272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 2275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t prev=length; 2276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 2277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_PREV(s, 0, length, c); 2278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 2279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xfffd; 2280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(spanCondition!=contains(c)) { 2282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 2283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while((prev=length)>0); 2285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return prev; 2286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 2289