183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/* 283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius******************************************************************************* 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 2011, International Business Machines 583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Corporation and others. All Rights Reserved. 683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* 783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius******************************************************************************* 883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* file name: uniset_closure.cpp 983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* encoding: US-ASCII 1083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* tab size: 8 (not used) 1183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* indentation:4 1283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* 1383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* created on: 2011may30 1483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* created by: Markus W. Scherer 1583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* 1683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp 1783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* to simplify dependencies. 1883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* In particular, this depends on the BreakIterator, but the BreakIterator 1983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* code also builds UnicodeSets from patterns and needs uniset_props. 2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*/ 2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 2283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/brkiter.h" 2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/locid.h" 2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/parsepos.h" 2583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/uniset.h" 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "cmemory.h" 2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "ruleiter.h" 2883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "ucase.h" 2983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "util.h" 3083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "uvector.h" 3183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 3283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// initial storage. Must be >= 0 3383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// *** same as in uniset.cpp ! *** 3483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define START_EXTRA 16 3583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 3683a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_BEGIN 3783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// TODO memory debugging provided inside uniset.cpp 3983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// could be made available here but probably obsolete with use of modern 4083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// memory leak checker tools 4183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define _dbgct(me) 4283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 4383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//---------------------------------------------------------------- 4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// Constructors &c 4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//---------------------------------------------------------------- 4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeSet::UnicodeSet(const UnicodeString& pattern, 4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint32_t options, 4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const SymbolTable* symbols, 5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode& status) : 5183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), 5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFlags(0) 5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius{ 5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U_SUCCESS(status)){ 5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /* test for NULL */ 5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(list == NULL) { 5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius }else{ 6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius allocateStrings(status); 6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius applyPattern(pattern, options, symbols, status); 6383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 6583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius _dbgct(this); 6683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 6783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6883a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, 6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint32_t options, 7083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const SymbolTable* symbols, 7183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode& status) : 7283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), 7383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 7483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fFlags(0) 7583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius{ 7683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U_SUCCESS(status)){ 7783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 7883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius /* test for NULL */ 7983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(list == NULL) { 8083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius status = U_MEMORY_ALLOCATION_ERROR; 8183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius }else{ 8283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius allocateStrings(status); 8383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius applyPattern(pattern, pos, options, symbols, status); 8483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 8583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 8683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius _dbgct(this); 8783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 8883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//---------------------------------------------------------------- 9083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// Public API 9183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//---------------------------------------------------------------- 9283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 9383a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, 9483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint32_t options, 9583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const SymbolTable* symbols, 9683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode& status) { 9783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ParsePosition pos(0); 9883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius applyPattern(pattern, pos, options, symbols, status); 9983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (U_FAILURE(status)) return *this; 10083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t i = pos.getIndex(); 10283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (options & USET_IGNORE_SPACE) { 10483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Skip over trailing whitespace 10583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ICU_Utility::skipWhitespace(pattern, i, TRUE); 10683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 10783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 10883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (i != pattern.length()) { 10983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius status = U_ILLEGAL_ARGUMENT_ERROR; 11083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 11183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 11283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 11383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 11483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, 11583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ParsePosition& pos, 11683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint32_t options, 11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const SymbolTable* symbols, 11883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode& status) { 11983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (U_FAILURE(status)) { 12083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 12183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 12283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (isFrozen()) { 12383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius status = U_NO_WRITE_PERMISSION; 12483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 12583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 12683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Need to build the pattern in a temporary string because 12783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // _applyPattern calls add() etc., which set pat to empty. 12883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString rebuiltPat; 12983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius RuleCharacterIterator chars(pattern, symbols, pos); 13083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status); 13183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (U_FAILURE(status)) return *this; 13283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (chars.inVariable()) { 13383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // syntaxError(chars, "Extra chars in variable value"); 13483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius status = U_MALFORMED_SET; 13583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 13683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 13783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius setPattern(rebuiltPat); 13883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 13983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 14083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 14183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// USetAdder implementation 14283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// Does not use uset.h to reduce code dependencies 14383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic void U_CALLCONV 14483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius_set_add(USet *set, UChar32 c) { 14583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ((UnicodeSet *)set)->add(c); 14683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic void U_CALLCONV 14983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius_set_addRange(USet *set, UChar32 start, UChar32 end) { 15083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ((UnicodeSet *)set)->add(start, end); 15183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 15283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 15383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic void U_CALLCONV 15483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius_set_addString(USet *set, const UChar *str, int32_t length) { 15583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); 15683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 15783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 15883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//---------------------------------------------------------------- 15983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// Case folding API 16083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius//---------------------------------------------------------------- 16183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 16283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// add the result of a full case mapping to the set 16383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius// use str as a temporary string to avoid constructing one 16483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline void 16583a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusaddCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) { 16683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(result >= 0) { 16783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(result > UCASE_MAX_STRING_LENGTH) { 16883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // add a single-code point case mapping 16983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius set.add(result); 17083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else { 17183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // add a string case mapping from full with length result 17283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius str.setTo((UBool)FALSE, full, result); 17383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius set.add(str); 17483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 17583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 17683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // result < 0: the code point mapped to itself, no need to add it 17783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // see ucase.h 17883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 17983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 18083a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeSet& UnicodeSet::closeOver(int32_t attribute) { 18183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (isFrozen() || isBogus()) { 18283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 18383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 18483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { 18583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UCaseProps *csp = ucase_getSingleton(); 18683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius { 18783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeSet foldSet(*this); 18883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString str; 18983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius USetAdder sa = { 19083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.toUSet(), 19183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius _set_add, 19283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius _set_addRange, 19383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius _set_addString, 19483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius NULL, // don't need remove() 19583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius NULL // don't need removeRange() 19683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius }; 19783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 19883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // start with input set to guarantee inclusion 19983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // USET_CASE: remove strings because the strings will actually be reduced (folded); 20083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // therefore, start with no strings and add only those needed 20183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (attribute & USET_CASE_INSENSITIVE) { 20283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.strings->removeAllElements(); 20383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 20483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 20583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t n = getRangeCount(); 20683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 result; 20783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *full; 20883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t locCache = 0; 20983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 21083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius for (int32_t i=0; i<n; ++i) { 21183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 start = getRangeStart(i); 21283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 end = getRangeEnd(i); 21383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 21483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (attribute & USET_CASE_INSENSITIVE) { 21583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // full case closure 21683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius for (UChar32 cp=start; cp<=end; ++cp) { 21783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ucase_addCaseClosure(csp, cp, &sa); 21883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 21983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else { 22083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // add case mappings 22183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // (does not add long s for regular s, or Kelvin for k, for example) 22283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius for (UChar32 cp=start; cp<=end; ++cp) { 22383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache); 22483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius addCaseMapping(foldSet, result, full, str); 22583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 22683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache); 22783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius addCaseMapping(foldSet, result, full, str); 22883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 22983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache); 23083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius addCaseMapping(foldSet, result, full, str); 23183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 23283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius result = ucase_toFullFolding(csp, cp, &full, 0); 23383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius addCaseMapping(foldSet, result, full, str); 23483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 23583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 23683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 23783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (strings != NULL && strings->size() > 0) { 23883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (attribute & USET_CASE_INSENSITIVE) { 23983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius for (int32_t j=0; j<strings->size(); ++j) { 24083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius str = *(const UnicodeString *) strings->elementAt(j); 24183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius str.foldCase(); 24283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { 24383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.add(str); // does not map to code points: add the folded string itself 24483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 24583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 24683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } else { 24783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius Locale root(""); 24883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#if !UCONFIG_NO_BREAK_ITERATION 24983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode status = U_ZERO_ERROR; 25083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius BreakIterator *bi = BreakIterator::createWordInstance(root, status); 25183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (U_SUCCESS(status)) { 25283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif 25383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UnicodeString *pStr; 25483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 25583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius for (int32_t j=0; j<strings->size(); ++j) { 25683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius pStr = (const UnicodeString *) strings->elementAt(j); 25783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius (str = *pStr).toLower(root); 25883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.add(str); 25983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#if !UCONFIG_NO_BREAK_ITERATION 26083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius (str = *pStr).toTitle(bi, root); 26183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.add(str); 26283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif 26383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius (str = *pStr).toUpper(root); 26483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.add(str); 26583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius (str = *pStr).foldCase(); 26683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius foldSet.add(str); 26783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 26883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#if !UCONFIG_NO_BREAK_ITERATION 26983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius delete bi; 27183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif 27283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius *this = foldSet; 27583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return *this; 27883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 27983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 28083a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_END 281