16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2011, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: uniset_closure.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2011may30 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* to simplify dependencies. 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* In particular, this depends on the BreakIterator, but the BreakIterator 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* code also builds UnicodeSets from patterns and needs uniset_props. 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/brkiter.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/locid.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/parsepos.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ruleiter.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucase.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "util.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uvector.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// initial storage. Must be >= 0 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// *** same as in uniset.cpp ! *** 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define START_EXTRA 16 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// TODO memory debugging provided inside uniset.cpp 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// could be made available here but probably obsolete with use of modern 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// memory leak checker tools 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define _dbgct(me) 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Constructors &c 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet::UnicodeSet(const UnicodeString& pattern, 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options, 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SymbolTable* symbols, 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFlags(0) 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)){ 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test for NULL */ 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(list == NULL) { 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org allocateStrings(status); 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPattern(pattern, options, symbols, status); 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _dbgct(this); 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options, 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SymbolTable* symbols, 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len(0), capacity(START_EXTRA), list(0), bmpSet(0), buffer(0), 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferCapacity(0), patLen(0), pat(NULL), strings(NULL), stringSpan(NULL), 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fFlags(0) 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)){ 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test for NULL */ 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(list == NULL) { 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org allocateStrings(status); 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPattern(pattern, pos, options, symbols, status); 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _dbgct(this); 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Public API 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options, 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SymbolTable* symbols, 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) { 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ParsePosition pos(0); 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPattern(pattern, pos, options, symbols, status); 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) return *this; 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i = pos.getIndex(); 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (options & USET_IGNORE_SPACE) { 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Skip over trailing whitespace 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ICU_Utility::skipWhitespace(pattern, i, TRUE); 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (i != pattern.length()) { 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_ILLEGAL_ARGUMENT_ERROR; 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ParsePosition& pos, 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options, 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const SymbolTable* symbols, 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) { 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isFrozen()) { 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_NO_WRITE_PERMISSION; 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Need to build the pattern in a temporary string because 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // _applyPattern calls add() etc., which set pat to empty. 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString rebuiltPat; 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org RuleCharacterIterator chars(pattern, symbols, pos); 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, status); 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) return *this; 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (chars.inVariable()) { 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // syntaxError(chars, "Extra chars in variable value"); 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MALFORMED_SET; 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setPattern(rebuiltPat); 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// USetAdder implementation 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Does not use uset.h to reduce code dependencies 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_set_add(USet *set, UChar32 c) { 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((UnicodeSet *)set)->add(c); 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_set_addRange(USet *set, UChar32 start, UChar32 end) { 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((UnicodeSet *)set)->add(start, end); 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_set_addString(USet *set, const UChar *str, int32_t length) { 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Case folding API 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//---------------------------------------------------------------- 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// add the result of a full case mapping to the set 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// use str as a temporary string to avoid constructing one 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline void 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgaddCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) { 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(result >= 0) { 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(result > UCASE_MAX_STRING_LENGTH) { 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // add a single-code point case mapping 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.add(result); 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // add a string case mapping from full with length result 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org str.setTo((UBool)FALSE, full, result); 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org set.add(str); 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // result < 0: the code point mapped to itself, no need to add it 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // see ucase.h 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& UnicodeSet::closeOver(int32_t attribute) { 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isFrozen() || isBogus()) { 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UCaseProps *csp = ucase_getSingleton(); 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeSet foldSet(*this); 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString str; 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org USetAdder sa = { 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.toUSet(), 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _set_add, 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _set_addRange, 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _set_addString, 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, // don't need remove() 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL // don't need removeRange() 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }; 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // start with input set to guarantee inclusion 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // USET_CASE: remove strings because the strings will actually be reduced (folded); 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // therefore, start with no strings and add only those needed 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (attribute & USET_CASE_INSENSITIVE) { 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.strings->removeAllElements(); 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t n = getRangeCount(); 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 result; 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *full; 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t locCache = 0; 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t i=0; i<n; ++i) { 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 start = getRangeStart(i); 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 end = getRangeEnd(i); 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (attribute & USET_CASE_INSENSITIVE) { 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // full case closure 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (UChar32 cp=start; cp<=end; ++cp) { 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_addCaseClosure(csp, cp, &sa); 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // add case mappings 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (does not add long s for regular s, or Kelvin for k, for example) 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (UChar32 cp=start; cp<=end; ++cp) { 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = ucase_toFullLower(csp, cp, NULL, NULL, &full, "", &locCache); 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addCaseMapping(foldSet, result, full, str); 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = ucase_toFullTitle(csp, cp, NULL, NULL, &full, "", &locCache); 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addCaseMapping(foldSet, result, full, str); 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = ucase_toFullUpper(csp, cp, NULL, NULL, &full, "", &locCache); 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addCaseMapping(foldSet, result, full, str); 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result = ucase_toFullFolding(csp, cp, &full, 0); 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org addCaseMapping(foldSet, result, full, str); 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (strings != NULL && strings->size() > 0) { 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (attribute & USET_CASE_INSENSITIVE) { 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t j=0; j<strings->size(); ++j) { 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org str = *(const UnicodeString *) strings->elementAt(j); 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org str.foldCase(); 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!ucase_addStringCaseClosure(csp, str.getBuffer(), str.length(), &sa)) { 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.add(str); // does not map to code points: add the folded string itself 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Locale root(""); 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_BREAK_ITERATION 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode status = U_ZERO_ERROR; 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BreakIterator *bi = BreakIterator::createWordInstance(root, status); 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_SUCCESS(status)) { 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString *pStr; 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (int32_t j=0; j<strings->size(); ++j) { 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pStr = (const UnicodeString *) strings->elementAt(j); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (str = *pStr).toLower(root); 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.add(str); 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_BREAK_ITERATION 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (str = *pStr).toTitle(bi, root); 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.add(str); 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (str = *pStr).toUpper(root); 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.add(str); 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (str = *pStr).foldCase(); 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org foldSet.add(str); 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_BREAK_ITERATION 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete bi; 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *this = foldSet; 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 281