16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 1999-2011, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: unistr_case.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:2 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2004aug19 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Case-mapping functions moved here from unistr.cpp 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/putil.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uelement.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ustr_imp.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//======================================== 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Read-only implementation 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//======================================== 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint8_t 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString::doCaseCompare(int32_t start, 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length, 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *srcChars, 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t srcStart, 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t srcLength, 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options) const 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // compare illegal string values 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // treat const UChar *srcChars==NULL as an empty string 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isBogus()) { 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // pin indices to legal values 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pinIndices(start, length); 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcChars == NULL) { 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcStart = srcLength = 0; 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // get the correct pointer 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *chars = getArrayStart(); 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org chars += start; 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcStart!=0) { 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcChars += srcStart; 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(chars != srcChars) { 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode=U_ZERO_ERROR; 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org options|U_COMPARE_IGNORE_CASE, &errorCode); 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(result!=0) { 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int8_t)(result >> 24 | 1); 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // get the srcLength if necessary 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(srcLength < 0) { 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org srcLength = u_strlen(srcChars + srcStart); 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length != srcLength) { 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int8_t)((length - srcLength) >> 24 | 1); 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//======================================== 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Write implementation 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//======================================== 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString & 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString::caseMap(const UCaseMap *csm, 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UStringCaseMapper *stringCaseMapper) { 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(isEmpty() || !isWritable()) { 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // nothing to do 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // We need to allocate a new buffer for the internal string case mapping function. 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // This is very similar to how doReplace() keeps the old array pointer 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // and deletes the old array itself after it is done. 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar oldStackBuffer[US_STACKBUF_SIZE]; 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *oldArray; 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t oldLength; 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(fFlags&kUsingStackBuffer) { 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // copy the stack buffer contents because it will be overwritten 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldArray = oldStackBuffer; 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldLength = fShortLength; 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldArray = getArrayStart(); 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldLength = length(); 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t capacity; 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(oldLength <= US_STACKBUF_SIZE) { 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org capacity = US_STACKBUF_SIZE; 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org capacity = oldLength + 20; 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *bufferToDelete = 0; 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Case-map, and if the result is too long, then reallocate and repeat. 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode; 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newLength; 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode = U_ZERO_ERROR; 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldArray, oldLength, &errorCode); 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setLength(newLength); 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (bufferToDelete) { 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(bufferToDelete); 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setToBogus(); 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString & 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString::foldCase(uint32_t options) { 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCaseMap csm=UCASEMAP_INITIALIZER; 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org csm.csp=ucase_getSingleton(); 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org csm.options=options; 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return caseMap(&csm, ustrcase_internalFold); 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Defined here to reduce dependencies on break iterator 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguhash_hashCaselessUnicodeString(const UElement key) { 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_NAMESPACE_USE 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString *str = (const UnicodeString*) key.pointer; 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (str == NULL) { 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Inefficient; a better way would be to have a hash function in 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // UnicodeString that does case folding on the fly. 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString copy(*str); 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return copy.foldCase().hashCode(); 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Defined here to reduce dependencies on break iterator 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orguhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_NAMESPACE_USE 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (str1 == str2) { 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (str1 == NULL || str2 == NULL) { 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 181