1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1999-2011, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: unistr_case.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:2 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2004aug19 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Case-mapping functions moved here from unistr.cpp 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "uelement.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Read-only implementation 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint8_t 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString::doCaseCompare(int32_t start, 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length, 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *srcChars, 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcStart, 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options) const 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compare illegal string values 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // treat const UChar *srcChars==NULL as an empty string 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isBogus()) { 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pin indices to legal values 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pinIndices(start, length); 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcChars == NULL) { 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcStart = srcLength = 0; 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get the correct pointer 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *chars = getArrayStart(); 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chars += start; 6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(srcStart!=0) { 6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius srcChars += srcStart; 6283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(chars != srcChars) { 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, &errorCode); 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result!=0) { 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int8_t)(result >> 24 | 1); 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get the srcLength if necessary 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength < 0) { 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength = u_strlen(srcChars + srcStart); 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length != srcLength) { 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int8_t)((length - srcLength) >> 24 | 1); 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Write implementation 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//======================================== 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString & 8883a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUnicodeString::caseMap(const UCaseMap *csm, 8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UStringCaseMapper *stringCaseMapper) { 90c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(isEmpty() || !isWritable()) { 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // nothing to do 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We need to allocate a new buffer for the internal string case mapping function. 96c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This is very similar to how doReplace() keeps the old array pointer 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and deletes the old array itself after it is done. 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. 99c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar oldStackBuffer[US_STACKBUF_SIZE]; 100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UChar *oldArray; 101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t oldLength; 102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(fFlags&kUsingStackBuffer) { 104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // copy the stack buffer contents because it will be overwritten 105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength); 106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldArray = oldStackBuffer; 107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldLength = fShortLength; 108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldArray = getArrayStart(); 110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru oldLength = length(); 111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t capacity; 114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(oldLength <= US_STACKBUF_SIZE) { 115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = US_STACKBUF_SIZE; 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru capacity = oldLength + 20; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 119c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t *bufferToDelete = 0; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Case-map, and if the result is too long, then reallocate and repeat. 12527f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode; 126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t newLength; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode = U_ZERO_ERROR; 12983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(), 13083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius oldArray, oldLength, &errorCode); 131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru setLength(newLength); 132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE)); 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bufferToDelete) { 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(bufferToDelete); 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setToBogus(); 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString & 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString::foldCase(uint32_t options) { 14583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCaseMap csm=UCASEMAP_INITIALIZER; 14683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csm.csp=ucase_getSingleton(); 14783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csm.options=options; 14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return caseMap(&csm, ustrcase_internalFold); 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Defined here to reduce dependencies on break iterator 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 15583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusuhash_hashCaselessUnicodeString(const UElement key) { 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str = (const UnicodeString*) key.pointer; 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (str == NULL) { 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Inefficient; a better way would be to have a hash function in 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeString that does case folding on the fly. 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString copy(*str); 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return copy.foldCase().hashCode(); 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Defined here to reduce dependencies on break iterator 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 16983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusuhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (str1 == str2) { 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (str1 == NULL || str2 == NULL) { 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 181