unistr_case.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1/* 2******************************************************************************* 3* 4* Copyright (C) 1999-2007, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: unistr_case.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:2 12* 13* created on: 2004aug19 14* created by: Markus W. Scherer 15* 16* Case-mapping functions moved here from unistr.cpp 17*/ 18 19#include "unicode/utypes.h" 20#include "unicode/putil.h" 21#include "unicode/locid.h" 22#include "cstring.h" 23#include "cmemory.h" 24#include "unicode/ustring.h" 25#include "unicode/unistr.h" 26#include "unicode/uchar.h" 27#include "unicode/ubrk.h" 28#include "ustr_imp.h" 29#include "unormimp.h" 30#include "uhash.h" 31 32U_NAMESPACE_BEGIN 33 34//======================================== 35// Read-only implementation 36//======================================== 37 38int8_t 39UnicodeString::doCaseCompare(int32_t start, 40 int32_t length, 41 const UChar *srcChars, 42 int32_t srcStart, 43 int32_t srcLength, 44 uint32_t options) const 45{ 46 // compare illegal string values 47 // treat const UChar *srcChars==NULL as an empty string 48 if(isBogus()) { 49 return -1; 50 } 51 52 // pin indices to legal values 53 pinIndices(start, length); 54 55 if(srcChars == NULL) { 56 srcStart = srcLength = 0; 57 } 58 59 // get the correct pointer 60 const UChar *chars = getArrayStart(); 61 62 chars += start; 63 srcChars += srcStart; 64 65 if(chars != srcChars) { 66 UErrorCode errorCode=U_ZERO_ERROR; 67 int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, 68 options|U_COMPARE_IGNORE_CASE, &errorCode); 69 if(result!=0) { 70 return (int8_t)(result >> 24 | 1); 71 } 72 } else { 73 // get the srcLength if necessary 74 if(srcLength < 0) { 75 srcLength = u_strlen(srcChars + srcStart); 76 } 77 if(length != srcLength) { 78 return (int8_t)((length - srcLength) >> 24 | 1); 79 } 80 } 81 return 0; 82} 83 84//======================================== 85// Write implementation 86//======================================== 87 88/* 89 * Implement argument checking and buffer handling 90 * for string case mapping as a common function. 91 */ 92 93UnicodeString & 94UnicodeString::caseMap(BreakIterator *titleIter, 95 const char *locale, 96 uint32_t options, 97 int32_t toWhichCase) { 98 if(fLength <= 0) { 99 // nothing to do 100 return *this; 101 } 102 103 UErrorCode errorCode; 104 105 errorCode = U_ZERO_ERROR; 106 const UCaseProps *csp=ucase_getSingleton(&errorCode); 107 if(U_FAILURE(errorCode)) { 108 setToBogus(); 109 return *this; 110 } 111 112 // We need to allocate a new buffer for the internal string case mapping function. 113 // This is very similar to how doReplace() below keeps the old array pointer 114 // and deletes the old array itself after it is done. 115 // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. 116 UChar *oldArray = fArray; 117 int32_t oldLength = fLength; 118 int32_t *bufferToDelete = 0; 119 120 // Make sure that if the string is in fStackBuffer we do not overwrite it! 121 int32_t capacity; 122 if(fLength <= US_STACKBUF_SIZE) { 123 if(fArray == fStackBuffer) { 124 capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer 125 } else { 126 capacity = US_STACKBUF_SIZE; 127 } 128 } else { 129 capacity = fLength + 20; 130 } 131 if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) { 132 return *this; 133 } 134 135 // Case-map, and if the result is too long, then reallocate and repeat. 136 do { 137 errorCode = U_ZERO_ERROR; 138 if(toWhichCase==TO_LOWER) { 139 fLength = ustr_toLower(csp, fArray, fCapacity, 140 oldArray, oldLength, 141 locale, &errorCode); 142 } else if(toWhichCase==TO_UPPER) { 143 fLength = ustr_toUpper(csp, fArray, fCapacity, 144 oldArray, oldLength, 145 locale, &errorCode); 146 } else if(toWhichCase==TO_TITLE) { 147#if UCONFIG_NO_BREAK_ITERATION 148 errorCode=U_UNSUPPORTED_ERROR; 149#else 150 fLength = ustr_toTitle(csp, fArray, fCapacity, 151 oldArray, oldLength, 152 (UBreakIterator *)titleIter, locale, options, &errorCode); 153#endif 154 } else { 155 fLength = ustr_foldCase(csp, fArray, fCapacity, 156 oldArray, oldLength, 157 options, 158 &errorCode); 159 } 160 } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE)); 161 162 if (bufferToDelete) { 163 uprv_free(bufferToDelete); 164 } 165 if(U_FAILURE(errorCode)) { 166 setToBogus(); 167 } 168 return *this; 169} 170 171UnicodeString & 172UnicodeString::toLower() { 173 return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER); 174} 175 176UnicodeString & 177UnicodeString::toLower(const Locale &locale) { 178 return caseMap(0, locale.getName(), 0, TO_LOWER); 179} 180 181UnicodeString & 182UnicodeString::toUpper() { 183 return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER); 184} 185 186UnicodeString & 187UnicodeString::toUpper(const Locale &locale) { 188 return caseMap(0, locale.getName(), 0, TO_UPPER); 189} 190 191#if !UCONFIG_NO_BREAK_ITERATION 192 193UnicodeString & 194UnicodeString::toTitle(BreakIterator *titleIter) { 195 return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE); 196} 197 198UnicodeString & 199UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) { 200 return caseMap(titleIter, locale.getName(), 0, TO_TITLE); 201} 202 203UnicodeString & 204UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) { 205 return caseMap(titleIter, locale.getName(), options, TO_TITLE); 206} 207 208#endif 209 210UnicodeString & 211UnicodeString::foldCase(uint32_t options) { 212 /* The Locale parameter isn't used. Use "" instead. */ 213 return caseMap(0, "", options, FOLD_CASE); 214} 215 216U_NAMESPACE_END 217 218// Defined here to reduce dependencies on break iterator 219U_CAPI int32_t U_EXPORT2 220uhash_hashCaselessUnicodeString(const UHashTok key) { 221 U_NAMESPACE_USE 222 const UnicodeString *str = (const UnicodeString*) key.pointer; 223 if (str == NULL) { 224 return 0; 225 } 226 // Inefficient; a better way would be to have a hash function in 227 // UnicodeString that does case folding on the fly. 228 UnicodeString copy(*str); 229 return copy.foldCase().hashCode(); 230} 231 232// Defined here to reduce dependencies on break iterator 233U_CAPI UBool U_EXPORT2 234uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) { 235 U_NAMESPACE_USE 236 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 237 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 238 if (str1 == str2) { 239 return TRUE; 240 } 241 if (str1 == NULL || str2 == NULL) { 242 return FALSE; 243 } 244 return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; 245} 246 247