unistr_case.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2007, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  unistr_case.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:2
12*
13*   created on: 2004aug19
14*   created by: Markus W. Scherer
15*
16*   Case-mapping functions moved here from unistr.cpp
17*/
18
19#include "unicode/utypes.h"
20#include "unicode/putil.h"
21#include "unicode/locid.h"
22#include "cstring.h"
23#include "cmemory.h"
24#include "unicode/ustring.h"
25#include "unicode/unistr.h"
26#include "unicode/uchar.h"
27#include "unicode/ubrk.h"
28#include "ustr_imp.h"
29#include "unormimp.h"
30#include "uhash.h"
31
32U_NAMESPACE_BEGIN
33
34//========================================
35// Read-only implementation
36//========================================
37
38int8_t
39UnicodeString::doCaseCompare(int32_t start,
40                             int32_t length,
41                             const UChar *srcChars,
42                             int32_t srcStart,
43                             int32_t srcLength,
44                             uint32_t options) const
45{
46  // compare illegal string values
47  // treat const UChar *srcChars==NULL as an empty string
48  if(isBogus()) {
49    return -1;
50  }
51
52  // pin indices to legal values
53  pinIndices(start, length);
54
55  if(srcChars == NULL) {
56    srcStart = srcLength = 0;
57  }
58
59  // get the correct pointer
60  const UChar *chars = getArrayStart();
61
62  chars += start;
63  srcChars += srcStart;
64
65  if(chars != srcChars) {
66    UErrorCode errorCode=U_ZERO_ERROR;
67    int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
68                                options|U_COMPARE_IGNORE_CASE, &errorCode);
69    if(result!=0) {
70      return (int8_t)(result >> 24 | 1);
71    }
72  } else {
73    // get the srcLength if necessary
74    if(srcLength < 0) {
75      srcLength = u_strlen(srcChars + srcStart);
76    }
77    if(length != srcLength) {
78      return (int8_t)((length - srcLength) >> 24 | 1);
79    }
80  }
81  return 0;
82}
83
84//========================================
85// Write implementation
86//========================================
87
88/*
89 * Implement argument checking and buffer handling
90 * for string case mapping as a common function.
91 */
92
93UnicodeString &
94UnicodeString::caseMap(BreakIterator *titleIter,
95                       const char *locale,
96                       uint32_t options,
97                       int32_t toWhichCase) {
98  if(fLength <= 0) {
99    // nothing to do
100    return *this;
101  }
102
103  UErrorCode errorCode;
104
105  errorCode = U_ZERO_ERROR;
106  const UCaseProps *csp=ucase_getSingleton(&errorCode);
107  if(U_FAILURE(errorCode)) {
108    setToBogus();
109    return *this;
110  }
111
112  // We need to allocate a new buffer for the internal string case mapping function.
113  // This is very similar to how doReplace() below keeps the old array pointer
114  // and deletes the old array itself after it is done.
115  // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
116  UChar *oldArray = fArray;
117  int32_t oldLength = fLength;
118  int32_t *bufferToDelete = 0;
119
120  // Make sure that if the string is in fStackBuffer we do not overwrite it!
121  int32_t capacity;
122  if(fLength <= US_STACKBUF_SIZE) {
123    if(fArray == fStackBuffer) {
124      capacity = 2 * US_STACKBUF_SIZE; // make sure that cloneArrayIfNeeded() allocates a new buffer
125    } else {
126      capacity = US_STACKBUF_SIZE;
127    }
128  } else {
129    capacity = fLength + 20;
130  }
131  if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
132    return *this;
133  }
134
135  // Case-map, and if the result is too long, then reallocate and repeat.
136  do {
137    errorCode = U_ZERO_ERROR;
138    if(toWhichCase==TO_LOWER) {
139      fLength = ustr_toLower(csp, fArray, fCapacity,
140                             oldArray, oldLength,
141                             locale, &errorCode);
142    } else if(toWhichCase==TO_UPPER) {
143      fLength = ustr_toUpper(csp, fArray, fCapacity,
144                             oldArray, oldLength,
145                             locale, &errorCode);
146    } else if(toWhichCase==TO_TITLE) {
147#if UCONFIG_NO_BREAK_ITERATION
148        errorCode=U_UNSUPPORTED_ERROR;
149#else
150      fLength = ustr_toTitle(csp, fArray, fCapacity,
151                             oldArray, oldLength,
152                             (UBreakIterator *)titleIter, locale, options, &errorCode);
153#endif
154    } else {
155      fLength = ustr_foldCase(csp, fArray, fCapacity,
156                              oldArray, oldLength,
157                              options,
158                              &errorCode);
159    }
160  } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(fLength, fLength, FALSE));
161
162  if (bufferToDelete) {
163    uprv_free(bufferToDelete);
164  }
165  if(U_FAILURE(errorCode)) {
166    setToBogus();
167  }
168  return *this;
169}
170
171UnicodeString &
172UnicodeString::toLower() {
173  return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
174}
175
176UnicodeString &
177UnicodeString::toLower(const Locale &locale) {
178  return caseMap(0, locale.getName(), 0, TO_LOWER);
179}
180
181UnicodeString &
182UnicodeString::toUpper() {
183  return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
184}
185
186UnicodeString &
187UnicodeString::toUpper(const Locale &locale) {
188  return caseMap(0, locale.getName(), 0, TO_UPPER);
189}
190
191#if !UCONFIG_NO_BREAK_ITERATION
192
193UnicodeString &
194UnicodeString::toTitle(BreakIterator *titleIter) {
195  return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
196}
197
198UnicodeString &
199UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
200  return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
201}
202
203UnicodeString &
204UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
205  return caseMap(titleIter, locale.getName(), options, TO_TITLE);
206}
207
208#endif
209
210UnicodeString &
211UnicodeString::foldCase(uint32_t options) {
212    /* The Locale parameter isn't used. Use "" instead. */
213    return caseMap(0, "", options, FOLD_CASE);
214}
215
216U_NAMESPACE_END
217
218// Defined here to reduce dependencies on break iterator
219U_CAPI int32_t U_EXPORT2
220uhash_hashCaselessUnicodeString(const UHashTok key) {
221    U_NAMESPACE_USE
222    const UnicodeString *str = (const UnicodeString*) key.pointer;
223    if (str == NULL) {
224        return 0;
225    }
226    // Inefficient; a better way would be to have a hash function in
227    // UnicodeString that does case folding on the fly.
228    UnicodeString copy(*str);
229    return copy.foldCase().hashCode();
230}
231
232// Defined here to reduce dependencies on break iterator
233U_CAPI UBool U_EXPORT2
234uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
235    U_NAMESPACE_USE
236    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
237    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
238    if (str1 == str2) {
239        return TRUE;
240    }
241    if (str1 == NULL || str2 == NULL) {
242        return FALSE;
243    }
244    return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
245}
246
247