1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  unistr_case.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:2
12*
13*   created on: 2004aug19
14*   created by: Markus W. Scherer
15*
16*   Case-mapping functions moved here from unistr.cpp
17*/
18
19#include "unicode/utypes.h"
20#include "unicode/putil.h"
21#include "unicode/locid.h"
22#include "cstring.h"
23#include "cmemory.h"
24#include "unicode/ustring.h"
25#include "unicode/unistr.h"
26#include "unicode/uchar.h"
27#include "unicode/ubrk.h"
28#include "ustr_imp.h"
29#include "uhash.h"
30
31U_NAMESPACE_BEGIN
32
33//========================================
34// Read-only implementation
35//========================================
36
37int8_t
38UnicodeString::doCaseCompare(int32_t start,
39                             int32_t length,
40                             const UChar *srcChars,
41                             int32_t srcStart,
42                             int32_t srcLength,
43                             uint32_t options) const
44{
45  // compare illegal string values
46  // treat const UChar *srcChars==NULL as an empty string
47  if(isBogus()) {
48    return -1;
49  }
50
51  // pin indices to legal values
52  pinIndices(start, length);
53
54  if(srcChars == NULL) {
55    srcStart = srcLength = 0;
56  }
57
58  // get the correct pointer
59  const UChar *chars = getArrayStart();
60
61  chars += start;
62  srcChars += srcStart;
63
64  if(chars != srcChars) {
65    UErrorCode errorCode=U_ZERO_ERROR;
66    int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
67                                options|U_COMPARE_IGNORE_CASE, &errorCode);
68    if(result!=0) {
69      return (int8_t)(result >> 24 | 1);
70    }
71  } else {
72    // get the srcLength if necessary
73    if(srcLength < 0) {
74      srcLength = u_strlen(srcChars + srcStart);
75    }
76    if(length != srcLength) {
77      return (int8_t)((length - srcLength) >> 24 | 1);
78    }
79  }
80  return 0;
81}
82
83//========================================
84// Write implementation
85//========================================
86
87/*
88 * Implement argument checking and buffer handling
89 * for string case mapping as a common function.
90 */
91
92UnicodeString &
93UnicodeString::caseMap(BreakIterator *titleIter,
94                       const char *locale,
95                       uint32_t options,
96                       int32_t toWhichCase) {
97  if(isEmpty() || !isWritable()) {
98    // nothing to do
99    return *this;
100  }
101
102  const UCaseProps *csp=ucase_getSingleton();
103
104  // We need to allocate a new buffer for the internal string case mapping function.
105  // This is very similar to how doReplace() keeps the old array pointer
106  // and deletes the old array itself after it is done.
107  // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
108  UChar oldStackBuffer[US_STACKBUF_SIZE];
109  UChar *oldArray;
110  int32_t oldLength;
111
112  if(fFlags&kUsingStackBuffer) {
113    // copy the stack buffer contents because it will be overwritten
114    u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
115    oldArray = oldStackBuffer;
116    oldLength = fShortLength;
117  } else {
118    oldArray = getArrayStart();
119    oldLength = length();
120  }
121
122  int32_t capacity;
123  if(oldLength <= US_STACKBUF_SIZE) {
124    capacity = US_STACKBUF_SIZE;
125  } else {
126    capacity = oldLength + 20;
127  }
128  int32_t *bufferToDelete = 0;
129  if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
130    return *this;
131  }
132
133  // Case-map, and if the result is too long, then reallocate and repeat.
134  UErrorCode errorCode;
135  int32_t newLength;
136  do {
137    errorCode = U_ZERO_ERROR;
138    if(toWhichCase==TO_LOWER) {
139      newLength = ustr_toLower(csp, getArrayStart(), getCapacity(),
140                               oldArray, oldLength,
141                               locale, &errorCode);
142    } else if(toWhichCase==TO_UPPER) {
143      newLength = ustr_toUpper(csp, getArrayStart(), getCapacity(),
144                               oldArray, oldLength,
145                               locale, &errorCode);
146    } else if(toWhichCase==TO_TITLE) {
147#if UCONFIG_NO_BREAK_ITERATION
148        errorCode=U_UNSUPPORTED_ERROR;
149#else
150      newLength = ustr_toTitle(csp, getArrayStart(), getCapacity(),
151                               oldArray, oldLength,
152                               (UBreakIterator *)titleIter, locale, options, &errorCode);
153#endif
154    } else {
155      newLength = ustr_foldCase(csp, getArrayStart(), getCapacity(),
156                                oldArray, oldLength,
157                                options,
158                                &errorCode);
159    }
160    setLength(newLength);
161  } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
162
163  if (bufferToDelete) {
164    uprv_free(bufferToDelete);
165  }
166  if(U_FAILURE(errorCode)) {
167    setToBogus();
168  }
169  return *this;
170}
171
172UnicodeString &
173UnicodeString::toLower() {
174  return caseMap(0, Locale::getDefault().getName(), 0, TO_LOWER);
175}
176
177UnicodeString &
178UnicodeString::toLower(const Locale &locale) {
179  return caseMap(0, locale.getName(), 0, TO_LOWER);
180}
181
182UnicodeString &
183UnicodeString::toUpper() {
184  return caseMap(0, Locale::getDefault().getName(), 0, TO_UPPER);
185}
186
187UnicodeString &
188UnicodeString::toUpper(const Locale &locale) {
189  return caseMap(0, locale.getName(), 0, TO_UPPER);
190}
191
192#if !UCONFIG_NO_BREAK_ITERATION
193
194UnicodeString &
195UnicodeString::toTitle(BreakIterator *titleIter) {
196  return caseMap(titleIter, Locale::getDefault().getName(), 0, TO_TITLE);
197}
198
199UnicodeString &
200UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale) {
201  return caseMap(titleIter, locale.getName(), 0, TO_TITLE);
202}
203
204UnicodeString &
205UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options) {
206  return caseMap(titleIter, locale.getName(), options, TO_TITLE);
207}
208
209#endif
210
211UnicodeString &
212UnicodeString::foldCase(uint32_t options) {
213    /* The Locale parameter isn't used. Use "" instead. */
214    return caseMap(0, "", options, FOLD_CASE);
215}
216
217U_NAMESPACE_END
218
219// Defined here to reduce dependencies on break iterator
220U_CAPI int32_t U_EXPORT2
221uhash_hashCaselessUnicodeString(const UHashTok key) {
222    U_NAMESPACE_USE
223    const UnicodeString *str = (const UnicodeString*) key.pointer;
224    if (str == NULL) {
225        return 0;
226    }
227    // Inefficient; a better way would be to have a hash function in
228    // UnicodeString that does case folding on the fly.
229    UnicodeString copy(*str);
230    return copy.foldCase().hashCode();
231}
232
233// Defined here to reduce dependencies on break iterator
234U_CAPI UBool U_EXPORT2
235uhash_compareCaselessUnicodeString(const UHashTok key1, const UHashTok key2) {
236    U_NAMESPACE_USE
237    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
238    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
239    if (str1 == str2) {
240        return TRUE;
241    }
242    if (str1 == NULL || str2 == NULL) {
243        return FALSE;
244    }
245    return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
246}
247
248