unistr_case.cpp revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  unistr_case.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:2
12*
13*   created on: 2004aug19
14*   created by: Markus W. Scherer
15*
16*   Case-mapping functions moved here from unistr.cpp
17*/
18
19#include "unicode/utypes.h"
20#include "unicode/putil.h"
21#include "cstring.h"
22#include "cmemory.h"
23#include "unicode/ustring.h"
24#include "unicode/unistr.h"
25#include "unicode/uchar.h"
26#include "uelement.h"
27#include "ustr_imp.h"
28
29U_NAMESPACE_BEGIN
30
31//========================================
32// Read-only implementation
33//========================================
34
35int8_t
36UnicodeString::doCaseCompare(int32_t start,
37                             int32_t length,
38                             const UChar *srcChars,
39                             int32_t srcStart,
40                             int32_t srcLength,
41                             uint32_t options) const
42{
43  // compare illegal string values
44  // treat const UChar *srcChars==NULL as an empty string
45  if(isBogus()) {
46    return -1;
47  }
48
49  // pin indices to legal values
50  pinIndices(start, length);
51
52  if(srcChars == NULL) {
53    srcStart = srcLength = 0;
54  }
55
56  // get the correct pointer
57  const UChar *chars = getArrayStart();
58
59  chars += start;
60  if(srcStart!=0) {
61    srcChars += srcStart;
62  }
63
64  if(chars != srcChars) {
65    UErrorCode errorCode=U_ZERO_ERROR;
66    int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
67                                options|U_COMPARE_IGNORE_CASE, &errorCode);
68    if(result!=0) {
69      return (int8_t)(result >> 24 | 1);
70    }
71  } else {
72    // get the srcLength if necessary
73    if(srcLength < 0) {
74      srcLength = u_strlen(srcChars + srcStart);
75    }
76    if(length != srcLength) {
77      return (int8_t)((length - srcLength) >> 24 | 1);
78    }
79  }
80  return 0;
81}
82
83//========================================
84// Write implementation
85//========================================
86
87UnicodeString &
88UnicodeString::caseMap(const UCaseMap *csm,
89                       UStringCaseMapper *stringCaseMapper) {
90  if(isEmpty() || !isWritable()) {
91    // nothing to do
92    return *this;
93  }
94
95  // We need to allocate a new buffer for the internal string case mapping function.
96  // This is very similar to how doReplace() keeps the old array pointer
97  // and deletes the old array itself after it is done.
98  // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
99  UChar oldStackBuffer[US_STACKBUF_SIZE];
100  UChar *oldArray;
101  int32_t oldLength;
102
103  if(fFlags&kUsingStackBuffer) {
104    // copy the stack buffer contents because it will be overwritten
105    u_memcpy(oldStackBuffer, fUnion.fStackBuffer, fShortLength);
106    oldArray = oldStackBuffer;
107    oldLength = fShortLength;
108  } else {
109    oldArray = getArrayStart();
110    oldLength = length();
111  }
112
113  int32_t capacity;
114  if(oldLength <= US_STACKBUF_SIZE) {
115    capacity = US_STACKBUF_SIZE;
116  } else {
117    capacity = oldLength + 20;
118  }
119  int32_t *bufferToDelete = 0;
120  if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
121    return *this;
122  }
123
124  // Case-map, and if the result is too long, then reallocate and repeat.
125  UErrorCode errorCode;
126  int32_t newLength;
127  do {
128    errorCode = U_ZERO_ERROR;
129    newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
130                                 oldArray, oldLength, &errorCode);
131    setLength(newLength);
132  } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
133
134  if (bufferToDelete) {
135    uprv_free(bufferToDelete);
136  }
137  if(U_FAILURE(errorCode)) {
138    setToBogus();
139  }
140  return *this;
141}
142
143UnicodeString &
144UnicodeString::foldCase(uint32_t options) {
145  UCaseMap csm=UCASEMAP_INITIALIZER;
146  csm.csp=ucase_getSingleton();
147  csm.options=options;
148  return caseMap(&csm, ustrcase_internalFold);
149}
150
151U_NAMESPACE_END
152
153// Defined here to reduce dependencies on break iterator
154U_CAPI int32_t U_EXPORT2
155uhash_hashCaselessUnicodeString(const UElement key) {
156    U_NAMESPACE_USE
157    const UnicodeString *str = (const UnicodeString*) key.pointer;
158    if (str == NULL) {
159        return 0;
160    }
161    // Inefficient; a better way would be to have a hash function in
162    // UnicodeString that does case folding on the fly.
163    UnicodeString copy(*str);
164    return copy.foldCase().hashCode();
165}
166
167// Defined here to reduce dependencies on break iterator
168U_CAPI UBool U_EXPORT2
169uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
170    U_NAMESPACE_USE
171    const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
172    const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
173    if (str1 == str2) {
174        return TRUE;
175    }
176    if (str1 == NULL || str2 == NULL) {
177        return FALSE;
178    }
179    return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
180}
181