1/*
2*******************************************************************************
3* Copyright (C) 1996-2012, International Business Machines Corporation and
4* others. All Rights Reserved.
5*******************************************************************************
6*/
7//===============================================================================
8//
9// File sortkey.cpp
10//
11//
12//
13// Created by: Helena Shih
14//
15// Modification History:
16//
17//  Date         Name          Description
18//
19//  6/20/97      helena        Java class name change.
20//  6/23/97      helena        Added comments to make code more readable.
21//  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23//                             Cleaned up operator=
24// 07/12/99      helena        HPUX 11 CC port.
25// 03/06/01      synwee        Modified compareTo, to handle the result of
26//                             2 string similar in contents, but one is longer
27//                             than the other
28//===============================================================================
29
30#include "unicode/utypes.h"
31
32#if !UCONFIG_NO_COLLATION
33
34#include "unicode/sortkey.h"
35#include "cmemory.h"
36#include "uelement.h"
37#include "ustr_imp.h"
38
39U_NAMESPACE_BEGIN
40
41// A hash code of kInvalidHashCode indicates that the hash code needs
42// to be computed. A hash code of kEmptyHashCode is used for empty keys
43// and for any key whose computed hash code is kInvalidHashCode.
44static const int32_t kInvalidHashCode = 0;
45static const int32_t kEmptyHashCode = 1;
46// The "bogus hash code" replaces a separate fBogus flag.
47static const int32_t kBogusHashCode = 2;
48
49UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
50
51CollationKey::CollationKey()
52    : UObject(), fFlagAndLength(0),
53      fHashCode(kEmptyHashCode)
54{
55}
56
57// Create a collation key from a bit array.
58CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
59    : UObject(), fFlagAndLength(count),
60      fHashCode(kInvalidHashCode)
61{
62    if (count < 0 || (newValues == NULL && count != 0) ||
63            (count > getCapacity() && reallocate(count, 0) == NULL)) {
64        setToBogus();
65        return;
66    }
67
68    if (count > 0) {
69        uprv_memcpy(getBytes(), newValues, count);
70    }
71}
72
73CollationKey::CollationKey(const CollationKey& other)
74    : UObject(other), fFlagAndLength(other.getLength()),
75      fHashCode(other.fHashCode)
76{
77    if (other.isBogus())
78    {
79        setToBogus();
80        return;
81    }
82
83    int32_t length = fFlagAndLength;
84    if (length > getCapacity() && reallocate(length, 0) == NULL) {
85        setToBogus();
86        return;
87    }
88
89    if (length > 0) {
90        uprv_memcpy(getBytes(), other.getBytes(), length);
91    }
92}
93
94CollationKey::~CollationKey()
95{
96    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
97}
98
99uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
100    uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity));
101    if(newBytes == NULL) { return NULL; }
102    if(length > 0) {
103        uprv_memcpy(newBytes, getBytes(), length);
104    }
105    if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
106    fUnion.fFields.fBytes = newBytes;
107    fUnion.fFields.fCapacity = newCapacity;
108    fFlagAndLength |= 0x80000000;
109    return newBytes;
110}
111
112void CollationKey::setLength(int32_t newLength) {
113    // U_ASSERT(newLength >= 0 && newLength <= getCapacity());
114    fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength;
115    fHashCode = kInvalidHashCode;
116}
117
118// set the key to an empty state
119CollationKey&
120CollationKey::reset()
121{
122    fFlagAndLength &= 0x80000000;
123    fHashCode = kEmptyHashCode;
124
125    return *this;
126}
127
128// set the key to a "bogus" or invalid state
129CollationKey&
130CollationKey::setToBogus()
131{
132    fFlagAndLength &= 0x80000000;
133    fHashCode = kBogusHashCode;
134
135    return *this;
136}
137
138UBool
139CollationKey::operator==(const CollationKey& source) const
140{
141    return getLength() == source.getLength() &&
142            (this == &source ||
143             uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
144}
145
146const CollationKey&
147CollationKey::operator=(const CollationKey& other)
148{
149    if (this != &other)
150    {
151        if (other.isBogus())
152        {
153            return setToBogus();
154        }
155
156        int32_t length = other.getLength();
157        if (length > getCapacity() && reallocate(length, 0) == NULL) {
158            return setToBogus();
159        }
160        if (length > 0) {
161            uprv_memcpy(getBytes(), other.getBytes(), length);
162        }
163        fFlagAndLength = (fFlagAndLength & 0x80000000) | length;
164        fHashCode = other.fHashCode;
165    }
166
167    return *this;
168}
169
170// Bitwise comparison for the collation keys.
171Collator::EComparisonResult
172CollationKey::compareTo(const CollationKey& target) const
173{
174    UErrorCode errorCode = U_ZERO_ERROR;
175    return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
176}
177
178// Bitwise comparison for the collation keys.
179UCollationResult
180CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
181{
182  if(U_SUCCESS(status)) {
183    const uint8_t *src = getBytes();
184    const uint8_t *tgt = target.getBytes();
185
186    // are we comparing the same string
187    if (src == tgt)
188        return  UCOL_EQUAL;
189
190    UCollationResult result;
191
192    // are we comparing different lengths?
193    int32_t minLength = getLength();
194    int32_t targetLength = target.getLength();
195    if (minLength < targetLength) {
196        result = UCOL_LESS;
197    } else if (minLength == targetLength) {
198        result = UCOL_EQUAL;
199    } else {
200        minLength = targetLength;
201        result = UCOL_GREATER;
202    }
203
204    if (minLength > 0) {
205        int diff = uprv_memcmp(src, tgt, minLength);
206        if (diff > 0) {
207            return UCOL_GREATER;
208        }
209        else
210            if (diff < 0) {
211                return UCOL_LESS;
212            }
213    }
214
215    return result;
216  } else {
217    return UCOL_EQUAL;
218  }
219}
220
221#ifdef U_USE_COLLATION_KEY_DEPRECATES
222// Create a copy of the byte array.
223uint8_t*
224CollationKey::toByteArray(int32_t& count) const
225{
226    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
227
228    if (result == NULL)
229    {
230        count = 0;
231    }
232    else
233    {
234        count = fCount;
235        if (count > 0) {
236            uprv_memcpy(result, fBytes, fCount);
237        }
238    }
239
240    return result;
241}
242#endif
243
244static int32_t
245computeHashCode(const uint8_t *key, int32_t  length) {
246    const char *s = reinterpret_cast<const char *>(key);
247    int32_t hash;
248    if (s == NULL || length == 0) {
249        hash = kEmptyHashCode;
250    } else {
251        hash = ustr_hashCharsN(s, length);
252        if (hash == kInvalidHashCode || hash == kBogusHashCode) {
253            hash = kEmptyHashCode;
254        }
255    }
256    return hash;
257}
258
259int32_t
260CollationKey::hashCode() const
261{
262    // (Cribbed from UnicodeString)
263    // We cache the hashCode; when it becomes invalid, due to any change to the
264    // string, we note this by setting it to kInvalidHashCode. [LIU]
265
266    // Note: This method is semantically const, but physically non-const.
267
268    if (fHashCode == kInvalidHashCode)
269    {
270        fHashCode = computeHashCode(getBytes(), getLength());
271    }
272
273    return fHashCode;
274}
275
276U_NAMESPACE_END
277
278U_CAPI int32_t U_EXPORT2
279ucol_keyHashCode(const uint8_t *key,
280                       int32_t  length)
281{
282    return icu::computeHashCode(key, length);
283}
284
285#endif /* #if !UCONFIG_NO_COLLATION */
286