1/*
2*******************************************************************************
3* Copyright (C) 1996-2011, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*/
7//===============================================================================
8//
9// File sortkey.cpp
10//
11//
12//
13// Created by: Helena Shih
14//
15// Modification History:
16//
17//  Date         Name          Description
18//
19//  6/20/97      helena        Java class name change.
20//  6/23/97      helena        Added comments to make code more readable.
21//  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23//                             Cleaned up operator=
24// 07/12/99      helena        HPUX 11 CC port.
25// 03/06/01      synwee        Modified compareTo, to handle the result of
26//                             2 string similar in contents, but one is longer
27//                             than the other
28//===============================================================================
29
30#include "unicode/utypes.h"
31
32#if !UCONFIG_NO_COLLATION
33
34#include "unicode/sortkey.h"
35#include "cmemory.h"
36#include "uhash.h"
37
38U_NAMESPACE_BEGIN
39
40// A hash code of kInvalidHashCode indicates that the has code needs
41// to be computed. A hash code of kEmptyHashCode is used for empty keys
42// and for any key whose computed hash code is kInvalidHashCode.
43#define kInvalidHashCode ((int32_t)0)
44#define kEmptyHashCode ((int32_t)1)
45
46UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47
48CollationKey::CollationKey()
49    : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50      fHashCode(kEmptyHashCode), fBytes(NULL)
51{
52}
53
54// Create a collation key from a bit array.
55CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56    : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57      fHashCode(kInvalidHashCode)
58{
59    fBytes = (uint8_t *)uprv_malloc(count);
60
61    if (fBytes == NULL)
62    {
63        setToBogus();
64        return;
65    }
66
67    uprv_memcpy(fBytes, newValues, fCount);
68}
69
70CollationKey::CollationKey(const CollationKey& other)
71: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72  fHashCode(other.fHashCode), fBytes(NULL)
73{
74    if (other.fBogus)
75    {
76        setToBogus();
77        return;
78    }
79
80    fBytes = (uint8_t *)uprv_malloc(fCapacity);
81
82    if (fBytes == NULL)
83    {
84        setToBogus();
85        return;
86    }
87
88    uprv_memcpy(fBytes, other.fBytes, other.fCount);
89    if(fCapacity>fCount) {
90        uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91    }
92}
93
94CollationKey::~CollationKey()
95{
96        uprv_free(fBytes);
97}
98
99void CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
100    if(fBytes != NULL) {
101        uprv_free(fBytes);
102    }
103    fBytes = values;
104    fCapacity = capacity;
105    setLength(count);
106}
107
108void CollationKey::setLength(int32_t newLength) {
109    fBogus = FALSE;
110    fCount = newLength;
111    fHashCode = kInvalidHashCode;
112}
113
114// set the key to an empty state
115CollationKey&
116CollationKey::reset()
117{
118    fCount = 0;
119    fBogus = FALSE;
120    fHashCode = kEmptyHashCode;
121
122    return *this;
123}
124
125// set the key to a "bogus" or invalid state
126CollationKey&
127CollationKey::setToBogus()
128{
129    uprv_free(fBytes);
130    fBytes = NULL;
131
132    fCapacity = 0;
133    fCount = 0;
134    fHashCode = kInvalidHashCode;
135
136    return *this;
137}
138
139UBool
140CollationKey::operator==(const CollationKey& source) const
141{
142    return (this->fCount == source.fCount &&
143            (this->fBytes == source.fBytes ||
144             uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
145}
146
147const CollationKey&
148CollationKey::operator=(const CollationKey& other)
149{
150    if (this != &other)
151    {
152        if (other.isBogus())
153        {
154            return setToBogus();
155        }
156
157        if (other.fBytes != NULL)
158        {
159            ensureCapacity(other.fCount);
160
161            if (isBogus())
162            {
163                return *this;
164            }
165
166            fHashCode = other.fHashCode;
167            uprv_memcpy(fBytes, other.fBytes, fCount);
168        }
169        else
170        {
171            fCount = 0;
172            fBogus = FALSE;
173            fHashCode = kEmptyHashCode;
174        }
175    }
176
177    return *this;
178}
179
180// Bitwise comparison for the collation keys.
181// NOTE: this is somewhat messy 'cause we can't count
182// on memcmp returning the exact values which match
183// Collator::EComparisonResult
184Collator::EComparisonResult
185CollationKey::compareTo(const CollationKey& target) const
186{
187    uint8_t *src = this->fBytes;
188    uint8_t *tgt = target.fBytes;
189
190    // are we comparing the same string
191    if (src == tgt)
192        return  Collator::EQUAL;
193
194        /*
195        int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
196        if (count == 0)
197        {
198        // If count is 0, at least one of the keys is empty.
199        // An empty key is always LESS than a non-empty one
200        // and EQUAL to another empty
201        if (this->fCount < target.fCount)
202        {
203        return Collator::LESS;
204        }
205
206          if (this->fCount > target.fCount)
207          {
208          return Collator::GREATER;
209          }
210          return Collator::EQUAL;
211          }
212    */
213
214    int                         minLength;
215    Collator::EComparisonResult result;
216
217    // are we comparing different lengths?
218    if (this->fCount != target.fCount) {
219        if (this->fCount < target.fCount) {
220            minLength = this->fCount;
221            result    =  Collator::LESS;
222        }
223        else {
224            minLength = target.fCount;
225            result    =  Collator::GREATER;
226        }
227    }
228    else {
229        minLength = target.fCount;
230        result    =  Collator::EQUAL;
231    }
232
233    if (minLength > 0) {
234        int diff = uprv_memcmp(src, tgt, minLength);
235        if (diff > 0) {
236            return Collator::GREATER;
237        }
238        else
239            if (diff < 0) {
240                return Collator::LESS;
241            }
242    }
243
244    return result;
245    /*
246    if (result < 0)
247    {
248    return Collator::LESS;
249    }
250
251      if (result > 0)
252      {
253      return Collator::GREATER;
254      }
255      return Collator::EQUAL;
256    */
257}
258
259// Bitwise comparison for the collation keys.
260UCollationResult
261CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
262{
263  if(U_SUCCESS(status)) {
264    uint8_t *src = this->fBytes;
265    uint8_t *tgt = target.fBytes;
266
267    // are we comparing the same string
268    if (src == tgt)
269        return  UCOL_EQUAL;
270
271    int                         minLength;
272    UCollationResult result;
273
274    // are we comparing different lengths?
275    if (this->fCount != target.fCount) {
276        if (this->fCount < target.fCount) {
277            minLength = this->fCount;
278            result    =  UCOL_LESS;
279        }
280        else {
281            minLength = target.fCount;
282            result    =  UCOL_GREATER;
283        }
284    }
285    else {
286        minLength = target.fCount;
287        result    =  UCOL_EQUAL;
288    }
289
290    if (minLength > 0) {
291        int diff = uprv_memcmp(src, tgt, minLength);
292        if (diff > 0) {
293            return UCOL_GREATER;
294        }
295        else
296            if (diff < 0) {
297                return UCOL_LESS;
298            }
299    }
300
301    return result;
302  } else {
303    return UCOL_EQUAL;
304  }
305}
306
307CollationKey&
308CollationKey::ensureCapacity(int32_t newSize)
309{
310    if (fCapacity < newSize)
311    {
312        uprv_free(fBytes);
313
314        fBytes = (uint8_t *)uprv_malloc(newSize);
315
316        if (fBytes == NULL)
317        {
318            return setToBogus();
319        }
320
321        uprv_memset(fBytes, 0, fCapacity);
322        fCapacity = newSize;
323    }
324
325    fBogus = FALSE;
326    fCount = newSize;
327    fHashCode = kInvalidHashCode;
328
329    return *this;
330}
331
332#ifdef U_USE_COLLATION_KEY_DEPRECATES
333// Create a copy of the byte array.
334uint8_t*
335CollationKey::toByteArray(int32_t& count) const
336{
337    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
338
339    if (result == NULL)
340    {
341        count = 0;
342    }
343    else
344    {
345        count = fCount;
346        uprv_memcpy(result, fBytes, fCount);
347    }
348
349    return result;
350}
351#endif
352
353int32_t
354CollationKey::hashCode() const
355{
356    // (Cribbed from UnicodeString)
357    // We cache the hashCode; when it becomes invalid, due to any change to the
358    // string, we note this by setting it to kInvalidHashCode. [LIU]
359
360    // Note: This method is semantically const, but physically non-const.
361
362    if (fHashCode == kInvalidHashCode)
363    {
364        UHashTok key;
365        key.pointer = fBytes;
366        ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
367#if 0
368        // We compute the hash by iterating sparsely over 64 (at most) characters
369        // spaced evenly through the string.  For each character, we multiply the
370        // previous hash value by a prime number and add the new character in,
371        // in the manner of a additive linear congruential random number generator,
372        // thus producing a pseudorandom deterministic value which should be well
373        // distributed over the output range. [LIU]
374        const uint8_t   *p = fBytes, *limit = fBytes + fCount;
375        int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
376        int32_t         hash = 0;
377
378        while (p < limit)
379        {
380            hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
381            p += inc;
382        }
383
384        // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385        if (hash == kInvalidHashCode)
386        {
387            hash = kEmptyHashCode;
388        }
389
390        ((CollationKey *)this)->fHashCode = hash; // cast away const
391#endif
392    }
393
394    return fHashCode;
395}
396
397U_NAMESPACE_END
398
399U_CAPI int32_t U_EXPORT2
400ucol_keyHashCode(const uint8_t *key,
401                       int32_t  length)
402{
403    U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
404    return newKey.hashCode();
405}
406
407#endif /* #if !UCONFIG_NO_COLLATION */
408