1/*
2*******************************************************************************
3* Copyright (C) 1996-2006, International Business Machines Corporation and    *
4* others. All Rights Reserved.                                                *
5*******************************************************************************
6*/
7//===============================================================================
8//
9// File sortkey.cpp
10//
11//
12//
13// Created by: Helena Shih
14//
15// Modification History:
16//
17//  Date         Name          Description
18//
19//  6/20/97      helena        Java class name change.
20//  6/23/97      helena        Added comments to make code more readable.
21//  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23//                             Cleaned up operator=
24// 07/12/99      helena        HPUX 11 CC port.
25// 03/06/01      synwee        Modified compareTo, to handle the result of
26//                             2 string similar in contents, but one is longer
27//                             than the other
28//===============================================================================
29
30#include "unicode/utypes.h"
31
32#if !UCONFIG_NO_COLLATION
33
34#include "unicode/sortkey.h"
35#include "cmemory.h"
36#include "uhash.h"
37
38U_NAMESPACE_BEGIN
39
40// A hash code of kInvalidHashCode indicates that the has code needs
41// to be computed. A hash code of kEmptyHashCode is used for empty keys
42// and for any key whose computed hash code is kInvalidHashCode.
43#define kInvalidHashCode ((int32_t)0)
44#define kEmptyHashCode ((int32_t)1)
45
46UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47
48CollationKey::CollationKey()
49    : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50      fHashCode(kEmptyHashCode), fBytes(NULL)
51{
52}
53
54// Create a collation key from a bit array.
55CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56    : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57      fHashCode(kInvalidHashCode)
58{
59    fBytes = (uint8_t *)uprv_malloc(count);
60
61    if (fBytes == NULL)
62    {
63        setToBogus();
64        return;
65    }
66
67    uprv_memcpy(fBytes, newValues, fCount);
68}
69
70CollationKey::CollationKey(const CollationKey& other)
71: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72  fHashCode(other.fHashCode), fBytes(NULL)
73{
74    if (other.fBogus)
75    {
76        setToBogus();
77        return;
78    }
79
80    fBytes = (uint8_t *)uprv_malloc(fCapacity);
81
82    if (fBytes == NULL)
83    {
84        setToBogus();
85        return;
86    }
87
88    uprv_memcpy(fBytes, other.fBytes, other.fCount);
89    if(fCapacity>fCount) {
90        uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91    }
92}
93
94CollationKey::~CollationKey()
95{
96        uprv_free(fBytes);
97}
98
99void CollationKey::adopt(uint8_t *values, int32_t count) {
100    if(fBytes != NULL) {
101        uprv_free(fBytes);
102    }
103    fBogus = FALSE;
104    fBytes = values;
105    fCount = count;
106    fCapacity = count;
107    fHashCode = kInvalidHashCode;
108}
109
110// set the key to an empty state
111CollationKey&
112CollationKey::reset()
113{
114    fCount = 0;
115    fBogus = FALSE;
116    fHashCode = kEmptyHashCode;
117
118    return *this;
119}
120
121// set the key to a "bogus" or invalid state
122CollationKey&
123CollationKey::setToBogus()
124{
125    uprv_free(fBytes);
126    fBytes = NULL;
127
128    fCapacity = 0;
129    fCount = 0;
130    fHashCode = kInvalidHashCode;
131
132    return *this;
133}
134
135UBool
136CollationKey::operator==(const CollationKey& source) const
137{
138    return (this->fCount == source.fCount &&
139            (this->fBytes == source.fBytes ||
140             uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
141}
142
143const CollationKey&
144CollationKey::operator=(const CollationKey& other)
145{
146    if (this != &other)
147    {
148        if (other.isBogus())
149        {
150            return setToBogus();
151        }
152
153        if (other.fBytes != NULL)
154        {
155            ensureCapacity(other.fCount);
156
157            if (isBogus())
158            {
159                return *this;
160            }
161
162            fHashCode = other.fHashCode;
163            uprv_memcpy(fBytes, other.fBytes, fCount);
164        }
165        else
166        {
167            fCount = 0;
168            fBogus = FALSE;
169            fHashCode = kEmptyHashCode;
170        }
171    }
172
173    return *this;
174}
175
176// Bitwise comparison for the collation keys.
177// NOTE: this is somewhat messy 'cause we can't count
178// on memcmp returning the exact values which match
179// Collator::EComparisonResult
180Collator::EComparisonResult
181CollationKey::compareTo(const CollationKey& target) const
182{
183    uint8_t *src = this->fBytes;
184    uint8_t *tgt = target.fBytes;
185
186    // are we comparing the same string
187    if (src == tgt)
188        return  Collator::EQUAL;
189
190        /*
191        int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
192        if (count == 0)
193        {
194        // If count is 0, at least one of the keys is empty.
195        // An empty key is always LESS than a non-empty one
196        // and EQUAL to another empty
197        if (this->fCount < target.fCount)
198        {
199        return Collator::LESS;
200        }
201
202          if (this->fCount > target.fCount)
203          {
204          return Collator::GREATER;
205          }
206          return Collator::EQUAL;
207          }
208    */
209
210    int                         minLength;
211    Collator::EComparisonResult result;
212
213    // are we comparing different lengths?
214    if (this->fCount != target.fCount) {
215        if (this->fCount < target.fCount) {
216            minLength = this->fCount;
217            result    =  Collator::LESS;
218        }
219        else {
220            minLength = target.fCount;
221            result    =  Collator::GREATER;
222        }
223    }
224    else {
225        minLength = target.fCount;
226        result    =  Collator::EQUAL;
227    }
228
229    if (minLength > 0) {
230        int diff = uprv_memcmp(src, tgt, minLength);
231        if (diff > 0) {
232            return Collator::GREATER;
233        }
234        else
235            if (diff < 0) {
236                return Collator::LESS;
237            }
238    }
239
240    return result;
241    /*
242    if (result < 0)
243    {
244    return Collator::LESS;
245    }
246
247      if (result > 0)
248      {
249      return Collator::GREATER;
250      }
251      return Collator::EQUAL;
252    */
253}
254
255// Bitwise comparison for the collation keys.
256UCollationResult
257CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
258{
259  if(U_SUCCESS(status)) {
260    uint8_t *src = this->fBytes;
261    uint8_t *tgt = target.fBytes;
262
263    // are we comparing the same string
264    if (src == tgt)
265        return  UCOL_EQUAL;
266
267    int                         minLength;
268    UCollationResult result;
269
270    // are we comparing different lengths?
271    if (this->fCount != target.fCount) {
272        if (this->fCount < target.fCount) {
273            minLength = this->fCount;
274            result    =  UCOL_LESS;
275        }
276        else {
277            minLength = target.fCount;
278            result    =  UCOL_GREATER;
279        }
280    }
281    else {
282        minLength = target.fCount;
283        result    =  UCOL_EQUAL;
284    }
285
286    if (minLength > 0) {
287        int diff = uprv_memcmp(src, tgt, minLength);
288        if (diff > 0) {
289            return UCOL_GREATER;
290        }
291        else
292            if (diff < 0) {
293                return UCOL_LESS;
294            }
295    }
296
297    return result;
298  } else {
299    return UCOL_EQUAL;
300  }
301}
302
303CollationKey&
304CollationKey::ensureCapacity(int32_t newSize)
305{
306    if (fCapacity < newSize)
307    {
308        uprv_free(fBytes);
309
310        fBytes = (uint8_t *)uprv_malloc(newSize);
311
312        if (fBytes == NULL)
313        {
314            return setToBogus();
315        }
316
317        uprv_memset(fBytes, 0, fCapacity);
318        fCapacity = newSize;
319    }
320
321    fBogus = FALSE;
322    fCount = newSize;
323    fHashCode = kInvalidHashCode;
324
325    return *this;
326}
327
328#ifdef U_USE_COLLATION_KEY_DEPRECATES
329// Create a copy of the byte array.
330uint8_t*
331CollationKey::toByteArray(int32_t& count) const
332{
333    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
334
335    if (result == NULL)
336    {
337        count = 0;
338    }
339    else
340    {
341        count = fCount;
342        uprv_memcpy(result, fBytes, fCount);
343    }
344
345    return result;
346}
347#endif
348
349int32_t
350CollationKey::hashCode() const
351{
352    // (Cribbed from UnicodeString)
353    // We cache the hashCode; when it becomes invalid, due to any change to the
354    // string, we note this by setting it to kInvalidHashCode. [LIU]
355
356    // Note: This method is semantically const, but physically non-const.
357
358    if (fHashCode == kInvalidHashCode)
359    {
360        UHashTok key;
361        key.pointer = fBytes;
362        ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
363#if 0
364        // We compute the hash by iterating sparsely over 64 (at most) characters
365        // spaced evenly through the string.  For each character, we multiply the
366        // previous hash value by a prime number and add the new character in,
367        // in the manner of a additive linear congruential random number generator,
368        // thus producing a pseudorandom deterministic value which should be well
369        // distributed over the output range. [LIU]
370        const uint8_t   *p = fBytes, *limit = fBytes + fCount;
371        int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
372        int32_t         hash = 0;
373
374        while (p < limit)
375        {
376            hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
377            p += inc;
378        }
379
380        // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
381        if (hash == kInvalidHashCode)
382        {
383            hash = kEmptyHashCode;
384        }
385
386        ((CollationKey *)this)->fHashCode = hash; // cast away const
387#endif
388    }
389
390    return fHashCode;
391}
392
393U_NAMESPACE_END
394
395U_CAPI int32_t U_EXPORT2
396ucol_keyHashCode(const uint8_t *key,
397                       int32_t  length)
398{
399    U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
400    return newKey.hashCode();
401}
402
403#endif /* #if !UCONFIG_NO_COLLATION */
404