1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 1996-2011, International Business Machines Corporation and    *
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* others. All Rights Reserved.                                                *
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//===============================================================================
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// File sortkey.cpp
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Created by: Helena Shih
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Modification History:
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  Date         Name          Description
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  6/20/97      helena        Java class name change.
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  6/23/97      helena        Added comments to make code more readable.
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                             Cleaned up operator=
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 07/12/99      helena        HPUX 11 CC port.
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 03/06/01      synwee        Modified compareTo, to handle the result of
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                             2 string similar in contents, but one is longer
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                             than the other
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//===============================================================================
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/sortkey.h"
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uhash.h"
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// A hash code of kInvalidHashCode indicates that the has code needs
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// to be computed. A hash code of kEmptyHashCode is used for empty keys
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and for any key whose computed hash code is kInvalidHashCode.
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define kInvalidHashCode ((int32_t)0)
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define kEmptyHashCode ((int32_t)1)
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey()
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fHashCode(kEmptyHashCode), fBytes(NULL)
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Create a collation key from a bit array.
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fHashCode(kInvalidHashCode)
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = (uint8_t *)uprv_malloc(count);
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fBytes == NULL)
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        setToBogus();
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memcpy(fBytes, newValues, fCount);
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey(const CollationKey& other)
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  fHashCode(other.fHashCode), fBytes(NULL)
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (other.fBogus)
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        setToBogus();
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = (uint8_t *)uprv_malloc(fCapacity);
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fBytes == NULL)
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        setToBogus();
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memcpy(fBytes, other.fBytes, other.fCount);
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(fCapacity>fCount) {
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::~CollationKey()
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(fBytes);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) {
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(fBytes != NULL) {
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(fBytes);
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = values;
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fCapacity = capacity;
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    setLength(count);
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid CollationKey::setLength(int32_t newLength) {
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fBogus = FALSE;
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    fCount = newLength;
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kInvalidHashCode;
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// set the key to an empty state
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey&
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::reset()
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = 0;
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBogus = FALSE;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kEmptyHashCode;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// set the key to a "bogus" or invalid state
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey&
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::setToBogus()
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(fBytes);
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = NULL;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCapacity = 0;
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = 0;
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kInvalidHashCode;
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::operator==(const CollationKey& source) const
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (this->fCount == source.fCount &&
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (this->fBytes == source.fBytes ||
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CollationKey&
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::operator=(const CollationKey& other)
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (this != &other)
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (other.isBogus())
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return setToBogus();
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (other.fBytes != NULL)
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ensureCapacity(other.fCount);
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (isBogus())
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return *this;
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fHashCode = other.fHashCode;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memcpy(fBytes, other.fBytes, fCount);
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fCount = 0;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fBogus = FALSE;
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fHashCode = kEmptyHashCode;
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Bitwise comparison for the collation keys.
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// NOTE: this is somewhat messy 'cause we can't count
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// on memcmp returning the exact values which match
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Collator::EComparisonResult
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollator::EComparisonResult
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::compareTo(const CollationKey& target) const
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *src = this->fBytes;
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *tgt = target.fBytes;
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing the same string
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (src == tgt)
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return  Collator::EQUAL;
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (count == 0)
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // If count is 0, at least one of the keys is empty.
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // An empty key is always LESS than a non-empty one
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // and EQUAL to another empty
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (this->fCount < target.fCount)
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return Collator::LESS;
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          if (this->fCount > target.fCount)
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          {
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          return Collator::GREATER;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          }
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          return Collator::EQUAL;
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          }
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    */
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int                         minLength;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Collator::EComparisonResult result;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing different lengths?
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (this->fCount != target.fCount) {
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (this->fCount < target.fCount) {
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = this->fCount;
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  Collator::LESS;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else {
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = target.fCount;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  Collator::GREATER;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength = target.fCount;
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result    =  Collator::EQUAL;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (minLength > 0) {
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int diff = uprv_memcmp(src, tgt, minLength);
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (diff > 0) {
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return Collator::GREATER;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (diff < 0) {
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return Collator::LESS;
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (result < 0)
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return Collator::LESS;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      if (result > 0)
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      {
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      return Collator::GREATER;
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      return Collator::EQUAL;
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    */
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Bitwise comparison for the collation keys.
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCollationResult
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(U_SUCCESS(status)) {
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *src = this->fBytes;
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *tgt = target.fBytes;
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing the same string
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (src == tgt)
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return  UCOL_EQUAL;
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int                         minLength;
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCollationResult result;
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing different lengths?
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (this->fCount != target.fCount) {
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (this->fCount < target.fCount) {
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = this->fCount;
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  UCOL_LESS;
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else {
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = target.fCount;
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  UCOL_GREATER;
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength = target.fCount;
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result    =  UCOL_EQUAL;
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (minLength > 0) {
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int diff = uprv_memcmp(src, tgt, minLength);
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (diff > 0) {
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return UCOL_GREATER;
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (diff < 0) {
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return UCOL_LESS;
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return UCOL_EQUAL;
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey&
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::ensureCapacity(int32_t newSize)
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fCapacity < newSize)
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(fBytes);
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fBytes = (uint8_t *)uprv_malloc(newSize);
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (fBytes == NULL)
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return setToBogus();
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memset(fBytes, 0, fCapacity);
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fCapacity = newSize;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBogus = FALSE;
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = newSize;
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kInvalidHashCode;
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_USE_COLLATION_KEY_DEPRECATES
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Create a copy of the byte array.
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuint8_t*
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::toByteArray(int32_t& count) const
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (result == NULL)
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count = 0;
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count = fCount;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memcpy(result, fBytes, fCount);
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::hashCode() const
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // (Cribbed from UnicodeString)
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // We cache the hashCode; when it becomes invalid, due to any change to the
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // string, we note this by setting it to kInvalidHashCode. [LIU]
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note: This method is semantically const, but physically non-const.
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fHashCode == kInvalidHashCode)
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UHashTok key;
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        key.pointer = fBytes;
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // We compute the hash by iterating sparsely over 64 (at most) characters
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // spaced evenly through the string.  For each character, we multiply the
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // previous hash value by a prime number and add the new character in,
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // in the manner of a additive linear congruential random number generator,
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // thus producing a pseudorandom deterministic value which should be well
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // distributed over the output range. [LIU]
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const uint8_t   *p = fBytes, *limit = fBytes + fCount;
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t         hash = 0;
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while (p < limit)
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            p += inc;
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (hash == kInvalidHashCode)
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            hash = kEmptyHashCode;
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CollationKey *)this)->fHashCode = hash; // cast away const
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fHashCode;
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucol_keyHashCode(const uint8_t *key,
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       int32_t  length)
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return newKey.hashCode();
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
408