sortkey.cpp revision ac04d0bbe12b3ef54518635711412f178cb4d16
1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 1996-2006, International Business Machines Corporation and    *
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* others. All Rights Reserved.                                                *
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//===============================================================================
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// File sortkey.cpp
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Created by: Helena Shih
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Modification History:
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  Date         Name          Description
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  6/20/97      helena        Java class name change.
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  6/23/97      helena        Added comments to make code more readable.
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  6/26/98      erm           Canged to use byte arrays instead of UnicodeString
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//  7/31/98      erm           hashCode: minimum inc should be 2 not 1,
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                             Cleaned up operator=
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 07/12/99      helena        HPUX 11 CC port.
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 03/06/01      synwee        Modified compareTo, to handle the result of
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                             2 string similar in contents, but one is longer
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//                             than the other
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//===============================================================================
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/sortkey.h"
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uhash.h"
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// A hash code of kInvalidHashCode indicates that the has code needs
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// to be computed. A hash code of kEmptyHashCode is used for empty keys
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and for any key whose computed hash code is kInvalidHashCode.
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define kInvalidHashCode ((int32_t)0)
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define kEmptyHashCode ((int32_t)1)
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey()
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    : UObject(), fBogus(FALSE), fCount(0), fCapacity(0),
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fHashCode(kEmptyHashCode), fBytes(NULL)
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Create a collation key from a bit array.
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey(const uint8_t* newValues, int32_t count)
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    : UObject(), fBogus(FALSE), fCount(count), fCapacity(count),
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      fHashCode(kInvalidHashCode)
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = (uint8_t *)uprv_malloc(count);
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fBytes == NULL)
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        setToBogus();
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memcpy(fBytes, newValues, fCount);
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey(const CollationKey& other)
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity),
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  fHashCode(other.fHashCode), fBytes(NULL)
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (other.fBogus)
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        setToBogus();
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = (uint8_t *)uprv_malloc(fCapacity);
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fBytes == NULL)
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        setToBogus();
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memcpy(fBytes, other.fBytes, other.fCount);
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(fCapacity>fCount) {
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memset(fBytes+fCount, 0, fCapacity-fCount);
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::~CollationKey()
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(fBytes);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CollationKey::adopt(uint8_t *values, int32_t count) {
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(fBytes != NULL) {
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(fBytes);
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBogus = FALSE;
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = values;
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = count;
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCapacity = count;
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kInvalidHashCode;
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// set the key to an empty state
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey&
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::reset()
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = 0;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBogus = FALSE;
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kEmptyHashCode;
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// set the key to a "bogus" or invalid state
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey&
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::setToBogus()
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(fBytes);
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBytes = NULL;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCapacity = 0;
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = 0;
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kInvalidHashCode;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::operator==(const CollationKey& source) const
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (this->fCount == source.fCount &&
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (this->fBytes == source.fBytes ||
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0));
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CollationKey&
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::operator=(const CollationKey& other)
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (this != &other)
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (other.isBogus())
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return setToBogus();
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (other.fBytes != NULL)
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ensureCapacity(other.fCount);
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (isBogus())
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            {
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return *this;
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fHashCode = other.fHashCode;
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memcpy(fBytes, other.fBytes, fCount);
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fCount = 0;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fBogus = FALSE;
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fHashCode = kEmptyHashCode;
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Bitwise comparison for the collation keys.
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// NOTE: this is somewhat messy 'cause we can't count
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// on memcmp returning the exact values which match
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Collator::EComparisonResult
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollator::EComparisonResult
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::compareTo(const CollationKey& target) const
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *src = this->fBytes;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *tgt = target.fBytes;
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing the same string
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (src == tgt)
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return  Collator::EQUAL;
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int count = (this->fCount < target.fCount) ? this->fCount : target.fCount;
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (count == 0)
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // If count is 0, at least one of the keys is empty.
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // An empty key is always LESS than a non-empty one
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // and EQUAL to another empty
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (this->fCount < target.fCount)
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return Collator::LESS;
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          if (this->fCount > target.fCount)
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          return Collator::GREATER;
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          }
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          return Collator::EQUAL;
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          }
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    */
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int                         minLength;
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Collator::EComparisonResult result;
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing different lengths?
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (this->fCount != target.fCount) {
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (this->fCount < target.fCount) {
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = this->fCount;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  Collator::LESS;
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else {
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = target.fCount;
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  Collator::GREATER;
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength = target.fCount;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result    =  Collator::EQUAL;
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (minLength > 0) {
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int diff = uprv_memcmp(src, tgt, minLength);
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (diff > 0) {
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return Collator::GREATER;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (diff < 0) {
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return Collator::LESS;
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (result < 0)
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return Collator::LESS;
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      if (result > 0)
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      {
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      return Collator::GREATER;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      }
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru      return Collator::EQUAL;
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    */
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Bitwise comparison for the collation keys.
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCollationResult
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  if(U_SUCCESS(status)) {
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *src = this->fBytes;
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *tgt = target.fBytes;
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing the same string
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (src == tgt)
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return  UCOL_EQUAL;
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int                         minLength;
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UCollationResult result;
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // are we comparing different lengths?
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (this->fCount != target.fCount) {
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (this->fCount < target.fCount) {
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = this->fCount;
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  UCOL_LESS;
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else {
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            minLength = target.fCount;
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result    =  UCOL_GREATER;
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength = target.fCount;
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result    =  UCOL_EQUAL;
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (minLength > 0) {
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int diff = uprv_memcmp(src, tgt, minLength);
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (diff > 0) {
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return UCOL_GREATER;
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        else
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (diff < 0) {
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return UCOL_LESS;
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  } else {
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return UCOL_EQUAL;
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru  }
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey&
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::ensureCapacity(int32_t newSize)
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fCapacity < newSize)
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(fBytes);
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fBytes = (uint8_t *)uprv_malloc(newSize);
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (fBytes == NULL)
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return setToBogus();
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memset(fBytes, 0, fCapacity);
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fCapacity = newSize;
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fBogus = FALSE;
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fCount = newSize;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fHashCode = kInvalidHashCode;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *this;
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_USE_COLLATION_KEY_DEPRECATES
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Create a copy of the byte array.
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuint8_t*
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::toByteArray(int32_t& count) const
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount );
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (result == NULL)
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count = 0;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        count = fCount;
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memcpy(result, fBytes, fCount);
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return result;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::hashCode() const
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // (Cribbed from UnicodeString)
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // We cache the hashCode; when it becomes invalid, due to any change to the
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // string, we note this by setting it to kInvalidHashCode. [LIU]
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note: This method is semantically const, but physically non-const.
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (fHashCode == kInvalidHashCode)
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UHashTok key;
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        key.pointer = fBytes;
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CollationKey *)this)->fHashCode = uhash_hashChars(key);
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // We compute the hash by iterating sparsely over 64 (at most) characters
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // spaced evenly through the string.  For each character, we multiply the
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // previous hash value by a prime number and add the new character in,
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // in the manner of a additive linear congruential random number generator,
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // thus producing a pseudorandom deterministic value which should be well
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // distributed over the output range. [LIU]
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const uint8_t   *p = fBytes, *limit = fBytes + fCount;
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t         inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1);
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t         hash = 0;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while (p < limit)
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            hash = ( hash * 37 ) + ((p[0] << 8) + p[1]);
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            p += inc;
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (hash == kInvalidHashCode)
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            hash = kEmptyHashCode;
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ((CollationKey *)this)->fHashCode = hash; // cast away const
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fHashCode;
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucol_keyHashCode(const uint8_t *key,
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       int32_t  length)
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_NAMESPACE_QUALIFIER CollationKey newKey(key, length);
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return newKey.hashCode();
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
404