1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 1996-2011, International Business Machines Corporation and * 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* others. All Rights Reserved. * 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//=============================================================================== 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// File sortkey.cpp 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Created by: Helena Shih 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Modification History: 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Date Name Description 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 6/20/97 helena Java class name change. 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 6/23/97 helena Added comments to make code more readable. 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 6/26/98 erm Canged to use byte arrays instead of UnicodeString 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 7/31/98 erm hashCode: minimum inc should be 2 not 1, 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Cleaned up operator= 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 07/12/99 helena HPUX 11 CC port. 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 03/06/01 synwee Modified compareTo, to handle the result of 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// 2 string similar in contents, but one is longer 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// than the other 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//=============================================================================== 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/sortkey.h" 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uhash.h" 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// A hash code of kInvalidHashCode indicates that the has code needs 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// to be computed. A hash code of kEmptyHashCode is used for empty keys 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// and for any key whose computed hash code is kInvalidHashCode. 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define kInvalidHashCode ((int32_t)0) 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define kEmptyHashCode ((int32_t)1) 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey() 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode(kEmptyHashCode), fBytes(NULL) 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Create a collation key from a bit array. 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey(const uint8_t* newValues, int32_t count) 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode(kInvalidHashCode) 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBytes = (uint8_t *)uprv_malloc(count); 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fBytes == NULL) 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(fBytes, newValues, fCount); 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::CollationKey(const CollationKey& other) 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode(other.fHashCode), fBytes(NULL) 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (other.fBogus) 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBytes = (uint8_t *)uprv_malloc(fCapacity); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fBytes == NULL) 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setToBogus(); 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(fBytes, other.fBytes, other.fCount); 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(fCapacity>fCount) { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(fBytes+fCount, 0, fCapacity-fCount); 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::~CollationKey() 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(fBytes); 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid CollationKey::adopt(uint8_t *values, int32_t capacity, int32_t count) { 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(fBytes != NULL) { 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(fBytes); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBytes = values; 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fCapacity = capacity; 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho setLength(count); 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid CollationKey::setLength(int32_t newLength) { 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fBogus = FALSE; 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho fCount = newLength; 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode = kInvalidHashCode; 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// set the key to an empty state 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey& 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::reset() 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCount = 0; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBogus = FALSE; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode = kEmptyHashCode; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// set the key to a "bogus" or invalid state 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey& 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::setToBogus() 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(fBytes); 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBytes = NULL; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCapacity = 0; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCount = 0; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode = kInvalidHashCode; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::operator==(const CollationKey& source) const 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (this->fCount == source.fCount && 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (this->fBytes == source.fBytes || 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst CollationKey& 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::operator=(const CollationKey& other) 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this != &other) 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (other.isBogus()) 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return setToBogus(); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (other.fBytes != NULL) 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ensureCapacity(other.fCount); 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isBogus()) 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode = other.fHashCode; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(fBytes, other.fBytes, fCount); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCount = 0; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBogus = FALSE; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode = kEmptyHashCode; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Bitwise comparison for the collation keys. 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// NOTE: this is somewhat messy 'cause we can't count 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// on memcmp returning the exact values which match 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Collator::EComparisonResult 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollator::EComparisonResult 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::compareTo(const CollationKey& target) const 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *src = this->fBytes; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *tgt = target.fBytes; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // are we comparing the same string 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (src == tgt) 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::EQUAL; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (count == 0) 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If count is 0, at least one of the keys is empty. 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // An empty key is always LESS than a non-empty one 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // and EQUAL to another empty 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this->fCount < target.fCount) 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::LESS; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this->fCount > target.fCount) 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::GREATER; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::EQUAL; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int minLength; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Collator::EComparisonResult result; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // are we comparing different lengths? 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this->fCount != target.fCount) { 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this->fCount < target.fCount) { 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength = this->fCount; 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = Collator::LESS; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength = target.fCount; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = Collator::GREATER; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength = target.fCount; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = Collator::EQUAL; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (minLength > 0) { 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int diff = uprv_memcmp(src, tgt, minLength); 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (diff > 0) { 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::GREATER; 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (diff < 0) { 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::LESS; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (result < 0) 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::LESS; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (result > 0) 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::GREATER; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return Collator::EQUAL; 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Bitwise comparison for the collation keys. 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUCollationResult 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(status)) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *src = this->fBytes; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *tgt = target.fBytes; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // are we comparing the same string 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (src == tgt) 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCOL_EQUAL; 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int minLength; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCollationResult result; 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // are we comparing different lengths? 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this->fCount != target.fCount) { 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (this->fCount < target.fCount) { 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength = this->fCount; 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = UCOL_LESS; 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength = target.fCount; 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = UCOL_GREATER; 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength = target.fCount; 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = UCOL_EQUAL; 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (minLength > 0) { 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int diff = uprv_memcmp(src, tgt, minLength); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (diff > 0) { 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCOL_GREATER; 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (diff < 0) { 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCOL_LESS; 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCOL_EQUAL; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey& 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::ensureCapacity(int32_t newSize) 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fCapacity < newSize) 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(fBytes); 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBytes = (uint8_t *)uprv_malloc(newSize); 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fBytes == NULL) 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return setToBogus(); 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(fBytes, 0, fCapacity); 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCapacity = newSize; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fBogus = FALSE; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fCount = newSize; 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fHashCode = kInvalidHashCode; 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef U_USE_COLLATION_KEY_DEPRECATES 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Create a copy of the byte array. 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruuint8_t* 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::toByteArray(int32_t& count) const 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (result == NULL) 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = 0; 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = fCount; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(result, fBytes, fCount); 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCollationKey::hashCode() const 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (Cribbed from UnicodeString) 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We cache the hashCode; when it becomes invalid, due to any change to the 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // string, we note this by setting it to kInvalidHashCode. [LIU] 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: This method is semantically const, but physically non-const. 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (fHashCode == kInvalidHashCode) 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UHashTok key; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru key.pointer = fBytes; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((CollationKey *)this)->fHashCode = uhash_hashChars(key); 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We compute the hash by iterating sparsely over 64 (at most) characters 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // spaced evenly through the string. For each character, we multiply the 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // previous hash value by a prime number and add the new character in, 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // in the manner of a additive linear congruential random number generator, 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // thus producing a pseudorandom deterministic value which should be well 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // distributed over the output range. [LIU] 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *p = fBytes, *limit = fBytes + fCount; 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t hash = 0; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (p < limit) 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru p += inc; 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (hash == kInvalidHashCode) 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hash = kEmptyHashCode; 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((CollationKey *)this)->fHashCode = hash; // cast away const 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fHashCode; 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucol_keyHashCode(const uint8_t *key, 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length) 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_NAMESPACE_QUALIFIER CollationKey newKey(key, length); 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return newKey.hashCode(); 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 408