1/* 2******************************************************************************* 3* Copyright (C) 1996-2006, International Business Machines Corporation and * 4* others. All Rights Reserved. * 5******************************************************************************* 6*/ 7//=============================================================================== 8// 9// File sortkey.cpp 10// 11// 12// 13// Created by: Helena Shih 14// 15// Modification History: 16// 17// Date Name Description 18// 19// 6/20/97 helena Java class name change. 20// 6/23/97 helena Added comments to make code more readable. 21// 6/26/98 erm Canged to use byte arrays instead of UnicodeString 22// 7/31/98 erm hashCode: minimum inc should be 2 not 1, 23// Cleaned up operator= 24// 07/12/99 helena HPUX 11 CC port. 25// 03/06/01 synwee Modified compareTo, to handle the result of 26// 2 string similar in contents, but one is longer 27// than the other 28//=============================================================================== 29 30#include "unicode/utypes.h" 31 32#if !UCONFIG_NO_COLLATION 33 34#include "unicode/sortkey.h" 35#include "cmemory.h" 36#include "uhash.h" 37 38U_NAMESPACE_BEGIN 39 40// A hash code of kInvalidHashCode indicates that the has code needs 41// to be computed. A hash code of kEmptyHashCode is used for empty keys 42// and for any key whose computed hash code is kInvalidHashCode. 43#define kInvalidHashCode ((int32_t)0) 44#define kEmptyHashCode ((int32_t)1) 45 46UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 47 48CollationKey::CollationKey() 49 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), 50 fHashCode(kEmptyHashCode), fBytes(NULL) 51{ 52} 53 54// Create a collation key from a bit array. 55CollationKey::CollationKey(const uint8_t* newValues, int32_t count) 56 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), 57 fHashCode(kInvalidHashCode) 58{ 59 fBytes = (uint8_t *)uprv_malloc(count); 60 61 if (fBytes == NULL) 62 { 63 setToBogus(); 64 return; 65 } 66 67 uprv_memcpy(fBytes, newValues, fCount); 68} 69 70CollationKey::CollationKey(const CollationKey& other) 71: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), 72 fHashCode(other.fHashCode), fBytes(NULL) 73{ 74 if (other.fBogus) 75 { 76 setToBogus(); 77 return; 78 } 79 80 fBytes = (uint8_t *)uprv_malloc(fCapacity); 81 82 if (fBytes == NULL) 83 { 84 setToBogus(); 85 return; 86 } 87 88 uprv_memcpy(fBytes, other.fBytes, other.fCount); 89 if(fCapacity>fCount) { 90 uprv_memset(fBytes+fCount, 0, fCapacity-fCount); 91 } 92} 93 94CollationKey::~CollationKey() 95{ 96 uprv_free(fBytes); 97} 98 99void CollationKey::adopt(uint8_t *values, int32_t count) { 100 if(fBytes != NULL) { 101 uprv_free(fBytes); 102 } 103 fBogus = FALSE; 104 fBytes = values; 105 fCount = count; 106 fCapacity = count; 107 fHashCode = kInvalidHashCode; 108} 109 110// set the key to an empty state 111CollationKey& 112CollationKey::reset() 113{ 114 fCount = 0; 115 fBogus = FALSE; 116 fHashCode = kEmptyHashCode; 117 118 return *this; 119} 120 121// set the key to a "bogus" or invalid state 122CollationKey& 123CollationKey::setToBogus() 124{ 125 uprv_free(fBytes); 126 fBytes = NULL; 127 128 fCapacity = 0; 129 fCount = 0; 130 fHashCode = kInvalidHashCode; 131 132 return *this; 133} 134 135UBool 136CollationKey::operator==(const CollationKey& source) const 137{ 138 return (this->fCount == source.fCount && 139 (this->fBytes == source.fBytes || 140 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); 141} 142 143const CollationKey& 144CollationKey::operator=(const CollationKey& other) 145{ 146 if (this != &other) 147 { 148 if (other.isBogus()) 149 { 150 return setToBogus(); 151 } 152 153 if (other.fBytes != NULL) 154 { 155 ensureCapacity(other.fCount); 156 157 if (isBogus()) 158 { 159 return *this; 160 } 161 162 fHashCode = other.fHashCode; 163 uprv_memcpy(fBytes, other.fBytes, fCount); 164 } 165 else 166 { 167 fCount = 0; 168 fBogus = FALSE; 169 fHashCode = kEmptyHashCode; 170 } 171 } 172 173 return *this; 174} 175 176// Bitwise comparison for the collation keys. 177// NOTE: this is somewhat messy 'cause we can't count 178// on memcmp returning the exact values which match 179// Collator::EComparisonResult 180Collator::EComparisonResult 181CollationKey::compareTo(const CollationKey& target) const 182{ 183 uint8_t *src = this->fBytes; 184 uint8_t *tgt = target.fBytes; 185 186 // are we comparing the same string 187 if (src == tgt) 188 return Collator::EQUAL; 189 190 /* 191 int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; 192 if (count == 0) 193 { 194 // If count is 0, at least one of the keys is empty. 195 // An empty key is always LESS than a non-empty one 196 // and EQUAL to another empty 197 if (this->fCount < target.fCount) 198 { 199 return Collator::LESS; 200 } 201 202 if (this->fCount > target.fCount) 203 { 204 return Collator::GREATER; 205 } 206 return Collator::EQUAL; 207 } 208 */ 209 210 int minLength; 211 Collator::EComparisonResult result; 212 213 // are we comparing different lengths? 214 if (this->fCount != target.fCount) { 215 if (this->fCount < target.fCount) { 216 minLength = this->fCount; 217 result = Collator::LESS; 218 } 219 else { 220 minLength = target.fCount; 221 result = Collator::GREATER; 222 } 223 } 224 else { 225 minLength = target.fCount; 226 result = Collator::EQUAL; 227 } 228 229 if (minLength > 0) { 230 int diff = uprv_memcmp(src, tgt, minLength); 231 if (diff > 0) { 232 return Collator::GREATER; 233 } 234 else 235 if (diff < 0) { 236 return Collator::LESS; 237 } 238 } 239 240 return result; 241 /* 242 if (result < 0) 243 { 244 return Collator::LESS; 245 } 246 247 if (result > 0) 248 { 249 return Collator::GREATER; 250 } 251 return Collator::EQUAL; 252 */ 253} 254 255// Bitwise comparison for the collation keys. 256UCollationResult 257CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 258{ 259 if(U_SUCCESS(status)) { 260 uint8_t *src = this->fBytes; 261 uint8_t *tgt = target.fBytes; 262 263 // are we comparing the same string 264 if (src == tgt) 265 return UCOL_EQUAL; 266 267 int minLength; 268 UCollationResult result; 269 270 // are we comparing different lengths? 271 if (this->fCount != target.fCount) { 272 if (this->fCount < target.fCount) { 273 minLength = this->fCount; 274 result = UCOL_LESS; 275 } 276 else { 277 minLength = target.fCount; 278 result = UCOL_GREATER; 279 } 280 } 281 else { 282 minLength = target.fCount; 283 result = UCOL_EQUAL; 284 } 285 286 if (minLength > 0) { 287 int diff = uprv_memcmp(src, tgt, minLength); 288 if (diff > 0) { 289 return UCOL_GREATER; 290 } 291 else 292 if (diff < 0) { 293 return UCOL_LESS; 294 } 295 } 296 297 return result; 298 } else { 299 return UCOL_EQUAL; 300 } 301} 302 303CollationKey& 304CollationKey::ensureCapacity(int32_t newSize) 305{ 306 if (fCapacity < newSize) 307 { 308 uprv_free(fBytes); 309 310 fBytes = (uint8_t *)uprv_malloc(newSize); 311 312 if (fBytes == NULL) 313 { 314 return setToBogus(); 315 } 316 317 uprv_memset(fBytes, 0, fCapacity); 318 fCapacity = newSize; 319 } 320 321 fBogus = FALSE; 322 fCount = newSize; 323 fHashCode = kInvalidHashCode; 324 325 return *this; 326} 327 328#ifdef U_USE_COLLATION_KEY_DEPRECATES 329// Create a copy of the byte array. 330uint8_t* 331CollationKey::toByteArray(int32_t& count) const 332{ 333 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 334 335 if (result == NULL) 336 { 337 count = 0; 338 } 339 else 340 { 341 count = fCount; 342 uprv_memcpy(result, fBytes, fCount); 343 } 344 345 return result; 346} 347#endif 348 349int32_t 350CollationKey::hashCode() const 351{ 352 // (Cribbed from UnicodeString) 353 // We cache the hashCode; when it becomes invalid, due to any change to the 354 // string, we note this by setting it to kInvalidHashCode. [LIU] 355 356 // Note: This method is semantically const, but physically non-const. 357 358 if (fHashCode == kInvalidHashCode) 359 { 360 UHashTok key; 361 key.pointer = fBytes; 362 ((CollationKey *)this)->fHashCode = uhash_hashChars(key); 363#if 0 364 // We compute the hash by iterating sparsely over 64 (at most) characters 365 // spaced evenly through the string. For each character, we multiply the 366 // previous hash value by a prime number and add the new character in, 367 // in the manner of a additive linear congruential random number generator, 368 // thus producing a pseudorandom deterministic value which should be well 369 // distributed over the output range. [LIU] 370 const uint8_t *p = fBytes, *limit = fBytes + fCount; 371 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); 372 int32_t hash = 0; 373 374 while (p < limit) 375 { 376 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); 377 p += inc; 378 } 379 380 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode 381 if (hash == kInvalidHashCode) 382 { 383 hash = kEmptyHashCode; 384 } 385 386 ((CollationKey *)this)->fHashCode = hash; // cast away const 387#endif 388 } 389 390 return fHashCode; 391} 392 393U_NAMESPACE_END 394 395U_CAPI int32_t U_EXPORT2 396ucol_keyHashCode(const uint8_t *key, 397 int32_t length) 398{ 399 U_NAMESPACE_QUALIFIER CollationKey newKey(key, length); 400 return newKey.hashCode(); 401} 402 403#endif /* #if !UCONFIG_NO_COLLATION */ 404