1/* 2******************************************************************************* 3* Copyright (C) 1996-2012, International Business Machines Corporation and 4* others. All Rights Reserved. 5******************************************************************************* 6*/ 7//=============================================================================== 8// 9// File sortkey.cpp 10// 11// 12// 13// Created by: Helena Shih 14// 15// Modification History: 16// 17// Date Name Description 18// 19// 6/20/97 helena Java class name change. 20// 6/23/97 helena Added comments to make code more readable. 21// 6/26/98 erm Canged to use byte arrays instead of UnicodeString 22// 7/31/98 erm hashCode: minimum inc should be 2 not 1, 23// Cleaned up operator= 24// 07/12/99 helena HPUX 11 CC port. 25// 03/06/01 synwee Modified compareTo, to handle the result of 26// 2 string similar in contents, but one is longer 27// than the other 28//=============================================================================== 29 30#include "unicode/utypes.h" 31 32#if !UCONFIG_NO_COLLATION 33 34#include "unicode/sortkey.h" 35#include "cmemory.h" 36#include "uelement.h" 37#include "ustr_imp.h" 38 39U_NAMESPACE_BEGIN 40 41// A hash code of kInvalidHashCode indicates that the hash code needs 42// to be computed. A hash code of kEmptyHashCode is used for empty keys 43// and for any key whose computed hash code is kInvalidHashCode. 44static const int32_t kInvalidHashCode = 0; 45static const int32_t kEmptyHashCode = 1; 46// The "bogus hash code" replaces a separate fBogus flag. 47static const int32_t kBogusHashCode = 2; 48 49UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) 50 51CollationKey::CollationKey() 52 : UObject(), fFlagAndLength(0), 53 fHashCode(kEmptyHashCode) 54{ 55} 56 57// Create a collation key from a bit array. 58CollationKey::CollationKey(const uint8_t* newValues, int32_t count) 59 : UObject(), fFlagAndLength(count), 60 fHashCode(kInvalidHashCode) 61{ 62 if (count < 0 || (newValues == NULL && count != 0) || 63 (count > getCapacity() && reallocate(count, 0) == NULL)) { 64 setToBogus(); 65 return; 66 } 67 68 if (count > 0) { 69 uprv_memcpy(getBytes(), newValues, count); 70 } 71} 72 73CollationKey::CollationKey(const CollationKey& other) 74 : UObject(other), fFlagAndLength(other.getLength()), 75 fHashCode(other.fHashCode) 76{ 77 if (other.isBogus()) 78 { 79 setToBogus(); 80 return; 81 } 82 83 int32_t length = fFlagAndLength; 84 if (length > getCapacity() && reallocate(length, 0) == NULL) { 85 setToBogus(); 86 return; 87 } 88 89 if (length > 0) { 90 uprv_memcpy(getBytes(), other.getBytes(), length); 91 } 92} 93 94CollationKey::~CollationKey() 95{ 96 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 97} 98 99uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) { 100 uint8_t *newBytes = static_cast<uint8_t *>(uprv_malloc(newCapacity)); 101 if(newBytes == NULL) { return NULL; } 102 if(length > 0) { 103 uprv_memcpy(newBytes, getBytes(), length); 104 } 105 if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); } 106 fUnion.fFields.fBytes = newBytes; 107 fUnion.fFields.fCapacity = newCapacity; 108 fFlagAndLength |= 0x80000000; 109 return newBytes; 110} 111 112void CollationKey::setLength(int32_t newLength) { 113 // U_ASSERT(newLength >= 0 && newLength <= getCapacity()); 114 fFlagAndLength = (fFlagAndLength & 0x80000000) | newLength; 115 fHashCode = kInvalidHashCode; 116} 117 118// set the key to an empty state 119CollationKey& 120CollationKey::reset() 121{ 122 fFlagAndLength &= 0x80000000; 123 fHashCode = kEmptyHashCode; 124 125 return *this; 126} 127 128// set the key to a "bogus" or invalid state 129CollationKey& 130CollationKey::setToBogus() 131{ 132 fFlagAndLength &= 0x80000000; 133 fHashCode = kBogusHashCode; 134 135 return *this; 136} 137 138UBool 139CollationKey::operator==(const CollationKey& source) const 140{ 141 return getLength() == source.getLength() && 142 (this == &source || 143 uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0); 144} 145 146const CollationKey& 147CollationKey::operator=(const CollationKey& other) 148{ 149 if (this != &other) 150 { 151 if (other.isBogus()) 152 { 153 return setToBogus(); 154 } 155 156 int32_t length = other.getLength(); 157 if (length > getCapacity() && reallocate(length, 0) == NULL) { 158 return setToBogus(); 159 } 160 if (length > 0) { 161 uprv_memcpy(getBytes(), other.getBytes(), length); 162 } 163 fFlagAndLength = (fFlagAndLength & 0x80000000) | length; 164 fHashCode = other.fHashCode; 165 } 166 167 return *this; 168} 169 170// Bitwise comparison for the collation keys. 171Collator::EComparisonResult 172CollationKey::compareTo(const CollationKey& target) const 173{ 174 UErrorCode errorCode = U_ZERO_ERROR; 175 return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode)); 176} 177 178// Bitwise comparison for the collation keys. 179UCollationResult 180CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const 181{ 182 if(U_SUCCESS(status)) { 183 const uint8_t *src = getBytes(); 184 const uint8_t *tgt = target.getBytes(); 185 186 // are we comparing the same string 187 if (src == tgt) 188 return UCOL_EQUAL; 189 190 UCollationResult result; 191 192 // are we comparing different lengths? 193 int32_t minLength = getLength(); 194 int32_t targetLength = target.getLength(); 195 if (minLength < targetLength) { 196 result = UCOL_LESS; 197 } else if (minLength == targetLength) { 198 result = UCOL_EQUAL; 199 } else { 200 minLength = targetLength; 201 result = UCOL_GREATER; 202 } 203 204 if (minLength > 0) { 205 int diff = uprv_memcmp(src, tgt, minLength); 206 if (diff > 0) { 207 return UCOL_GREATER; 208 } 209 else 210 if (diff < 0) { 211 return UCOL_LESS; 212 } 213 } 214 215 return result; 216 } else { 217 return UCOL_EQUAL; 218 } 219} 220 221#ifdef U_USE_COLLATION_KEY_DEPRECATES 222// Create a copy of the byte array. 223uint8_t* 224CollationKey::toByteArray(int32_t& count) const 225{ 226 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); 227 228 if (result == NULL) 229 { 230 count = 0; 231 } 232 else 233 { 234 count = fCount; 235 if (count > 0) { 236 uprv_memcpy(result, fBytes, fCount); 237 } 238 } 239 240 return result; 241} 242#endif 243 244static int32_t 245computeHashCode(const uint8_t *key, int32_t length) { 246 const char *s = reinterpret_cast<const char *>(key); 247 int32_t hash; 248 if (s == NULL || length == 0) { 249 hash = kEmptyHashCode; 250 } else { 251 hash = ustr_hashCharsN(s, length); 252 if (hash == kInvalidHashCode || hash == kBogusHashCode) { 253 hash = kEmptyHashCode; 254 } 255 } 256 return hash; 257} 258 259int32_t 260CollationKey::hashCode() const 261{ 262 // (Cribbed from UnicodeString) 263 // We cache the hashCode; when it becomes invalid, due to any change to the 264 // string, we note this by setting it to kInvalidHashCode. [LIU] 265 266 // Note: This method is semantically const, but physically non-const. 267 268 if (fHashCode == kInvalidHashCode) 269 { 270 fHashCode = computeHashCode(getBytes(), getLength()); 271 } 272 273 return fHashCode; 274} 275 276U_NAMESPACE_END 277 278U_CAPI int32_t U_EXPORT2 279ucol_keyHashCode(const uint8_t *key, 280 int32_t length) 281{ 282 return icu::computeHashCode(key, length); 283} 284 285#endif /* #if !UCONFIG_NO_COLLATION */ 286