1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* Copyright (C) 2013-2015, International Business Machines 6* Corporation and others. All Rights Reserved. 7******************************************************************************* 8* collationsettings.cpp 9* 10* created on: 2013feb07 11* created by: Markus W. Scherer 12*/ 13 14#include "unicode/utypes.h" 15 16#if !UCONFIG_NO_COLLATION 17 18#include "unicode/ucol.h" 19#include "cmemory.h" 20#include "collation.h" 21#include "collationdata.h" 22#include "collationsettings.h" 23#include "sharedobject.h" 24#include "uassert.h" 25#include "umutex.h" 26#include "uvectr32.h" 27 28U_NAMESPACE_BEGIN 29 30CollationSettings::CollationSettings(const CollationSettings &other) 31 : SharedObject(other), 32 options(other.options), variableTop(other.variableTop), 33 reorderTable(NULL), 34 minHighNoReorder(other.minHighNoReorder), 35 reorderRanges(NULL), reorderRangesLength(0), 36 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), 37 fastLatinOptions(other.fastLatinOptions) { 38 UErrorCode errorCode = U_ZERO_ERROR; 39 copyReorderingFrom(other, errorCode); 40 if(fastLatinOptions >= 0) { 41 uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); 42 } 43} 44 45CollationSettings::~CollationSettings() { 46 if(reorderCodesCapacity != 0) { 47 uprv_free(const_cast<int32_t *>(reorderCodes)); 48 } 49} 50 51UBool 52CollationSettings::operator==(const CollationSettings &other) const { 53 if(options != other.options) { return FALSE; } 54 if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } 55 if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } 56 for(int32_t i = 0; i < reorderCodesLength; ++i) { 57 if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } 58 } 59 return TRUE; 60} 61 62int32_t 63CollationSettings::hashCode() const { 64 int32_t h = options << 8; 65 if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } 66 h ^= reorderCodesLength; 67 for(int32_t i = 0; i < reorderCodesLength; ++i) { 68 h ^= (reorderCodes[i] << i); 69 } 70 return h; 71} 72 73void 74CollationSettings::resetReordering() { 75 // When we turn off reordering, we want to set a NULL permutation 76 // rather than a no-op permutation. 77 // Keep the memory via reorderCodes and its capacity. 78 reorderTable = NULL; 79 minHighNoReorder = 0; 80 reorderRangesLength = 0; 81 reorderCodesLength = 0; 82} 83 84void 85CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, 86 const uint32_t *ranges, int32_t rangesLength, 87 const uint8_t *table, UErrorCode &errorCode) { 88 if(U_FAILURE(errorCode)) { return; } 89 if(table != NULL && 90 (rangesLength == 0 ? 91 !reorderTableHasSplitBytes(table) : 92 rangesLength >= 2 && 93 // The first offset must be 0. The last offset must not be 0. 94 (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { 95 // We need to release the memory before setting the alias pointer. 96 if(reorderCodesCapacity != 0) { 97 uprv_free(const_cast<int32_t *>(reorderCodes)); 98 reorderCodesCapacity = 0; 99 } 100 reorderTable = table; 101 reorderCodes = codes; 102 reorderCodesLength = length; 103 // Drop ranges before the first split byte. They are reordered by the table. 104 // This then speeds up reordering of the remaining ranges. 105 int32_t firstSplitByteRangeIndex = 0; 106 while(firstSplitByteRangeIndex < rangesLength && 107 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { 108 // The second byte of the primary limit is 0. 109 ++firstSplitByteRangeIndex; 110 } 111 if(firstSplitByteRangeIndex == rangesLength) { 112 U_ASSERT(!reorderTableHasSplitBytes(table)); 113 minHighNoReorder = 0; 114 reorderRanges = NULL; 115 reorderRangesLength = 0; 116 } else { 117 U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); 118 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; 119 reorderRanges = ranges + firstSplitByteRangeIndex; 120 reorderRangesLength = rangesLength - firstSplitByteRangeIndex; 121 } 122 return; 123 } 124 // Regenerate missing data. 125 setReordering(data, codes, length, errorCode); 126} 127 128void 129CollationSettings::setReordering(const CollationData &data, 130 const int32_t *codes, int32_t codesLength, 131 UErrorCode &errorCode) { 132 if(U_FAILURE(errorCode)) { return; } 133 if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { 134 resetReordering(); 135 return; 136 } 137 UVector32 rangesList(errorCode); 138 data.makeReorderRanges(codes, codesLength, rangesList, errorCode); 139 if(U_FAILURE(errorCode)) { return; } 140 int32_t rangesLength = rangesList.size(); 141 if(rangesLength == 0) { 142 resetReordering(); 143 return; 144 } 145 const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); 146 // ranges[] contains at least two (limit, offset) pairs. 147 // The first offset must be 0. The last offset must not be 0. 148 // Separators (at the low end) and trailing weights (at the high end) 149 // are never reordered. 150 U_ASSERT(rangesLength >= 2); 151 U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); 152 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; 153 154 // Write the lead byte permutation table. 155 // Set a 0 for each lead byte that has a range boundary in the middle. 156 uint8_t table[256]; 157 int32_t b = 0; 158 int32_t firstSplitByteRangeIndex = -1; 159 for(int32_t i = 0; i < rangesLength; ++i) { 160 uint32_t pair = ranges[i]; 161 int32_t limit1 = (int32_t)(pair >> 24); 162 while(b < limit1) { 163 table[b] = (uint8_t)(b + pair); 164 ++b; 165 } 166 // Check the second byte of the limit. 167 if((pair & 0xff0000) != 0) { 168 table[limit1] = 0; 169 b = limit1 + 1; 170 if(firstSplitByteRangeIndex < 0) { 171 firstSplitByteRangeIndex = i; 172 } 173 } 174 } 175 while(b <= 0xff) { 176 table[b] = (uint8_t)b; 177 ++b; 178 } 179 if(firstSplitByteRangeIndex < 0) { 180 // The lead byte permutation table alone suffices for reordering. 181 rangesLength = 0; 182 } else { 183 // Remove the ranges below the first split byte. 184 ranges += firstSplitByteRangeIndex; 185 rangesLength -= firstSplitByteRangeIndex; 186 } 187 setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); 188} 189 190void 191CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, 192 const uint32_t *ranges, int32_t rangesLength, 193 const uint8_t *table, UErrorCode &errorCode) { 194 if(U_FAILURE(errorCode)) { return; } 195 int32_t *ownedCodes; 196 int32_t totalLength = codesLength + rangesLength; 197 U_ASSERT(totalLength > 0); 198 if(totalLength <= reorderCodesCapacity) { 199 ownedCodes = const_cast<int32_t *>(reorderCodes); 200 } else { 201 // Allocate one memory block for the codes, the ranges, and the 16-aligned table. 202 int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints 203 ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); 204 if(ownedCodes == NULL) { 205 resetReordering(); 206 errorCode = U_MEMORY_ALLOCATION_ERROR; 207 return; 208 } 209 if(reorderCodesCapacity != 0) { 210 uprv_free(const_cast<int32_t *>(reorderCodes)); 211 } 212 reorderCodes = ownedCodes; 213 reorderCodesCapacity = capacity; 214 } 215 uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); 216 uprv_memcpy(ownedCodes, codes, codesLength * 4); 217 uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); 218 reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); 219 reorderCodesLength = codesLength; 220 reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; 221 reorderRangesLength = rangesLength; 222} 223 224void 225CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { 226 if(U_FAILURE(errorCode)) { return; } 227 if(!other.hasReordering()) { 228 resetReordering(); 229 return; 230 } 231 minHighNoReorder = other.minHighNoReorder; 232 if(other.reorderCodesCapacity == 0) { 233 // The reorder arrays are aliased to memory-mapped data. 234 reorderTable = other.reorderTable; 235 reorderRanges = other.reorderRanges; 236 reorderRangesLength = other.reorderRangesLength; 237 reorderCodes = other.reorderCodes; 238 reorderCodesLength = other.reorderCodesLength; 239 } else { 240 setReorderArrays(other.reorderCodes, other.reorderCodesLength, 241 other.reorderRanges, other.reorderRangesLength, 242 other.reorderTable, errorCode); 243 } 244} 245 246UBool 247CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { 248 U_ASSERT(table[0] == 0); 249 for(int32_t i = 1; i < 256; ++i) { 250 if(table[i] == 0) { 251 return TRUE; 252 } 253 } 254 return FALSE; 255} 256 257uint32_t 258CollationSettings::reorderEx(uint32_t p) const { 259 if(p >= minHighNoReorder) { return p; } 260 // Round up p so that its lower 16 bits are >= any offset bits. 261 // Then compare q directly with (limit, offset) pairs. 262 uint32_t q = p | 0xffff; 263 uint32_t r; 264 const uint32_t *ranges = reorderRanges; 265 while(q >= (r = *ranges)) { ++ranges; } 266 return p + (r << 24); 267} 268 269void 270CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { 271 if(U_FAILURE(errorCode)) { return; } 272 int32_t noStrength = options & ~STRENGTH_MASK; 273 switch(value) { 274 case UCOL_PRIMARY: 275 case UCOL_SECONDARY: 276 case UCOL_TERTIARY: 277 case UCOL_QUATERNARY: 278 case UCOL_IDENTICAL: 279 options = noStrength | (value << STRENGTH_SHIFT); 280 break; 281 case UCOL_DEFAULT: 282 options = noStrength | (defaultOptions & STRENGTH_MASK); 283 break; 284 default: 285 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 286 break; 287 } 288} 289 290void 291CollationSettings::setFlag(int32_t bit, UColAttributeValue value, 292 int32_t defaultOptions, UErrorCode &errorCode) { 293 if(U_FAILURE(errorCode)) { return; } 294 switch(value) { 295 case UCOL_ON: 296 options |= bit; 297 break; 298 case UCOL_OFF: 299 options &= ~bit; 300 break; 301 case UCOL_DEFAULT: 302 options = (options & ~bit) | (defaultOptions & bit); 303 break; 304 default: 305 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 306 break; 307 } 308} 309 310void 311CollationSettings::setCaseFirst(UColAttributeValue value, 312 int32_t defaultOptions, UErrorCode &errorCode) { 313 if(U_FAILURE(errorCode)) { return; } 314 int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; 315 switch(value) { 316 case UCOL_OFF: 317 options = noCaseFirst; 318 break; 319 case UCOL_LOWER_FIRST: 320 options = noCaseFirst | CASE_FIRST; 321 break; 322 case UCOL_UPPER_FIRST: 323 options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; 324 break; 325 case UCOL_DEFAULT: 326 options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); 327 break; 328 default: 329 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 330 break; 331 } 332} 333 334void 335CollationSettings::setAlternateHandling(UColAttributeValue value, 336 int32_t defaultOptions, UErrorCode &errorCode) { 337 if(U_FAILURE(errorCode)) { return; } 338 int32_t noAlternate = options & ~ALTERNATE_MASK; 339 switch(value) { 340 case UCOL_NON_IGNORABLE: 341 options = noAlternate; 342 break; 343 case UCOL_SHIFTED: 344 options = noAlternate | SHIFTED; 345 break; 346 case UCOL_DEFAULT: 347 options = noAlternate | (defaultOptions & ALTERNATE_MASK); 348 break; 349 default: 350 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 351 break; 352 } 353} 354 355void 356CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { 357 if(U_FAILURE(errorCode)) { return; } 358 int32_t noMax = options & ~MAX_VARIABLE_MASK; 359 switch(value) { 360 case MAX_VAR_SPACE: 361 case MAX_VAR_PUNCT: 362 case MAX_VAR_SYMBOL: 363 case MAX_VAR_CURRENCY: 364 options = noMax | (value << MAX_VARIABLE_SHIFT); 365 break; 366 case UCOL_DEFAULT: 367 options = noMax | (defaultOptions & MAX_VARIABLE_MASK); 368 break; 369 default: 370 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 371 break; 372 } 373} 374 375U_NAMESPACE_END 376 377#endif // !UCONFIG_NO_COLLATION 378