1/* 2****************************************************************************** 3* Copyright (C) 2001-2015, International Business Machines 4* Corporation and others. All Rights Reserved. 5****************************************************************************** 6* 7* File ucoleitr.cpp 8* 9* Modification History: 10* 11* Date Name Description 12* 02/15/2001 synwee Modified all methods to process its own function 13* instead of calling the equivalent c++ api (coleitr.h) 14* 2012-2014 markus Rewritten in C++ again. 15******************************************************************************/ 16 17#include "unicode/utypes.h" 18 19#if !UCONFIG_NO_COLLATION 20 21#include "unicode/coleitr.h" 22#include "unicode/tblcoll.h" 23#include "unicode/ucoleitr.h" 24#include "unicode/ustring.h" 25#include "unicode/sortkey.h" 26#include "unicode/uobject.h" 27#include "cmemory.h" 28#include "usrchimp.h" 29 30U_NAMESPACE_USE 31 32#define BUFFER_LENGTH 100 33 34#define DEFAULT_BUFFER_SIZE 16 35#define BUFFER_GROW 8 36 37#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) 38 39#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) 40 41#define DELETE_ARRAY(array) uprv_free((void *) (array)) 42 43struct RCEI 44{ 45 uint32_t ce; 46 int32_t low; 47 int32_t high; 48}; 49 50U_NAMESPACE_BEGIN 51 52struct RCEBuffer 53{ 54 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; 55 RCEI *buffer; 56 int32_t bufferIndex; 57 int32_t bufferSize; 58 59 RCEBuffer(); 60 ~RCEBuffer(); 61 62 UBool isEmpty() const; 63 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode); 64 const RCEI *get(); 65}; 66 67RCEBuffer::RCEBuffer() 68{ 69 buffer = defaultBuffer; 70 bufferIndex = 0; 71 bufferSize = UPRV_LENGTHOF(defaultBuffer); 72} 73 74RCEBuffer::~RCEBuffer() 75{ 76 if (buffer != defaultBuffer) { 77 DELETE_ARRAY(buffer); 78 } 79} 80 81UBool RCEBuffer::isEmpty() const 82{ 83 return bufferIndex <= 0; 84} 85 86void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode) 87{ 88 if (U_FAILURE(errorCode)) { 89 return; 90 } 91 if (bufferIndex >= bufferSize) { 92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); 93 if (newBuffer == NULL) { 94 errorCode = U_MEMORY_ALLOCATION_ERROR; 95 return; 96 } 97 98 ARRAY_COPY(newBuffer, buffer, bufferSize); 99 100 if (buffer != defaultBuffer) { 101 DELETE_ARRAY(buffer); 102 } 103 104 buffer = newBuffer; 105 bufferSize += BUFFER_GROW; 106 } 107 108 buffer[bufferIndex].ce = ce; 109 buffer[bufferIndex].low = ixLow; 110 buffer[bufferIndex].high = ixHigh; 111 112 bufferIndex += 1; 113} 114 115const RCEI *RCEBuffer::get() 116{ 117 if (bufferIndex > 0) { 118 return &buffer[--bufferIndex]; 119 } 120 121 return NULL; 122} 123 124PCEBuffer::PCEBuffer() 125{ 126 buffer = defaultBuffer; 127 bufferIndex = 0; 128 bufferSize = UPRV_LENGTHOF(defaultBuffer); 129} 130 131PCEBuffer::~PCEBuffer() 132{ 133 if (buffer != defaultBuffer) { 134 DELETE_ARRAY(buffer); 135 } 136} 137 138void PCEBuffer::reset() 139{ 140 bufferIndex = 0; 141} 142 143UBool PCEBuffer::isEmpty() const 144{ 145 return bufferIndex <= 0; 146} 147 148void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode) 149{ 150 if (U_FAILURE(errorCode)) { 151 return; 152 } 153 if (bufferIndex >= bufferSize) { 154 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); 155 if (newBuffer == NULL) { 156 errorCode = U_MEMORY_ALLOCATION_ERROR; 157 return; 158 } 159 160 ARRAY_COPY(newBuffer, buffer, bufferSize); 161 162 if (buffer != defaultBuffer) { 163 DELETE_ARRAY(buffer); 164 } 165 166 buffer = newBuffer; 167 bufferSize += BUFFER_GROW; 168 } 169 170 buffer[bufferIndex].ce = ce; 171 buffer[bufferIndex].low = ixLow; 172 buffer[bufferIndex].high = ixHigh; 173 174 bufferIndex += 1; 175} 176 177const PCEI *PCEBuffer::get() 178{ 179 if (bufferIndex > 0) { 180 return &buffer[--bufferIndex]; 181 } 182 183 return NULL; 184} 185 186UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); } 187 188UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); } 189 190void UCollationPCE::init(UCollationElements *elems) { 191 init(CollationElementIterator::fromUCollationElements(elems)); 192} 193 194void UCollationPCE::init(CollationElementIterator *iter) 195{ 196 cei = iter; 197 init(*iter->rbc_); 198} 199 200void UCollationPCE::init(const Collator &coll) 201{ 202 UErrorCode status = U_ZERO_ERROR; 203 204 strength = coll.getAttribute(UCOL_STRENGTH, status); 205 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; 206 isShifted = FALSE; 207 variableTop = coll.getVariableTop(status); 208} 209 210UCollationPCE::~UCollationPCE() 211{ 212 // nothing to do 213} 214 215uint64_t UCollationPCE::processCE(uint32_t ce) 216{ 217 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; 218 219 // This is clean, but somewhat slow... 220 // We could apply the mask to ce and then 221 // just get all three orders... 222 switch(strength) { 223 default: 224 tertiary = ucol_tertiaryOrder(ce); 225 /* note fall-through */ 226 227 case UCOL_SECONDARY: 228 secondary = ucol_secondaryOrder(ce); 229 /* note fall-through */ 230 231 case UCOL_PRIMARY: 232 primary = ucol_primaryOrder(ce); 233 } 234 235 // **** This should probably handle continuations too. **** 236 // **** That means that we need 24 bits for the primary **** 237 // **** instead of the 16 that we're currently using. **** 238 // **** So we can lay out the 64 bits as: 24.12.12.16. **** 239 // **** Another complication with continuations is that **** 240 // **** the *second* CE is marked as a continuation, so **** 241 // **** we always have to peek ahead to know how long **** 242 // **** the primary is... **** 243 if ((toShift && variableTop > ce && primary != 0) 244 || (isShifted && primary == 0)) { 245 246 if (primary == 0) { 247 return UCOL_IGNORABLE; 248 } 249 250 if (strength >= UCOL_QUATERNARY) { 251 quaternary = primary; 252 } 253 254 primary = secondary = tertiary = 0; 255 isShifted = TRUE; 256 } else { 257 if (strength >= UCOL_QUATERNARY) { 258 quaternary = 0xFFFF; 259 } 260 261 isShifted = FALSE; 262 } 263 264 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; 265} 266 267U_NAMESPACE_END 268 269/* public methods ---------------------------------------------------- */ 270 271U_CAPI UCollationElements* U_EXPORT2 272ucol_openElements(const UCollator *coll, 273 const UChar *text, 274 int32_t textLength, 275 UErrorCode *status) 276{ 277 if (U_FAILURE(*status)) { 278 return NULL; 279 } 280 if (coll == NULL || (text == NULL && textLength != 0)) { 281 *status = U_ILLEGAL_ARGUMENT_ERROR; 282 return NULL; 283 } 284 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 285 if (rbc == NULL) { 286 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator 287 return NULL; 288 } 289 290 UnicodeString s((UBool)(textLength < 0), text, textLength); 291 CollationElementIterator *cei = rbc->createCollationElementIterator(s); 292 if (cei == NULL) { 293 *status = U_MEMORY_ALLOCATION_ERROR; 294 return NULL; 295 } 296 297 return cei->toUCollationElements(); 298} 299 300 301U_CAPI void U_EXPORT2 302ucol_closeElements(UCollationElements *elems) 303{ 304 delete CollationElementIterator::fromUCollationElements(elems); 305} 306 307U_CAPI void U_EXPORT2 308ucol_reset(UCollationElements *elems) 309{ 310 CollationElementIterator::fromUCollationElements(elems)->reset(); 311} 312 313U_CAPI int32_t U_EXPORT2 314ucol_next(UCollationElements *elems, 315 UErrorCode *status) 316{ 317 if (U_FAILURE(*status)) { 318 return UCOL_NULLORDER; 319 } 320 321 return CollationElementIterator::fromUCollationElements(elems)->next(*status); 322} 323 324U_NAMESPACE_BEGIN 325 326int64_t 327UCollationPCE::nextProcessed( 328 int32_t *ixLow, 329 int32_t *ixHigh, 330 UErrorCode *status) 331{ 332 int64_t result = UCOL_IGNORABLE; 333 uint32_t low = 0, high = 0; 334 335 if (U_FAILURE(*status)) { 336 return UCOL_PROCESSED_NULLORDER; 337 } 338 339 pceBuffer.reset(); 340 341 do { 342 low = cei->getOffset(); 343 int32_t ce = cei->next(*status); 344 high = cei->getOffset(); 345 346 if (ce == UCOL_NULLORDER) { 347 result = UCOL_PROCESSED_NULLORDER; 348 break; 349 } 350 351 result = processCE((uint32_t)ce); 352 } while (result == UCOL_IGNORABLE); 353 354 if (ixLow != NULL) { 355 *ixLow = low; 356 } 357 358 if (ixHigh != NULL) { 359 *ixHigh = high; 360 } 361 362 return result; 363} 364 365U_NAMESPACE_END 366 367U_CAPI int32_t U_EXPORT2 368ucol_previous(UCollationElements *elems, 369 UErrorCode *status) 370{ 371 if(U_FAILURE(*status)) { 372 return UCOL_NULLORDER; 373 } 374 return CollationElementIterator::fromUCollationElements(elems)->previous(*status); 375} 376 377U_NAMESPACE_BEGIN 378 379int64_t 380UCollationPCE::previousProcessed( 381 int32_t *ixLow, 382 int32_t *ixHigh, 383 UErrorCode *status) 384{ 385 int64_t result = UCOL_IGNORABLE; 386 int32_t low = 0, high = 0; 387 388 if (U_FAILURE(*status)) { 389 return UCOL_PROCESSED_NULLORDER; 390 } 391 392 // pceBuffer.reset(); 393 394 while (pceBuffer.isEmpty()) { 395 // buffer raw CEs up to non-ignorable primary 396 RCEBuffer rceb; 397 int32_t ce; 398 399 // **** do we need to reset rceb, or will it always be empty at this point **** 400 do { 401 high = cei->getOffset(); 402 ce = cei->previous(*status); 403 low = cei->getOffset(); 404 405 if (ce == UCOL_NULLORDER) { 406 if (!rceb.isEmpty()) { 407 break; 408 } 409 410 goto finish; 411 } 412 413 rceb.put((uint32_t)ce, low, high, *status); 414 } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce))); 415 416 // process the raw CEs 417 while (U_SUCCESS(*status) && !rceb.isEmpty()) { 418 const RCEI *rcei = rceb.get(); 419 420 result = processCE(rcei->ce); 421 422 if (result != UCOL_IGNORABLE) { 423 pceBuffer.put(result, rcei->low, rcei->high, *status); 424 } 425 } 426 if (U_FAILURE(*status)) { 427 return UCOL_PROCESSED_NULLORDER; 428 } 429 } 430 431finish: 432 if (pceBuffer.isEmpty()) { 433 // **** Is -1 the right value for ixLow, ixHigh? **** 434 if (ixLow != NULL) { 435 *ixLow = -1; 436 } 437 438 if (ixHigh != NULL) { 439 *ixHigh = -1 440 ; 441 } 442 return UCOL_PROCESSED_NULLORDER; 443 } 444 445 const PCEI *pcei = pceBuffer.get(); 446 447 if (ixLow != NULL) { 448 *ixLow = pcei->low; 449 } 450 451 if (ixHigh != NULL) { 452 *ixHigh = pcei->high; 453 } 454 455 return pcei->ce; 456} 457 458U_NAMESPACE_END 459 460U_CAPI int32_t U_EXPORT2 461ucol_getMaxExpansion(const UCollationElements *elems, 462 int32_t order) 463{ 464 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order); 465 466 // TODO: The old code masked the order according to strength and then did a binary search. 467 // However this was probably at least partially broken because of the following comment. 468 // Still, it might have found a match when this version may not. 469 470 // FIXME: with a masked search, there might be more than one hit, 471 // so we need to look forward and backward from the match to find all 472 // of the hits... 473} 474 475U_CAPI void U_EXPORT2 476ucol_setText( UCollationElements *elems, 477 const UChar *text, 478 int32_t textLength, 479 UErrorCode *status) 480{ 481 if (U_FAILURE(*status)) { 482 return; 483 } 484 485 if ((text == NULL && textLength != 0)) { 486 *status = U_ILLEGAL_ARGUMENT_ERROR; 487 return; 488 } 489 UnicodeString s((UBool)(textLength < 0), text, textLength); 490 return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status); 491} 492 493U_CAPI int32_t U_EXPORT2 494ucol_getOffset(const UCollationElements *elems) 495{ 496 return CollationElementIterator::fromUCollationElements(elems)->getOffset(); 497} 498 499U_CAPI void U_EXPORT2 500ucol_setOffset(UCollationElements *elems, 501 int32_t offset, 502 UErrorCode *status) 503{ 504 if (U_FAILURE(*status)) { 505 return; 506 } 507 508 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status); 509} 510 511U_CAPI int32_t U_EXPORT2 512ucol_primaryOrder (int32_t order) 513{ 514 return (order >> 16) & 0xffff; 515} 516 517U_CAPI int32_t U_EXPORT2 518ucol_secondaryOrder (int32_t order) 519{ 520 return (order >> 8) & 0xff; 521} 522 523U_CAPI int32_t U_EXPORT2 524ucol_tertiaryOrder (int32_t order) 525{ 526 return order & 0xff; 527} 528 529#endif /* #if !UCONFIG_NO_COLLATION */ 530