1/* 2******************************************************************************* 3* Copyright (C) 1996-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* file name: ucol.cpp 7* encoding: US-ASCII 8* tab size: 8 (not used) 9* indentation:4 10* 11* Modification history 12* Date Name Comments 13* 1996-1999 various members of ICU team maintained C API for collation framework 14* 02/16/2001 synwee Added internal method getPrevSpecialCE 15* 03/01/2001 synwee Added maxexpansion functionality. 16* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant 17* 2012-2014 markus Rewritten in C++ again. 18*/ 19 20#include "unicode/utypes.h" 21 22#if !UCONFIG_NO_COLLATION 23 24#include "unicode/coll.h" 25#include "unicode/tblcoll.h" 26#include "unicode/bytestream.h" 27#include "unicode/coleitr.h" 28#include "unicode/ucoleitr.h" 29#include "unicode/ustring.h" 30#include "cmemory.h" 31#include "collation.h" 32#include "cstring.h" 33#include "putilimp.h" 34#include "uassert.h" 35#include "utracimp.h" 36 37U_NAMESPACE_USE 38 39U_CAPI UCollator* U_EXPORT2 40ucol_openBinary(const uint8_t *bin, int32_t length, 41 const UCollator *base, 42 UErrorCode *status) 43{ 44 if(U_FAILURE(*status)) { return NULL; } 45 RuleBasedCollator *coll = new RuleBasedCollator( 46 bin, length, 47 RuleBasedCollator::rbcFromUCollator(base), 48 *status); 49 if(coll == NULL) { 50 *status = U_MEMORY_ALLOCATION_ERROR; 51 return NULL; 52 } 53 if(U_FAILURE(*status)) { 54 delete coll; 55 return NULL; 56 } 57 return coll->toUCollator(); 58} 59 60U_CAPI int32_t U_EXPORT2 61ucol_cloneBinary(const UCollator *coll, 62 uint8_t *buffer, int32_t capacity, 63 UErrorCode *status) 64{ 65 if(U_FAILURE(*status)) { 66 return 0; 67 } 68 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 69 if(rbc == NULL && coll != NULL) { 70 *status = U_UNSUPPORTED_ERROR; 71 return 0; 72 } 73 return rbc->cloneBinary(buffer, capacity, *status); 74} 75 76U_CAPI UCollator* U_EXPORT2 77ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status) 78{ 79 if (status == NULL || U_FAILURE(*status)){ 80 return NULL; 81 } 82 if (coll == NULL) { 83 *status = U_ILLEGAL_ARGUMENT_ERROR; 84 return NULL; 85 } 86 if (pBufferSize != NULL) { 87 int32_t inputSize = *pBufferSize; 88 *pBufferSize = 1; 89 if (inputSize == 0) { 90 return NULL; // preflighting for deprecated functionality 91 } 92 } 93 Collator *newColl = Collator::fromUCollator(coll)->clone(); 94 if (newColl == NULL) { 95 *status = U_MEMORY_ALLOCATION_ERROR; 96 } else { 97 *status = U_SAFECLONE_ALLOCATED_WARNING; 98 } 99 return newColl->toUCollator(); 100} 101 102U_CAPI void U_EXPORT2 103ucol_close(UCollator *coll) 104{ 105 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); 106 UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); 107 if(coll != NULL) { 108 delete Collator::fromUCollator(coll); 109 } 110 UTRACE_EXIT(); 111} 112 113U_CAPI int32_t U_EXPORT2 114ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 115 const uint8_t *src2, int32_t src2Length, 116 uint8_t *dest, int32_t destCapacity) { 117 /* check arguments */ 118 if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || 119 src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || 120 destCapacity<0 || (destCapacity>0 && dest==NULL) 121 ) { 122 /* error, attempt to write a zero byte and return 0 */ 123 if(dest!=NULL && destCapacity>0) { 124 *dest=0; 125 } 126 return 0; 127 } 128 129 /* check lengths and capacity */ 130 if(src1Length<0) { 131 src1Length=(int32_t)uprv_strlen((const char *)src1)+1; 132 } 133 if(src2Length<0) { 134 src2Length=(int32_t)uprv_strlen((const char *)src2)+1; 135 } 136 137 int32_t destLength=src1Length+src2Length; 138 if(destLength>destCapacity) { 139 /* the merged sort key does not fit into the destination */ 140 return destLength; 141 } 142 143 /* merge the sort keys with the same number of levels */ 144 uint8_t *p=dest; 145 for(;;) { 146 /* copy level from src1 not including 00 or 01 */ 147 uint8_t b; 148 while((b=*src1)>=2) { 149 ++src1; 150 *p++=b; 151 } 152 153 /* add a 02 merge separator */ 154 *p++=2; 155 156 /* copy level from src2 not including 00 or 01 */ 157 while((b=*src2)>=2) { 158 ++src2; 159 *p++=b; 160 } 161 162 /* if both sort keys have another level, then add a 01 level separator and continue */ 163 if(*src1==1 && *src2==1) { 164 ++src1; 165 ++src2; 166 *p++=1; 167 } else { 168 break; 169 } 170 } 171 172 /* 173 * here, at least one sort key is finished now, but the other one 174 * might have some contents left from containing more levels; 175 * that contents is just appended to the result 176 */ 177 if(*src1!=0) { 178 /* src1 is not finished, therefore *src2==0, and src1 is appended */ 179 src2=src1; 180 } 181 /* append src2, "the other, unfinished sort key" */ 182 while((*p++=*src2++)!=0) {} 183 184 /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */ 185 return (int32_t)(p-dest); 186} 187 188U_CAPI int32_t U_EXPORT2 189ucol_getSortKey(const UCollator *coll, 190 const UChar *source, 191 int32_t sourceLength, 192 uint8_t *result, 193 int32_t resultLength) 194{ 195 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); 196 if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 197 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, 198 ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength)); 199 } 200 201 int32_t keySize = Collator::fromUCollator(coll)-> 202 getSortKey(source, sourceLength, result, resultLength); 203 204 UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); 205 UTRACE_EXIT_VALUE(keySize); 206 return keySize; 207} 208 209U_CAPI int32_t U_EXPORT2 210ucol_nextSortKeyPart(const UCollator *coll, 211 UCharIterator *iter, 212 uint32_t state[2], 213 uint8_t *dest, int32_t count, 214 UErrorCode *status) 215{ 216 /* error checking */ 217 if(status==NULL || U_FAILURE(*status)) { 218 return 0; 219 } 220 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); 221 UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d", 222 coll, iter, state[0], state[1], dest, count); 223 224 int32_t i = Collator::fromUCollator(coll)-> 225 internalNextSortKeyPart(iter, state, dest, count, *status); 226 227 // Return number of meaningful sortkey bytes. 228 UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", 229 dest,i, state[0], state[1]); 230 UTRACE_EXIT_VALUE_STATUS(i, *status); 231 return i; 232} 233 234/** 235 * Produce a bound for a given sortkey and a number of levels. 236 */ 237U_CAPI int32_t U_EXPORT2 238ucol_getBound(const uint8_t *source, 239 int32_t sourceLength, 240 UColBoundMode boundType, 241 uint32_t noOfLevels, 242 uint8_t *result, 243 int32_t resultLength, 244 UErrorCode *status) 245{ 246 // consistency checks 247 if(status == NULL || U_FAILURE(*status)) { 248 return 0; 249 } 250 if(source == NULL) { 251 *status = U_ILLEGAL_ARGUMENT_ERROR; 252 return 0; 253 } 254 255 int32_t sourceIndex = 0; 256 // Scan the string until we skip enough of the key OR reach the end of the key 257 do { 258 sourceIndex++; 259 if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) { 260 noOfLevels--; 261 } 262 } while (noOfLevels > 0 263 && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); 264 265 if((source[sourceIndex] == 0 || sourceIndex == sourceLength) 266 && noOfLevels > 0) { 267 *status = U_SORT_KEY_TOO_SHORT_WARNING; 268 } 269 270 271 // READ ME: this code assumes that the values for boundType 272 // enum will not changes. They are set so that the enum value 273 // corresponds to the number of extra bytes each bound type 274 // needs. 275 if(result != NULL && resultLength >= sourceIndex+boundType) { 276 uprv_memcpy(result, source, sourceIndex); 277 switch(boundType) { 278 // Lower bound just gets terminated. No extra bytes 279 case UCOL_BOUND_LOWER: // = 0 280 break; 281 // Upper bound needs one extra byte 282 case UCOL_BOUND_UPPER: // = 1 283 result[sourceIndex++] = 2; 284 break; 285 // Upper long bound needs two extra bytes 286 case UCOL_BOUND_UPPER_LONG: // = 2 287 result[sourceIndex++] = 0xFF; 288 result[sourceIndex++] = 0xFF; 289 break; 290 default: 291 *status = U_ILLEGAL_ARGUMENT_ERROR; 292 return 0; 293 } 294 result[sourceIndex++] = 0; 295 296 return sourceIndex; 297 } else { 298 return sourceIndex+boundType+1; 299 } 300} 301 302U_CAPI void U_EXPORT2 303ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) { 304 if(U_FAILURE(*pErrorCode)) { return; } 305 Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode); 306} 307 308U_CAPI UColReorderCode U_EXPORT2 309ucol_getMaxVariable(const UCollator *coll) { 310 return Collator::fromUCollator(coll)->getMaxVariable(); 311} 312 313U_CAPI uint32_t U_EXPORT2 314ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) { 315 if(U_FAILURE(*status) || coll == NULL) { 316 return 0; 317 } 318 return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status); 319} 320 321U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) { 322 if(U_FAILURE(*status) || coll == NULL) { 323 return 0; 324 } 325 return Collator::fromUCollator(coll)->getVariableTop(*status); 326} 327 328U_CAPI void U_EXPORT2 329ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) { 330 if(U_FAILURE(*status) || coll == NULL) { 331 return; 332 } 333 Collator::fromUCollator(coll)->setVariableTop(varTop, *status); 334} 335 336U_CAPI void U_EXPORT2 337ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) { 338 if(U_FAILURE(*status) || coll == NULL) { 339 return; 340 } 341 342 Collator::fromUCollator(coll)->setAttribute(attr, value, *status); 343} 344 345U_CAPI UColAttributeValue U_EXPORT2 346ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) { 347 if(U_FAILURE(*status) || coll == NULL) { 348 return UCOL_DEFAULT; 349 } 350 351 return Collator::fromUCollator(coll)->getAttribute(attr, *status); 352} 353 354U_CAPI void U_EXPORT2 355ucol_setStrength( UCollator *coll, 356 UCollationStrength strength) 357{ 358 UErrorCode status = U_ZERO_ERROR; 359 ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); 360} 361 362U_CAPI UCollationStrength U_EXPORT2 363ucol_getStrength(const UCollator *coll) 364{ 365 UErrorCode status = U_ZERO_ERROR; 366 return ucol_getAttribute(coll, UCOL_STRENGTH, &status); 367} 368 369U_CAPI int32_t U_EXPORT2 370ucol_getReorderCodes(const UCollator *coll, 371 int32_t *dest, 372 int32_t destCapacity, 373 UErrorCode *status) { 374 if (U_FAILURE(*status)) { 375 return 0; 376 } 377 378 return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status); 379} 380 381U_CAPI void U_EXPORT2 382ucol_setReorderCodes(UCollator* coll, 383 const int32_t* reorderCodes, 384 int32_t reorderCodesLength, 385 UErrorCode *status) { 386 if (U_FAILURE(*status)) { 387 return; 388 } 389 390 Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status); 391} 392 393U_CAPI int32_t U_EXPORT2 394ucol_getEquivalentReorderCodes(int32_t reorderCode, 395 int32_t* dest, 396 int32_t destCapacity, 397 UErrorCode *pErrorCode) { 398 return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode); 399} 400 401U_CAPI void U_EXPORT2 402ucol_getVersion(const UCollator* coll, 403 UVersionInfo versionInfo) 404{ 405 Collator::fromUCollator(coll)->getVersion(versionInfo); 406} 407 408U_CAPI UCollationResult U_EXPORT2 409ucol_strcollIter( const UCollator *coll, 410 UCharIterator *sIter, 411 UCharIterator *tIter, 412 UErrorCode *status) 413{ 414 if(!status || U_FAILURE(*status)) { 415 return UCOL_EQUAL; 416 } 417 418 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); 419 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter); 420 421 if(sIter == NULL || tIter == NULL || coll == NULL) { 422 *status = U_ILLEGAL_ARGUMENT_ERROR; 423 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); 424 return UCOL_EQUAL; 425 } 426 427 UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status); 428 429 UTRACE_EXIT_VALUE_STATUS(result, *status); 430 return result; 431} 432 433 434/* */ 435/* ucol_strcoll Main public API string comparison function */ 436/* */ 437U_CAPI UCollationResult U_EXPORT2 438ucol_strcoll( const UCollator *coll, 439 const UChar *source, 440 int32_t sourceLength, 441 const UChar *target, 442 int32_t targetLength) 443{ 444 U_ALIGN_CODE(16); 445 446 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); 447 if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 448 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); 449 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength); 450 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength); 451 } 452 453 UErrorCode status = U_ZERO_ERROR; 454 UCollationResult returnVal = Collator::fromUCollator(coll)-> 455 compare(source, sourceLength, target, targetLength, status); 456 UTRACE_EXIT_VALUE_STATUS(returnVal, status); 457 return returnVal; 458} 459 460U_CAPI UCollationResult U_EXPORT2 461ucol_strcollUTF8( 462 const UCollator *coll, 463 const char *source, 464 int32_t sourceLength, 465 const char *target, 466 int32_t targetLength, 467 UErrorCode *status) 468{ 469 U_ALIGN_CODE(16); 470 471 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8); 472 if (UTRACE_LEVEL(UTRACE_VERBOSE)) { 473 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); 474 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength); 475 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength); 476 } 477 478 if (U_FAILURE(*status)) { 479 /* do nothing */ 480 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); 481 return UCOL_EQUAL; 482 } 483 484 UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8( 485 source, sourceLength, target, targetLength, *status); 486 UTRACE_EXIT_VALUE_STATUS(returnVal, *status); 487 return returnVal; 488} 489 490 491/* convenience function for comparing strings */ 492U_CAPI UBool U_EXPORT2 493ucol_greater( const UCollator *coll, 494 const UChar *source, 495 int32_t sourceLength, 496 const UChar *target, 497 int32_t targetLength) 498{ 499 return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 500 == UCOL_GREATER); 501} 502 503/* convenience function for comparing strings */ 504U_CAPI UBool U_EXPORT2 505ucol_greaterOrEqual( const UCollator *coll, 506 const UChar *source, 507 int32_t sourceLength, 508 const UChar *target, 509 int32_t targetLength) 510{ 511 return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 512 != UCOL_LESS); 513} 514 515/* convenience function for comparing strings */ 516U_CAPI UBool U_EXPORT2 517ucol_equal( const UCollator *coll, 518 const UChar *source, 519 int32_t sourceLength, 520 const UChar *target, 521 int32_t targetLength) 522{ 523 return (ucol_strcoll(coll, source, sourceLength, target, targetLength) 524 == UCOL_EQUAL); 525} 526 527U_CAPI void U_EXPORT2 528ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { 529 const Collator *c = Collator::fromUCollator(coll); 530 if(c != NULL) { 531 UVersionInfo v; 532 c->getVersion(v); 533 // Note: This is tied to how the current implementation encodes the UCA version 534 // in the overall getVersion(). 535 // Alternatively, we could load the root collator and get at lower-level data from there. 536 // Either way, it will reflect the input collator's UCA version only 537 // if it is a known implementation. 538 // It would be cleaner to make this a virtual Collator method. 539 info[0] = v[1] >> 3; 540 info[1] = v[1] & 7; 541 info[2] = v[2] >> 6; 542 info[3] = 0; 543 } 544} 545 546U_CAPI const UChar * U_EXPORT2 547ucol_getRules(const UCollator *coll, int32_t *length) { 548 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 549 // OK to crash if coll==NULL: We do not want to check "this" pointers. 550 if(rbc != NULL || coll == NULL) { 551 const UnicodeString &rules = rbc->getRules(); 552 U_ASSERT(rules.getBuffer()[rules.length()] == 0); 553 *length = rules.length(); 554 return rules.getBuffer(); 555 } 556 static const UChar _NUL = 0; 557 *length = 0; 558 return &_NUL; 559} 560 561U_CAPI int32_t U_EXPORT2 562ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { 563 UnicodeString rules; 564 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 565 if(rbc != NULL || coll == NULL) { 566 rbc->getRules(delta, rules); 567 } 568 if(buffer != NULL && bufferLen > 0) { 569 UErrorCode errorCode = U_ZERO_ERROR; 570 return rules.extract(buffer, bufferLen, errorCode); 571 } else { 572 return rules.length(); 573 } 574} 575 576U_CAPI const char * U_EXPORT2 577ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 578 return ucol_getLocaleByType(coll, type, status); 579} 580 581U_CAPI const char * U_EXPORT2 582ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 583 if(U_FAILURE(*status)) { 584 return NULL; 585 } 586 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); 587 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); 588 589 const char *result; 590 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); 591 if(rbc == NULL && coll != NULL) { 592 *status = U_UNSUPPORTED_ERROR; 593 result = NULL; 594 } else { 595 result = rbc->internalGetLocaleID(type, *status); 596 } 597 598 UTRACE_DATA1(UTRACE_INFO, "result = %s", result); 599 UTRACE_EXIT_STATUS(*status); 600 return result; 601} 602 603U_CAPI USet * U_EXPORT2 604ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) { 605 if(U_FAILURE(*status)) { 606 return NULL; 607 } 608 UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status); 609 if(U_FAILURE(*status)) { 610 delete set; 611 return NULL; 612 } 613 return set->toUSet(); 614} 615 616U_CAPI UBool U_EXPORT2 617ucol_equals(const UCollator *source, const UCollator *target) { 618 return source == target || 619 (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target)); 620} 621 622#endif /* #if !UCONFIG_NO_COLLATION */ 623