ucol_res.cpp revision b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2
1/* 2******************************************************************************* 3* Copyright (C) 1996-2011, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* file name: ucol_res.cpp 7* encoding: US-ASCII 8* tab size: 8 (not used) 9* indentation:4 10* 11* Description: 12* This file contains dependencies that the collation run-time doesn't normally 13* need. This mainly contains resource bundle usage and collation meta information 14* 15* Modification history 16* Date Name Comments 17* 1996-1999 various members of ICU team maintained C API for collation framework 18* 02/16/2001 synwee Added internal method getPrevSpecialCE 19* 03/01/2001 synwee Added maxexpansion functionality. 20* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant 21* 12/08/2004 grhoten Split part of ucol.cpp into ucol_res.cpp 22*/ 23 24#include "unicode/utypes.h" 25 26#if !UCONFIG_NO_COLLATION 27#include "unicode/uloc.h" 28#include "unicode/coll.h" 29#include "unicode/tblcoll.h" 30#include "unicode/caniter.h" 31#include "unicode/uscript.h" 32#include "unicode/ustring.h" 33 34#include "ucol_bld.h" 35#include "ucol_imp.h" 36#include "ucol_tok.h" 37#include "ucol_elm.h" 38#include "uresimp.h" 39#include "ustr_imp.h" 40#include "cstring.h" 41#include "umutex.h" 42#include "ucln_in.h" 43#include "ustrenum.h" 44#include "putilimp.h" 45#include "utracimp.h" 46#include "cmemory.h" 47#include "uenumimp.h" 48#include "ulist.h" 49 50U_NAMESPACE_USE 51 52static void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status); 53 54// static UCA. There is only one. Collators don't use it. 55// It is referenced only in ucol_initUCA and ucol_cleanup 56static UCollator* _staticUCA = NULL; 57// static pointer to udata memory. Inited in ucol_initUCA 58// used for cleanup in ucol_cleanup 59static UDataMemory* UCA_DATA_MEM = NULL; 60 61U_CDECL_BEGIN 62static UBool U_CALLCONV 63ucol_res_cleanup(void) 64{ 65 if (UCA_DATA_MEM) { 66 udata_close(UCA_DATA_MEM); 67 UCA_DATA_MEM = NULL; 68 } 69 if (_staticUCA) { 70 ucol_close(_staticUCA); 71 _staticUCA = NULL; 72 } 73 return TRUE; 74} 75 76static UBool U_CALLCONV 77isAcceptableUCA(void * /*context*/, 78 const char * /*type*/, const char * /*name*/, 79 const UDataInfo *pInfo){ 80 /* context, type & name are intentionally not used */ 81 if( pInfo->size>=20 && 82 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 83 pInfo->charsetFamily==U_CHARSET_FAMILY && 84 pInfo->dataFormat[0]==UCA_DATA_FORMAT_0 && /* dataFormat="UCol" */ 85 pInfo->dataFormat[1]==UCA_DATA_FORMAT_1 && 86 pInfo->dataFormat[2]==UCA_DATA_FORMAT_2 && 87 pInfo->dataFormat[3]==UCA_DATA_FORMAT_3 && 88 pInfo->formatVersion[0]==UCA_FORMAT_VERSION_0 89#if UCA_FORMAT_VERSION_1!=0 90 && pInfo->formatVersion[1]>=UCA_FORMAT_VERSION_1 91#endif 92 //pInfo->formatVersion[1]==UCA_FORMAT_VERSION_1 && 93 //pInfo->formatVersion[2]==UCA_FORMAT_VERSION_2 && // Too harsh 94 //pInfo->formatVersion[3]==UCA_FORMAT_VERSION_3 && // Too harsh 95 ) { 96 UVersionInfo UCDVersion; 97 u_getUnicodeVersion(UCDVersion); 98 return (UBool)(pInfo->dataVersion[0]==UCDVersion[0] 99 && pInfo->dataVersion[1]==UCDVersion[1]); 100 //&& pInfo->dataVersion[2]==ucaDataInfo.dataVersion[2] 101 //&& pInfo->dataVersion[3]==ucaDataInfo.dataVersion[3]); 102 } else { 103 return FALSE; 104 } 105} 106U_CDECL_END 107 108/* do not close UCA returned by ucol_initUCA! */ 109UCollator * 110ucol_initUCA(UErrorCode *status) { 111 if(U_FAILURE(*status)) { 112 return NULL; 113 } 114 UBool needsInit; 115 UMTX_CHECK(NULL, (_staticUCA == NULL), needsInit); 116 117 if(needsInit) { 118 UDataMemory *result = udata_openChoice(U_ICUDATA_COLL, UCA_DATA_TYPE, UCA_DATA_NAME, isAcceptableUCA, NULL, status); 119 120 if(U_SUCCESS(*status)){ 121 UCollator *newUCA = ucol_initCollator((const UCATableHeader *)udata_getMemory(result), NULL, NULL, status); 122 if(U_SUCCESS(*status)){ 123 // Initalize variables for implicit generation 124 uprv_uca_initImplicitConstants(status); 125 126 umtx_lock(NULL); 127 if(_staticUCA == NULL) { 128 UCA_DATA_MEM = result; 129 _staticUCA = newUCA; 130 newUCA = NULL; 131 result = NULL; 132 } 133 umtx_unlock(NULL); 134 135 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); 136 if(newUCA != NULL) { 137 ucol_close(newUCA); 138 udata_close(result); 139 } 140 }else{ 141 ucol_close(newUCA); 142 udata_close(result); 143 } 144 } 145 else { 146 udata_close(result); 147 } 148 } 149 return _staticUCA; 150} 151 152U_CAPI void U_EXPORT2 153ucol_forgetUCA(void) 154{ 155 _staticUCA = NULL; 156 UCA_DATA_MEM = NULL; 157} 158 159/****************************************************************************/ 160/* Following are the open/close functions */ 161/* */ 162/****************************************************************************/ 163static UCollator* 164tryOpeningFromRules(UResourceBundle *collElem, UErrorCode *status) { 165 int32_t rulesLen = 0; 166 const UChar *rules = ures_getStringByKey(collElem, "Sequence", &rulesLen, status); 167 return ucol_openRules(rules, rulesLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, status); 168} 169 170 171// API in ucol_imp.h 172 173U_CFUNC UCollator* 174ucol_open_internal(const char *loc, 175 UErrorCode *status) 176{ 177 UErrorCode intStatus = U_ZERO_ERROR; 178 const UCollator* UCA = ucol_initUCA(status); 179 180 /* New version */ 181 if(U_FAILURE(*status)) return 0; 182 183 184 185 UCollator *result = NULL; 186 UResourceBundle *b = ures_open(U_ICUDATA_COLL, loc, status); 187 188 /* we try to find stuff from keyword */ 189 UResourceBundle *collations = ures_getByKey(b, "collations", NULL, status); 190 UResourceBundle *collElem = NULL; 191 char keyBuffer[256]; 192 // if there is a keyword, we pick it up and try to get elements 193 if(!uloc_getKeywordValue(loc, "collation", keyBuffer, 256, status) || 194 !uprv_strcmp(keyBuffer,"default")) { /* Treat 'zz@collation=default' as 'zz'. */ 195 // no keyword. we try to find the default setting, which will give us the keyword value 196 intStatus = U_ZERO_ERROR; 197 // finding default value does not affect collation fallback status 198 UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", NULL, &intStatus); 199 if(U_SUCCESS(intStatus)) { 200 int32_t defaultKeyLen = 0; 201 const UChar *defaultKey = ures_getString(defaultColl, &defaultKeyLen, &intStatus); 202 u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen); 203 keyBuffer[defaultKeyLen] = 0; 204 } else { 205 *status = U_INTERNAL_PROGRAM_ERROR; 206 return NULL; 207 } 208 ures_close(defaultColl); 209 } 210 collElem = ures_getByKeyWithFallback(collations, keyBuffer, collations, status); 211 collations = NULL; // We just reused the collations object as collElem. 212 213 UResourceBundle *binary = NULL; 214 UResourceBundle *reorderRes = NULL; 215 216 if(*status == U_MISSING_RESOURCE_ERROR) { /* We didn't find the tailoring data, we fallback to the UCA */ 217 *status = U_USING_DEFAULT_WARNING; 218 result = ucol_initCollator(UCA->image, result, UCA, status); 219 if (U_FAILURE(*status)) { 220 goto clean; 221 } 222 // if we use UCA, real locale is root 223 ures_close(b); 224 b = ures_open(U_ICUDATA_COLL, "", status); 225 ures_close(collElem); 226 collElem = ures_open(U_ICUDATA_COLL, "", status); 227 if(U_FAILURE(*status)) { 228 goto clean; 229 } 230 result->hasRealData = FALSE; 231 } else if(U_SUCCESS(*status)) { 232 intStatus = U_ZERO_ERROR; 233 234 binary = ures_getByKey(collElem, "%%CollationBin", NULL, &intStatus); 235 236 if(intStatus == U_MISSING_RESOURCE_ERROR) { /* we didn't find the binary image, we should use the rules */ 237 binary = NULL; 238 result = tryOpeningFromRules(collElem, status); 239 if(U_FAILURE(*status)) { 240 goto clean; 241 } 242 } else if(U_SUCCESS(intStatus)) { /* otherwise, we'll pick a collation data that exists */ 243 int32_t len = 0; 244 const uint8_t *inData = ures_getBinary(binary, &len, status); 245 if(U_FAILURE(*status)) { 246 goto clean; 247 } 248 UCATableHeader *colData = (UCATableHeader *)inData; 249 if(uprv_memcmp(colData->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)) != 0 || 250 uprv_memcmp(colData->UCDVersion, UCA->image->UCDVersion, sizeof(UVersionInfo)) != 0 || 251 colData->version[0] != UCOL_BUILDER_VERSION) 252 { 253 *status = U_DIFFERENT_UCA_VERSION; 254 result = tryOpeningFromRules(collElem, status); 255 } else { 256 if(U_FAILURE(*status)){ 257 goto clean; 258 } 259 if((uint32_t)len > (paddedsize(sizeof(UCATableHeader)) + paddedsize(sizeof(UColOptionSet)))) { 260 result = ucol_initCollator((const UCATableHeader *)inData, result, UCA, status); 261 if(U_FAILURE(*status)){ 262 goto clean; 263 } 264 result->hasRealData = TRUE; 265 } else { 266 result = ucol_initCollator(UCA->image, result, UCA, status); 267 ucol_setOptionsFromHeader(result, (UColOptionSet *)(inData+((const UCATableHeader *)inData)->options), status); 268 if(U_FAILURE(*status)){ 269 goto clean; 270 } 271 result->hasRealData = FALSE; 272 } 273 result->freeImageOnClose = FALSE; 274 275 reorderRes = ures_getByKey(collElem, "%%ReorderCodes", NULL, &intStatus); 276 if (U_SUCCESS(intStatus)) { 277 int32_t reorderCodesLen = 0; 278 const int32_t* reorderCodes = ures_getIntVector(reorderRes, &reorderCodesLen, status); 279 if (reorderCodesLen > 0) { 280 ucol_setReorderCodes(result, reorderCodes, reorderCodesLen, status); 281 // copy the reorder codes into the default reorder codes 282 result->defaultReorderCodesLength = result->reorderCodesLength; 283 result->defaultReorderCodes = (int32_t*) uprv_malloc(result->defaultReorderCodesLength * sizeof(int32_t)); 284 uprv_memcpy(result->defaultReorderCodes, result->reorderCodes, result->defaultReorderCodesLength * sizeof(int32_t)); 285 result->freeDefaultReorderCodesOnClose = TRUE; 286 } 287 if (U_FAILURE(*status)) { 288 goto clean; 289 } 290 } 291 } 292 293 } else { // !U_SUCCESS(binaryStatus) 294 if(U_SUCCESS(*status)) { 295 *status = intStatus; // propagate underlying error 296 } 297 goto clean; 298 } 299 intStatus = U_ZERO_ERROR; 300 result->rules = ures_getStringByKey(collElem, "Sequence", &result->rulesLength, &intStatus); 301 result->freeRulesOnClose = FALSE; 302 } else { /* There is another error, and we're just gonna clean up */ 303 goto clean; 304 } 305 306 intStatus = U_ZERO_ERROR; 307 result->ucaRules = ures_getStringByKey(b,"UCARules",NULL,&intStatus); 308 309 if(loc == NULL) { 310 loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status); 311 } 312 result->requestedLocale = uprv_strdup(loc); 313 /* test for NULL */ 314 if (result->requestedLocale == NULL) { 315 *status = U_MEMORY_ALLOCATION_ERROR; 316 goto clean; 317 } 318 loc = ures_getLocaleByType(collElem, ULOC_ACTUAL_LOCALE, status); 319 result->actualLocale = uprv_strdup(loc); 320 /* test for NULL */ 321 if (result->actualLocale == NULL) { 322 *status = U_MEMORY_ALLOCATION_ERROR; 323 goto clean; 324 } 325 loc = ures_getLocaleByType(b, ULOC_ACTUAL_LOCALE, status); 326 result->validLocale = uprv_strdup(loc); 327 /* test for NULL */ 328 if (result->validLocale == NULL) { 329 *status = U_MEMORY_ALLOCATION_ERROR; 330 goto clean; 331 } 332 333 ures_close(b); 334 ures_close(collElem); 335 ures_close(binary); 336 ures_close(reorderRes); 337 return result; 338 339clean: 340 ures_close(b); 341 ures_close(collElem); 342 ures_close(binary); 343 ures_close(reorderRes); 344 ucol_close(result); 345 return NULL; 346} 347 348U_CAPI UCollator* 349ucol_open(const char *loc, 350 UErrorCode *status) 351{ 352 U_NAMESPACE_USE 353 354 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); 355 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); 356 UCollator *result = NULL; 357 358#if !UCONFIG_NO_SERVICE 359 result = Collator::createUCollator(loc, status); 360 if (result == NULL) 361#endif 362 { 363 result = ucol_open_internal(loc, status); 364 } 365 UTRACE_EXIT_PTR_STATUS(result, *status); 366 return result; 367} 368 369 370UCollator* 371ucol_openRulesForImport( const UChar *rules, 372 int32_t rulesLength, 373 UColAttributeValue normalizationMode, 374 UCollationStrength strength, 375 UParseError *parseError, 376 GetCollationRulesFunction importFunc, 377 void* context, 378 UErrorCode *status) 379{ 380 UColTokenParser src; 381 UColAttributeValue norm; 382 UParseError tErr; 383 384 if(status == NULL || U_FAILURE(*status)){ 385 return 0; 386 } 387 388 if(rules == NULL || rulesLength < -1) { 389 *status = U_ILLEGAL_ARGUMENT_ERROR; 390 return 0; 391 } 392 393 if(rulesLength == -1) { 394 rulesLength = u_strlen(rules); 395 } 396 397 if(parseError == NULL){ 398 parseError = &tErr; 399 } 400 401 switch(normalizationMode) { 402 case UCOL_OFF: 403 case UCOL_ON: 404 case UCOL_DEFAULT: 405 norm = normalizationMode; 406 break; 407 default: 408 *status = U_ILLEGAL_ARGUMENT_ERROR; 409 return 0; 410 } 411 412 UCollator *result = NULL; 413 UCATableHeader *table = NULL; 414 UCollator *UCA = ucol_initUCA(status); 415 416 if(U_FAILURE(*status)){ 417 return NULL; 418 } 419 420 ucol_tok_initTokenList(&src, rules, rulesLength, UCA, importFunc, context, status); 421 ucol_tok_assembleTokenList(&src,parseError, status); 422 423 if(U_FAILURE(*status)) { 424 /* if status is U_ILLEGAL_ARGUMENT_ERROR, src->current points at the offending option */ 425 /* if status is U_INVALID_FORMAT_ERROR, src->current points after the problematic part of the rules */ 426 /* so something might be done here... or on lower level */ 427#ifdef UCOL_DEBUG 428 if(*status == U_ILLEGAL_ARGUMENT_ERROR) { 429 fprintf(stderr, "bad option starting at offset %i\n", (int)(src.current-src.source)); 430 } else { 431 fprintf(stderr, "invalid rule just before offset %i\n", (int)(src.current-src.source)); 432 } 433#endif 434 goto cleanup; 435 } 436 437 if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */ 438 /* also, if we wanted to remove some contractions, we should make a tailoring */ 439 table = ucol_assembleTailoringTable(&src, status); 440 if(U_SUCCESS(*status)) { 441 // builder version 442 table->version[0] = UCOL_BUILDER_VERSION; 443 // no tailoring information on this level 444 table->version[1] = table->version[2] = table->version[3] = 0; 445 // set UCD version 446 u_getUnicodeVersion(table->UCDVersion); 447 // set UCA version 448 uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo)); 449 result = ucol_initCollator(table, 0, UCA, status); 450 if (U_FAILURE(*status)) { 451 goto cleanup; 452 } 453 result->hasRealData = TRUE; 454 result->freeImageOnClose = TRUE; 455 } 456 } else { /* no rules, but no error either */ 457 // must be only options 458 // We will init the collator from UCA 459 result = ucol_initCollator(UCA->image, 0, UCA, status); 460 // Check for null result 461 if (U_FAILURE(*status)) { 462 goto cleanup; 463 } 464 // And set only the options 465 UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet)); 466 /* test for NULL */ 467 if (opts == NULL) { 468 *status = U_MEMORY_ALLOCATION_ERROR; 469 goto cleanup; 470 } 471 uprv_memcpy(opts, src.opts, sizeof(UColOptionSet)); 472 ucol_setOptionsFromHeader(result, opts, status); 473 ucol_setReorderCodesFromParser(result, &src, status); 474 result->freeOptionsOnClose = TRUE; 475 result->hasRealData = FALSE; 476 result->freeImageOnClose = FALSE; 477 } 478 479 if(U_SUCCESS(*status)) { 480 UChar *newRules; 481 result->dataVersion[0] = UCOL_BUILDER_VERSION; 482 if(rulesLength > 0) { 483 newRules = (UChar *)uprv_malloc((rulesLength+1)*U_SIZEOF_UCHAR); 484 /* test for NULL */ 485 if (newRules == NULL) { 486 *status = U_MEMORY_ALLOCATION_ERROR; 487 goto cleanup; 488 } 489 uprv_memcpy(newRules, rules, rulesLength*U_SIZEOF_UCHAR); 490 newRules[rulesLength]=0; 491 result->rules = newRules; 492 result->rulesLength = rulesLength; 493 result->freeRulesOnClose = TRUE; 494 } 495 result->ucaRules = NULL; 496 result->actualLocale = NULL; 497 result->validLocale = NULL; 498 result->requestedLocale = NULL; 499 ucol_buildPermutationTable(result, status); 500 ucol_setAttribute(result, UCOL_STRENGTH, strength, status); 501 ucol_setAttribute(result, UCOL_NORMALIZATION_MODE, norm, status); 502 } else { 503cleanup: 504 if(result != NULL) { 505 ucol_close(result); 506 } else { 507 if(table != NULL) { 508 uprv_free(table); 509 } 510 } 511 result = NULL; 512 } 513 514 ucol_tok_closeTokenList(&src); 515 516 return result; 517} 518 519U_CAPI UCollator* U_EXPORT2 520ucol_openRules( const UChar *rules, 521 int32_t rulesLength, 522 UColAttributeValue normalizationMode, 523 UCollationStrength strength, 524 UParseError *parseError, 525 UErrorCode *status) 526{ 527 return ucol_openRulesForImport(rules, 528 rulesLength, 529 normalizationMode, 530 strength, 531 parseError, 532 ucol_tok_getRulesFromBundle, 533 NULL, 534 status); 535} 536 537U_CAPI int32_t U_EXPORT2 538ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { 539 UErrorCode status = U_ZERO_ERROR; 540 int32_t len = 0; 541 int32_t UCAlen = 0; 542 const UChar* ucaRules = 0; 543 const UChar *rules = ucol_getRules(coll, &len); 544 if(delta == UCOL_FULL_RULES) { 545 /* take the UCA rules and append real rules at the end */ 546 /* UCA rules will be probably coming from the root RB */ 547 ucaRules = coll->ucaRules; 548 if (ucaRules) { 549 UCAlen = u_strlen(ucaRules); 550 } 551 /* 552 ucaRules = ures_getStringByKey(coll->rb,"UCARules",&UCAlen,&status); 553 UResourceBundle* cresb = ures_getByKeyWithFallback(coll->rb, "collations", NULL, &status); 554 UResourceBundle* uca = ures_getByKeyWithFallback(cresb, "UCA", NULL, &status); 555 ucaRules = ures_getStringByKey(uca,"Sequence",&UCAlen,&status); 556 ures_close(uca); 557 ures_close(cresb); 558 */ 559 } 560 if(U_FAILURE(status)) { 561 return 0; 562 } 563 if(buffer!=0 && bufferLen>0){ 564 *buffer=0; 565 if(UCAlen > 0) { 566 u_memcpy(buffer, ucaRules, uprv_min(UCAlen, bufferLen)); 567 } 568 if(len > 0 && bufferLen > UCAlen) { 569 u_memcpy(buffer+UCAlen, rules, uprv_min(len, bufferLen-UCAlen)); 570 } 571 } 572 return u_terminateUChars(buffer, bufferLen, len+UCAlen, &status); 573} 574 575static const UChar _NUL = 0; 576 577U_CAPI const UChar* U_EXPORT2 578ucol_getRules( const UCollator *coll, 579 int32_t *length) 580{ 581 if(coll->rules != NULL) { 582 *length = coll->rulesLength; 583 return coll->rules; 584 } 585 else { 586 *length = 0; 587 return &_NUL; 588 } 589} 590 591U_CAPI UBool U_EXPORT2 592ucol_equals(const UCollator *source, const UCollator *target) { 593 UErrorCode status = U_ZERO_ERROR; 594 // if pointers are equal, collators are equal 595 if(source == target) { 596 return TRUE; 597 } 598 int32_t i = 0, j = 0; 599 // if any of attributes are different, collators are not equal 600 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) { 601 if(ucol_getAttribute(source, (UColAttribute)i, &status) != ucol_getAttribute(target, (UColAttribute)i, &status) || U_FAILURE(status)) { 602 return FALSE; 603 } 604 } 605 if (source->reorderCodesLength != target->reorderCodesLength){ 606 return FALSE; 607 } 608 for (i = 0; i < source->reorderCodesLength; i++) { 609 if(source->reorderCodes[i] != target->reorderCodes[i]) { 610 return FALSE; 611 } 612 } 613 614 int32_t sourceRulesLen = 0, targetRulesLen = 0; 615 const UChar *sourceRules = ucol_getRules(source, &sourceRulesLen); 616 const UChar *targetRules = ucol_getRules(target, &targetRulesLen); 617 618 if(sourceRulesLen == targetRulesLen && u_strncmp(sourceRules, targetRules, sourceRulesLen) == 0) { 619 // all the attributes are equal and the rules are equal - collators are equal 620 return(TRUE); 621 } 622 // hard part, need to construct tree from rules and see if they yield the same tailoring 623 UBool result = TRUE; 624 UParseError parseError; 625 UColTokenParser sourceParser, targetParser; 626 int32_t sourceListLen = 0, targetListLen = 0; 627 ucol_tok_initTokenList(&sourceParser, sourceRules, sourceRulesLen, source->UCA, ucol_tok_getRulesFromBundle, NULL, &status); 628 ucol_tok_initTokenList(&targetParser, targetRules, targetRulesLen, target->UCA, ucol_tok_getRulesFromBundle, NULL, &status); 629 sourceListLen = ucol_tok_assembleTokenList(&sourceParser, &parseError, &status); 630 targetListLen = ucol_tok_assembleTokenList(&targetParser, &parseError, &status); 631 632 if(sourceListLen != targetListLen) { 633 // different number of resets 634 result = FALSE; 635 } else { 636 UColToken *sourceReset = NULL, *targetReset = NULL; 637 UChar *sourceResetString = NULL, *targetResetString = NULL; 638 int32_t sourceStringLen = 0, targetStringLen = 0; 639 for(i = 0; i < sourceListLen; i++) { 640 sourceReset = sourceParser.lh[i].reset; 641 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); 642 sourceStringLen = sourceReset->source >> 24; 643 for(j = 0; j < sourceListLen; j++) { 644 targetReset = targetParser.lh[j].reset; 645 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); 646 targetStringLen = targetReset->source >> 24; 647 if(sourceStringLen == targetStringLen && (u_strncmp(sourceResetString, targetResetString, sourceStringLen) == 0)) { 648 sourceReset = sourceParser.lh[i].first; 649 targetReset = targetParser.lh[j].first; 650 while(sourceReset != NULL && targetReset != NULL) { 651 sourceResetString = sourceParser.source+(sourceReset->source & 0xFFFFFF); 652 sourceStringLen = sourceReset->source >> 24; 653 targetResetString = targetParser.source+(targetReset->source & 0xFFFFFF); 654 targetStringLen = targetReset->source >> 24; 655 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { 656 result = FALSE; 657 goto returnResult; 658 } 659 // probably also need to check the expansions 660 if(sourceReset->expansion) { 661 if(!targetReset->expansion) { 662 result = FALSE; 663 goto returnResult; 664 } else { 665 // compare expansions 666 sourceResetString = sourceParser.source+(sourceReset->expansion& 0xFFFFFF); 667 sourceStringLen = sourceReset->expansion >> 24; 668 targetResetString = targetParser.source+(targetReset->expansion & 0xFFFFFF); 669 targetStringLen = targetReset->expansion >> 24; 670 if(sourceStringLen != targetStringLen || (u_strncmp(sourceResetString, targetResetString, sourceStringLen) != 0)) { 671 result = FALSE; 672 goto returnResult; 673 } 674 } 675 } else { 676 if(targetReset->expansion) { 677 result = FALSE; 678 goto returnResult; 679 } 680 } 681 sourceReset = sourceReset->next; 682 targetReset = targetReset->next; 683 } 684 if(sourceReset != targetReset) { // at least one is not NULL 685 // there are more tailored elements in one list 686 result = FALSE; 687 goto returnResult; 688 } 689 690 691 break; 692 } 693 } 694 // couldn't find the reset anchor, so the collators are not equal 695 if(j == sourceListLen) { 696 result = FALSE; 697 goto returnResult; 698 } 699 } 700 } 701 702returnResult: 703 ucol_tok_closeTokenList(&sourceParser); 704 ucol_tok_closeTokenList(&targetParser); 705 return result; 706 707} 708 709U_CAPI int32_t U_EXPORT2 710ucol_getDisplayName( const char *objLoc, 711 const char *dispLoc, 712 UChar *result, 713 int32_t resultLength, 714 UErrorCode *status) 715{ 716 U_NAMESPACE_USE 717 718 if(U_FAILURE(*status)) return -1; 719 UnicodeString dst; 720 if(!(result==NULL && resultLength==0)) { 721 // NULL destination for pure preflighting: empty dummy string 722 // otherwise, alias the destination buffer 723 dst.setTo(result, 0, resultLength); 724 } 725 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); 726 return dst.extract(result, resultLength, *status); 727} 728 729U_CAPI const char* U_EXPORT2 730ucol_getAvailable(int32_t index) 731{ 732 int32_t count = 0; 733 const Locale *loc = Collator::getAvailableLocales(count); 734 if (loc != NULL && index < count) { 735 return loc[index].getName(); 736 } 737 return NULL; 738} 739 740U_CAPI int32_t U_EXPORT2 741ucol_countAvailable() 742{ 743 int32_t count = 0; 744 Collator::getAvailableLocales(count); 745 return count; 746} 747 748#if !UCONFIG_NO_SERVICE 749U_CAPI UEnumeration* U_EXPORT2 750ucol_openAvailableLocales(UErrorCode *status) { 751 U_NAMESPACE_USE 752 753 // This is a wrapper over Collator::getAvailableLocales() 754 if (U_FAILURE(*status)) { 755 return NULL; 756 } 757 StringEnumeration *s = Collator::getAvailableLocales(); 758 if (s == NULL) { 759 *status = U_MEMORY_ALLOCATION_ERROR; 760 return NULL; 761 } 762 return uenum_openFromStringEnumeration(s, status); 763} 764#endif 765 766// Note: KEYWORDS[0] != RESOURCE_NAME - alan 767 768static const char RESOURCE_NAME[] = "collations"; 769 770static const char* const KEYWORDS[] = { "collation" }; 771 772#define KEYWORD_COUNT (sizeof(KEYWORDS)/sizeof(KEYWORDS[0])) 773 774U_CAPI UEnumeration* U_EXPORT2 775ucol_getKeywords(UErrorCode *status) { 776 UEnumeration *result = NULL; 777 if (U_SUCCESS(*status)) { 778 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); 779 } 780 return result; 781} 782 783U_CAPI UEnumeration* U_EXPORT2 784ucol_getKeywordValues(const char *keyword, UErrorCode *status) { 785 if (U_FAILURE(*status)) { 786 return NULL; 787 } 788 // hard-coded to accept exactly one collation keyword 789 // modify if additional collation keyword is added later 790 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) 791 { 792 *status = U_ILLEGAL_ARGUMENT_ERROR; 793 return NULL; 794 } 795 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); 796} 797 798static const UEnumeration defaultKeywordValues = { 799 NULL, 800 NULL, 801 ulist_close_keyword_values_iterator, 802 ulist_count_keyword_values, 803 uenum_unextDefault, 804 ulist_next_keyword_value, 805 ulist_reset_keyword_values_iterator 806}; 807 808#include <stdio.h> 809 810U_CAPI UEnumeration* U_EXPORT2 811ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale, 812 UBool /*commonlyUsed*/, UErrorCode* status) { 813 /* Get the locale base name. */ 814 char localeBuffer[ULOC_FULLNAME_CAPACITY] = ""; 815 uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status); 816 817 /* Create the 2 lists 818 * -values is the temp location for the keyword values 819 * -results hold the actual list used by the UEnumeration object 820 */ 821 UList *values = ulist_createEmptyList(status); 822 UList *results = ulist_createEmptyList(status); 823 UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 824 if (U_FAILURE(*status) || en == NULL) { 825 if (en == NULL) { 826 *status = U_MEMORY_ALLOCATION_ERROR; 827 } else { 828 uprv_free(en); 829 } 830 ulist_deleteList(values); 831 ulist_deleteList(results); 832 return NULL; 833 } 834 835 memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); 836 en->context = results; 837 838 /* Open the resource bundle for collation with the given locale. */ 839 UResourceBundle bundle, collations, collres, defres; 840 ures_initStackObject(&bundle); 841 ures_initStackObject(&collations); 842 ures_initStackObject(&collres); 843 ures_initStackObject(&defres); 844 845 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); 846 847 while (U_SUCCESS(*status)) { 848 ures_getByKey(&bundle, RESOURCE_NAME, &collations, status); 849 ures_resetIterator(&collations); 850 while (U_SUCCESS(*status) && ures_hasNext(&collations)) { 851 ures_getNextResource(&collations, &collres, status); 852 const char *key = ures_getKey(&collres); 853 /* If the key is default, get the string and store it in results list only 854 * if results list is empty. 855 */ 856 if (uprv_strcmp(key, "default") == 0) { 857 if (ulist_getListSize(results) == 0) { 858 char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); 859 int32_t defcollLength = ULOC_KEYWORDS_CAPACITY; 860 861 ures_getNextResource(&collres, &defres, status); 862#if U_CHARSET_FAMILY==U_ASCII_FAMILY 863 /* optimize - use the utf-8 string */ 864 ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status); 865#else 866 { 867 const UChar* defString = ures_getString(&defres, &defcollLength, status); 868 if(U_SUCCESS(*status)) { 869 if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) { 870 *status = U_BUFFER_OVERFLOW_ERROR; 871 } else { 872 u_UCharsToChars(defString, defcoll, defcollLength+1); 873 } 874 } 875 } 876#endif 877 878 ulist_addItemBeginList(results, defcoll, TRUE, status); 879 } 880 } else { 881 ulist_addItemEndList(values, key, FALSE, status); 882 } 883 } 884 885 /* If the locale is "" this is root so exit. */ 886 if (uprv_strlen(localeBuffer) == 0) { 887 break; 888 } 889 /* Get the parent locale and open a new resource bundle. */ 890 uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status); 891 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); 892 } 893 894 ures_close(&defres); 895 ures_close(&collres); 896 ures_close(&collations); 897 ures_close(&bundle); 898 899 if (U_SUCCESS(*status)) { 900 char *value = NULL; 901 ulist_resetList(values); 902 while ((value = (char *)ulist_getNext(values)) != NULL) { 903 if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) { 904 ulist_addItemEndList(results, value, FALSE, status); 905 if (U_FAILURE(*status)) { 906 break; 907 } 908 } 909 } 910 } 911 912 ulist_deleteList(values); 913 914 if (U_FAILURE(*status)){ 915 uenum_close(en); 916 en = NULL; 917 } else { 918 ulist_resetList(results); 919 } 920 921 return en; 922} 923 924U_CAPI int32_t U_EXPORT2 925ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 926 const char* keyword, const char* locale, 927 UBool* isAvailable, UErrorCode* status) 928{ 929 // N.B.: Resource name is "collations" but keyword is "collation" 930 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, 931 "collations", keyword, locale, 932 isAvailable, TRUE, status); 933} 934 935/* returns the locale name the collation data comes from */ 936U_CAPI const char * U_EXPORT2 937ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 938 return ucol_getLocaleByType(coll, type, status); 939} 940 941U_CAPI const char * U_EXPORT2 942ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { 943 const char *result = NULL; 944 if(status == NULL || U_FAILURE(*status)) { 945 return NULL; 946 } 947 UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); 948 UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); 949 950 switch(type) { 951 case ULOC_ACTUAL_LOCALE: 952 result = coll->actualLocale; 953 break; 954 case ULOC_VALID_LOCALE: 955 result = coll->validLocale; 956 break; 957 case ULOC_REQUESTED_LOCALE: 958 result = coll->requestedLocale; 959 break; 960 default: 961 *status = U_ILLEGAL_ARGUMENT_ERROR; 962 } 963 UTRACE_DATA1(UTRACE_INFO, "result = %s", result); 964 UTRACE_EXIT_STATUS(*status); 965 return result; 966} 967 968U_CFUNC void U_EXPORT2 969ucol_setReqValidLocales(UCollator *coll, char *requestedLocaleToAdopt, char *validLocaleToAdopt, char *actualLocaleToAdopt) 970{ 971 if (coll) { 972 if (coll->validLocale) { 973 uprv_free(coll->validLocale); 974 } 975 coll->validLocale = validLocaleToAdopt; 976 if (coll->requestedLocale) { // should always have 977 uprv_free(coll->requestedLocale); 978 } 979 coll->requestedLocale = requestedLocaleToAdopt; 980 if (coll->actualLocale) { 981 uprv_free(coll->actualLocale); 982 } 983 coll->actualLocale = actualLocaleToAdopt; 984 } 985} 986 987U_CAPI USet * U_EXPORT2 988ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) 989{ 990 U_NAMESPACE_USE 991 992 if(status == NULL || U_FAILURE(*status)) { 993 return NULL; 994 } 995 if(coll == NULL || coll->UCA == NULL) { 996 *status = U_ILLEGAL_ARGUMENT_ERROR; 997 return NULL; 998 } 999 UParseError parseError; 1000 UColTokenParser src; 1001 int32_t rulesLen = 0; 1002 const UChar *rules = ucol_getRules(coll, &rulesLen); 1003 UBool startOfRules = TRUE; 1004 // we internally use the C++ class, for the following reasons: 1005 // 1. we need to utilize canonical iterator, which is a C++ only class 1006 // 2. canonical iterator returns UnicodeStrings - USet cannot take them 1007 // 3. USet is internally really UnicodeSet, C is just a wrapper 1008 UnicodeSet *tailored = new UnicodeSet(); 1009 UnicodeString pattern; 1010 UnicodeString empty; 1011 CanonicalIterator it(empty, *status); 1012 1013 1014 // The idea is to tokenize the rule set. For each non-reset token, 1015 // we add all the canonicaly equivalent FCD sequences 1016 ucol_tok_initTokenList(&src, rules, rulesLen, coll->UCA, ucol_tok_getRulesFromBundle, NULL, status); 1017 while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, status) != NULL) { 1018 startOfRules = FALSE; 1019 if(src.parsedToken.strength != UCOL_TOK_RESET) { 1020 const UChar *stuff = src.source+(src.parsedToken.charsOffset); 1021 it.setSource(UnicodeString(stuff, src.parsedToken.charsLen), *status); 1022 pattern = it.next(); 1023 while(!pattern.isBogus()) { 1024 if(Normalizer::quickCheck(pattern, UNORM_FCD, *status) != UNORM_NO) { 1025 tailored->add(pattern); 1026 } 1027 pattern = it.next(); 1028 } 1029 } 1030 } 1031 ucol_tok_closeTokenList(&src); 1032 return (USet *)tailored; 1033} 1034 1035/* 1036 * Collation Reordering 1037 */ 1038 1039void ucol_setReorderCodesFromParser(UCollator *coll, UColTokenParser *parser, UErrorCode *status) { 1040 if (U_FAILURE(*status)) { 1041 return; 1042 } 1043 1044 if (parser->reorderCodesLength == 0 || parser->reorderCodes == NULL) { 1045 return; 1046 } 1047 1048 coll->reorderCodesLength = 0; 1049 if (coll->reorderCodes != NULL && coll->freeReorderCodesOnClose == TRUE) { 1050 uprv_free(coll->reorderCodes); 1051 } 1052 1053 if (coll->defaultReorderCodes != NULL && coll->freeDefaultReorderCodesOnClose == TRUE) { 1054 uprv_free(coll->defaultReorderCodes); 1055 } 1056 coll->defaultReorderCodesLength = parser->reorderCodesLength; 1057 coll->defaultReorderCodes = (int32_t*) uprv_malloc(coll->defaultReorderCodesLength * sizeof(int32_t)); 1058 if (coll->defaultReorderCodes == NULL) { 1059 *status = U_MEMORY_ALLOCATION_ERROR; 1060 return; 1061 } 1062 uprv_memcpy(coll->defaultReorderCodes, parser->reorderCodes, coll->defaultReorderCodesLength * sizeof(int32_t)); 1063 coll->freeDefaultReorderCodesOnClose = TRUE; 1064 1065 coll->reorderCodesLength = parser->reorderCodesLength; 1066 coll->reorderCodes = (int32_t*) uprv_malloc(coll->reorderCodesLength * sizeof(int32_t)); 1067 if (coll->reorderCodes == NULL) { 1068 *status = U_MEMORY_ALLOCATION_ERROR; 1069 return; 1070 } 1071 uprv_memcpy(coll->reorderCodes, parser->reorderCodes, coll->reorderCodesLength * sizeof(int32_t)); 1072 coll->freeReorderCodesOnClose = TRUE; 1073} 1074 1075/* 1076 * Data is stored in the reorder code to lead byte table as: 1077 * index count - unsigned short (2 bytes) - number of index entries 1078 * data size - unsigned short (2 bytes) - number of unsigned short data elements 1079 * index[index count] - array of 2 unsigned shorts (4 bytes each entry) 1080 * - reorder code, offset 1081 * - index is sorted by reorder code 1082 * - if an offset has the high bit set then it is not an offset but a single data entry 1083 * once the high bit is stripped off 1084 * data[data size] - array of unsigned short (2 bytes each entry) 1085 * - the data is an usigned short count followed by count number 1086 * of lead bytes stored in an unsigned short 1087 */ 1088U_CFUNC int U_EXPORT2 1089ucol_getLeadBytesForReorderCode(const UCollator *uca, int reorderCode, uint16_t* returnLeadBytes, int returnCapacity) { 1090 uint16_t reorderCodeIndexLength = *((uint16_t*) ((uint8_t *)uca->image + uca->image->scriptToLeadByte)); 1091 uint16_t* reorderCodeIndex = (uint16_t*) ((uint8_t *)uca->image + uca->image->scriptToLeadByte + 2 *sizeof(uint16_t)); 1092 1093 // reorder code index is 2 uint16_t's - reorder code + offset 1094 for (int i = 0; i < reorderCodeIndexLength; i++) { 1095 if (reorderCode == reorderCodeIndex[i*2]) { 1096 uint16_t dataOffset = reorderCodeIndex[(i*2) + 1]; 1097 if ((dataOffset & 0x8000) == 0x8000) { 1098 // offset isn't offset but instead is a single data element 1099 if (returnCapacity >= 1) { 1100 returnLeadBytes[0] = dataOffset & ~0x8000; 1101 return 1; 1102 } 1103 return 0; 1104 } 1105 uint16_t* dataOffsetBase = (uint16_t*) ((uint8_t *)reorderCodeIndex + reorderCodeIndexLength * (2 * sizeof(uint16_t))); 1106 uint16_t leadByteCount = *(dataOffsetBase + dataOffset); 1107 leadByteCount = leadByteCount > returnCapacity ? returnCapacity : leadByteCount; 1108 uprv_memcpy(returnLeadBytes, dataOffsetBase + dataOffset + 1, leadByteCount * sizeof(uint16_t)); 1109 return leadByteCount; 1110 } 1111 } 1112 return 0; 1113} 1114 1115/* 1116 * Data is stored in the lead byte to reorder code table as: 1117 * index count - unsigned short (2 bytes) - number of index entries 1118 * data size - unsigned short (2 bytes) - number of unsigned short data elements 1119 * index[index count] - array of unsigned short (2 bytes each entry) 1120 * - index is sorted by lead byte 1121 * - if an index has the high bit set then it is not an index but a single data entry 1122 * once the high bit is stripped off 1123 * data[data size] - array of unsigned short (2 bytes each entry) 1124 * - the data is an usigned short count followed by count number of reorder codes 1125 */ 1126U_CFUNC int U_EXPORT2 1127ucol_getReorderCodesForLeadByte(const UCollator *uca, int leadByte, int16_t* returnReorderCodes, int returnCapacity) { 1128 uint16_t* leadByteTable = ((uint16_t*) ((uint8_t *)uca->image + uca->image->leadByteToScript)); 1129 uint16_t leadByteIndexLength = *leadByteTable; 1130 if (leadByte >= leadByteIndexLength) { 1131 return 0; 1132 } 1133 uint16_t leadByteIndex = *(leadByteTable + (2 + leadByte)); 1134 1135 if ((leadByteIndex & 0x8000) == 0x8000) { 1136 // offset isn't offset but instead is a single data element 1137 if (returnCapacity >= 1) { 1138 returnReorderCodes[0] = leadByteIndex & ~0x8000; 1139 return 1; 1140 } 1141 return 0; 1142 } 1143 //uint16_t* dataOffsetBase = leadByteTable + (2 + leadByteIndexLength); 1144 uint16_t* reorderCodeData = leadByteTable + (2 + leadByteIndexLength) + leadByteIndex; 1145 uint16_t reorderCodeCount = *reorderCodeData > returnCapacity ? returnCapacity : *reorderCodeData; 1146 uprv_memcpy(returnReorderCodes, reorderCodeData + 1, reorderCodeCount * sizeof(uint16_t)); 1147 return reorderCodeCount; 1148} 1149 1150// used to mark ignorable reorder code slots 1151static const int32_t UCOL_REORDER_CODE_IGNORE = UCOL_REORDER_CODE_LIMIT + 1; 1152 1153U_CFUNC void U_EXPORT2 1154ucol_buildPermutationTable(UCollator *coll, UErrorCode *status) { 1155 uint16_t leadBytesSize = 256; 1156 uint16_t leadBytes[256]; 1157 int32_t internalReorderCodesLength = coll->reorderCodesLength + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); 1158 int32_t* internalReorderCodes; 1159 1160 // The lowest byte that hasn't been assigned a mapping 1161 int toBottom = 0x03; 1162 // The highest byte that hasn't been assigned a mapping - don't include the special or trailing 1163 int toTop = 0xe4; 1164 1165 // are we filling from the bottom? 1166 bool fromTheBottom = true; 1167 int32_t reorderCodesIndex = -1; 1168 1169 // lead bytes that have alread been assigned to the permutation table 1170 bool newLeadByteUsed[256]; 1171 // permutation table slots that have already been filled 1172 bool permutationSlotFilled[256]; 1173 1174 // nothing to do 1175 if(U_FAILURE(*status) || coll == NULL) { 1176 return; 1177 } 1178 1179 // clear the reordering 1180 if (coll->reorderCodes == NULL || coll->reorderCodesLength == 0 1181 || (coll->reorderCodesLength == 1 && coll->reorderCodes[0] == UCOL_REORDER_CODE_NONE)) { 1182 if (coll->leadBytePermutationTable != NULL) { 1183 if (coll->freeLeadBytePermutationTableOnClose) { 1184 uprv_free(coll->leadBytePermutationTable); 1185 } 1186 coll->leadBytePermutationTable = NULL; 1187 coll->reorderCodesLength = 0; 1188 } 1189 return; 1190 } 1191 1192 // set reordering to the default reordering 1193 if (coll->reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) { 1194 if (coll->reorderCodesLength != 1) { 1195 *status = U_ILLEGAL_ARGUMENT_ERROR; 1196 return; 1197 } 1198 if (coll->freeReorderCodesOnClose == TRUE) { 1199 uprv_free(coll->reorderCodes); 1200 } 1201 coll->reorderCodes = NULL; 1202 1203 if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) { 1204 uprv_free(coll->leadBytePermutationTable); 1205 } 1206 coll->leadBytePermutationTable = NULL; 1207 1208 if (coll->defaultReorderCodesLength == 0) { 1209 return; 1210 } 1211 1212 coll->reorderCodes = (int32_t*)uprv_malloc(coll->defaultReorderCodesLength * sizeof(int32_t)); 1213 coll->freeReorderCodesOnClose = TRUE; 1214 if (coll->reorderCodes == NULL) { 1215 *status = U_MEMORY_ALLOCATION_ERROR; 1216 return; 1217 } 1218 coll->reorderCodesLength = coll->defaultReorderCodesLength; 1219 uprv_memcpy(coll->defaultReorderCodes, coll->reorderCodes, coll->reorderCodesLength * sizeof(int32_t)); 1220 } 1221 1222 if (coll->leadBytePermutationTable == NULL) { 1223 coll->leadBytePermutationTable = (uint8_t*)uprv_malloc(256*sizeof(uint8_t)); 1224 coll->freeLeadBytePermutationTableOnClose = TRUE; 1225 if (coll->leadBytePermutationTable == NULL) { 1226 *status = U_MEMORY_ALLOCATION_ERROR; 1227 return; 1228 } 1229 } 1230 1231 // prefill the reordering codes with the leading entries 1232 internalReorderCodes = (int32_t*)uprv_malloc(internalReorderCodesLength * sizeof(int32_t)); 1233 if (internalReorderCodes == NULL) { 1234 *status = U_MEMORY_ALLOCATION_ERROR; 1235 if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) { 1236 uprv_free(coll->leadBytePermutationTable); 1237 } 1238 coll->leadBytePermutationTable = NULL; 1239 return; 1240 } 1241 1242 for (uint32_t codeIndex = 0; codeIndex < (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST); codeIndex++) { 1243 internalReorderCodes[codeIndex] = UCOL_REORDER_CODE_FIRST + codeIndex; 1244 } 1245 for (int32_t codeIndex = 0; codeIndex < coll->reorderCodesLength; codeIndex++) { 1246 uint32_t reorderCodesCode = coll->reorderCodes[codeIndex]; 1247 internalReorderCodes[codeIndex + (UCOL_REORDER_CODE_LIMIT - UCOL_REORDER_CODE_FIRST)] = reorderCodesCode; 1248 if (reorderCodesCode >= UCOL_REORDER_CODE_FIRST && reorderCodesCode < UCOL_REORDER_CODE_LIMIT) { 1249 internalReorderCodes[reorderCodesCode - UCOL_REORDER_CODE_FIRST] = UCOL_REORDER_CODE_IGNORE; 1250 } 1251 } 1252 1253 for (int i = 0; i < 256; i++) { 1254 if (i < toBottom || i > toTop) { 1255 permutationSlotFilled[i] = true; 1256 newLeadByteUsed[i] = true; 1257 coll->leadBytePermutationTable[i] = i; 1258 } else { 1259 permutationSlotFilled[i] = false; 1260 newLeadByteUsed[i] = false; 1261 coll->leadBytePermutationTable[i] = 0; 1262 } 1263 } 1264 1265 /* Start from the front of the list and place each script we encounter at the 1266 * earliest possible locatation in the permutation table. If we encounter 1267 * UNKNOWN, start processing from the back, and place each script in the last 1268 * possible location. At each step, we also need to make sure that any scripts 1269 * that need to not be moved are copied to their same location in the final table. 1270 */ 1271 for (int reorderCodesCount = 0; reorderCodesCount < internalReorderCodesLength; reorderCodesCount++) { 1272 reorderCodesIndex += fromTheBottom ? 1 : -1; 1273 int32_t next = internalReorderCodes[reorderCodesIndex]; 1274 if (next == UCOL_REORDER_CODE_IGNORE) { 1275 continue; 1276 } 1277 if (next == USCRIPT_UNKNOWN) { 1278 if (fromTheBottom == false) { 1279 // double turnaround 1280 *status = U_ILLEGAL_ARGUMENT_ERROR; 1281 if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) { 1282 uprv_free(coll->leadBytePermutationTable); 1283 } 1284 coll->leadBytePermutationTable = NULL; 1285 coll->reorderCodesLength = 0; 1286 if (internalReorderCodes != NULL) { 1287 uprv_free(internalReorderCodes); 1288 } 1289 return; 1290 } 1291 fromTheBottom = false; 1292 reorderCodesIndex = internalReorderCodesLength; 1293 continue; 1294 } 1295 1296 uint16_t leadByteCount = ucol_getLeadBytesForReorderCode(coll->UCA, next, leadBytes, leadBytesSize); 1297 if (fromTheBottom) { 1298 for (int leadByteIndex = 0; leadByteIndex < leadByteCount; leadByteIndex++) { 1299 // don't place a lead byte twice in the permutation table 1300 if (permutationSlotFilled[leadBytes[leadByteIndex]]) { 1301 // lead byte already used 1302 *status = U_ILLEGAL_ARGUMENT_ERROR; 1303 if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) { 1304 uprv_free(coll->leadBytePermutationTable); 1305 } 1306 coll->leadBytePermutationTable = NULL; 1307 coll->reorderCodesLength = 0; 1308 if (internalReorderCodes != NULL) { 1309 uprv_free(internalReorderCodes); 1310 } 1311 return; 1312 } 1313 1314 coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toBottom; 1315 newLeadByteUsed[toBottom] = true; 1316 permutationSlotFilled[leadBytes[leadByteIndex]] = true; 1317 toBottom++; 1318 } 1319 } else { 1320 for (int leadByteIndex = leadByteCount - 1; leadByteIndex >= 0; leadByteIndex--) { 1321 // don't place a lead byte twice in the permutation table 1322 if (permutationSlotFilled[leadBytes[leadByteIndex]]) { 1323 // lead byte already used 1324 *status = U_ILLEGAL_ARGUMENT_ERROR; 1325 if (coll->leadBytePermutationTable != NULL && coll->freeLeadBytePermutationTableOnClose == TRUE) { 1326 uprv_free(coll->leadBytePermutationTable); 1327 } 1328 coll->leadBytePermutationTable = NULL; 1329 coll->reorderCodesLength = 0; 1330 if (internalReorderCodes != NULL) { 1331 uprv_free(internalReorderCodes); 1332 } 1333 return; 1334 } 1335 1336 coll->leadBytePermutationTable[leadBytes[leadByteIndex]] = toTop; 1337 newLeadByteUsed[toTop] = true; 1338 permutationSlotFilled[leadBytes[leadByteIndex]] = true; 1339 toTop--; 1340 } 1341 } 1342 } 1343 1344#ifdef REORDER_DEBUG 1345 fprintf(stdout, "\n@@@@ Partial Script Reordering Table\n"); 1346 for (int i = 0; i < 256; i++) { 1347 fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]); 1348 } 1349 fprintf(stdout, "\n@@@@ Lead Byte Used Table\n"); 1350 for (int i = 0; i < 256; i++) { 1351 fprintf(stdout, "\t%02x = %02x\n", i, newLeadByteUsed[i]); 1352 } 1353 fprintf(stdout, "\n@@@@ Permutation Slot Filled Table\n"); 1354 for (int i = 0; i < 256; i++) { 1355 fprintf(stdout, "\t%02x = %02x\n", i, permutationSlotFilled[i]); 1356 } 1357#endif 1358 1359 /* Copy everything that's left over */ 1360 int reorderCode = 0; 1361 for (int i = 0; i < 256; i++) { 1362 if (!permutationSlotFilled[i]) { 1363 while (reorderCode < 256 && newLeadByteUsed[reorderCode]) { 1364 reorderCode++; 1365 } 1366 coll->leadBytePermutationTable[i] = reorderCode; 1367 permutationSlotFilled[i] = true; 1368 newLeadByteUsed[reorderCode] = true; 1369 } 1370 } 1371 1372#ifdef REORDER_DEBUG 1373 fprintf(stdout, "\n@@@@ Script Reordering Table\n"); 1374 for (int i = 0; i < 256; i++) { 1375 fprintf(stdout, "\t%02x = %02x\n", i, coll->leadBytePermutationTable[i]); 1376 } 1377#endif 1378 1379 if (internalReorderCodes != NULL) { 1380 uprv_free(internalReorderCodes); 1381 } 1382 1383 // force a regen of the latin one table since it is affected by the script reordering 1384 coll->latinOneRegenTable = TRUE; 1385 ucol_updateInternalState(coll, status); 1386} 1387 1388#endif /* #if !UCONFIG_NO_COLLATION */ 1389