/*
*******************************************************************************
*   Copyright (C) 1996-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucol_res.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
* Description:
* This file contains dependencies that the collation run-time doesn't normally
* need. This mainly contains resource bundle usage and collation meta information
*
* Modification history
* Date        Name      Comments
* 1996-1999   various members of ICU team maintained C API for collation framework
* 02/16/2001  synwee    Added internal method getPrevSpecialCE
* 03/01/2001  synwee    Added maxexpansion functionality.
* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
* 2012-2014   markus    Rewritten in C++ again.
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/coll.h"
#include "unicode/localpointer.h"
#include "unicode/locid.h"
#include "unicode/tblcoll.h"
#include "unicode/ucol.h"
#include "unicode/uloc.h"
#include "unicode/unistr.h"
#include "unicode/ures.h"
#include "cmemory.h"
#include "cstring.h"
#include "collationdatareader.h"
#include "collationroot.h"
#include "collationtailoring.h"
#include "putilimp.h"
#include "uassert.h"
#include "ucln_in.h"
#include "ucol_imp.h"
#include "uenumimp.h"
#include "ulist.h"
#include "umutex.h"
#include "uresimp.h"
#include "ustrenum.h"
#include "utracimp.h"

// Number of elements in a true array (not a pointer), as an int32_t.
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

U_NAMESPACE_BEGIN

namespace {

// Cached "UCARules" string of the root collation bundle.
// Filled in by CollationLoader::loadRootRules() and cleared by ucol_res_cleanup().
// The pointer is obtained from rootBundle, so rootBundle is kept open alongside it.
static const UChar *rootRules = NULL;
// Length of rootRules as reported by ures_getStringByKey().
static int32_t rootRulesLength = 0;
// Root collation resource bundle opened by loadRootRules(); closed in ucol_res_cleanup().
static UResourceBundle *rootBundle = NULL;
// Guards the one-time call of loadRootRules() made from appendRootRules().
static UInitOnce gInitOnce = U_INITONCE_INITIALIZER;

}  // namespace

66U_CDECL_BEGIN 67 68static UBool U_CALLCONV 69ucol_res_cleanup() { 70 rootRules = NULL; 71 rootRulesLength = 0; 72 ures_close(rootBundle); 73 rootBundle = NULL; 74 gInitOnce.reset(); 75 return TRUE; 76} 77 78U_CDECL_END 79 80void 81CollationLoader::loadRootRules(UErrorCode &errorCode) { 82 if(U_FAILURE(errorCode)) { return; } 83 rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode); 84 if(U_FAILURE(errorCode)) { return; } 85 rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode); 86 if(U_FAILURE(errorCode)) { 87 ures_close(rootBundle); 88 rootBundle = NULL; 89 return; 90 } 91 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); 92} 93 94void 95CollationLoader::appendRootRules(UnicodeString &s) { 96 UErrorCode errorCode = U_ZERO_ERROR; 97 umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode); 98 if(U_SUCCESS(errorCode)) { 99 s.append(rootRules, rootRulesLength); 100 } 101} 102 103UnicodeString * 104CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) { 105 if(U_FAILURE(errorCode)) { return NULL; } 106 U_ASSERT(collationType != NULL && *collationType != 0); 107 // Copy the type for lowercasing. 
108 char type[16]; 109 int32_t typeLength = uprv_strlen(collationType); 110 if(typeLength >= LENGTHOF(type)) { 111 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 112 return NULL; 113 } 114 uprv_memcpy(type, collationType, typeLength + 1); 115 T_CString_toLowerCase(type); 116 117 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode)); 118 LocalUResourceBundlePointer collations( 119 ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode)); 120 LocalUResourceBundlePointer data( 121 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); 122 int32_t length; 123 const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode); 124 if(U_FAILURE(errorCode)) { return NULL; } 125 126 // No string pointer aliasing so that we need not hold onto the resource bundle. 127 UnicodeString *rules = new UnicodeString(s, length); 128 if(rules == NULL) { 129 errorCode = U_MEMORY_ALLOCATION_ERROR; 130 return NULL; 131 } 132 return rules; 133} 134 135const CollationTailoring * 136CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) { 137 const CollationTailoring *root = CollationRoot::getRoot(errorCode); 138 if(U_FAILURE(errorCode)) { return NULL; } 139 const char *name = locale.getName(); 140 if(*name == 0 || uprv_strcmp(name, "root") == 0) { 141 validLocale = Locale::getRoot(); 142 return root; 143 } 144 145 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode)); 146 if(errorCode == U_MISSING_RESOURCE_ERROR) { 147 errorCode = U_USING_DEFAULT_WARNING; 148 validLocale = Locale::getRoot(); 149 return root; 150 } 151 const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode); 152 if(U_FAILURE(errorCode)) { return NULL; } 153 validLocale = Locale(vLocale); 154 155 // There are zero or more tailorings in the collations table. 
156 LocalUResourceBundlePointer collations( 157 ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode)); 158 if(errorCode == U_MISSING_RESOURCE_ERROR) { 159 errorCode = U_USING_DEFAULT_WARNING; 160 return root; 161 } 162 if(U_FAILURE(errorCode)) { return NULL; } 163 164 // Fetch the collation type from the locale ID and the default type from the data. 165 char type[16]; 166 int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode); 167 if(U_FAILURE(errorCode)) { 168 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 169 return NULL; 170 } 171 type[typeLength] = 0; // in case of U_NOT_TERMINATED_WARNING 172 char defaultType[16]; 173 { 174 UErrorCode internalErrorCode = U_ZERO_ERROR; 175 LocalUResourceBundlePointer def( 176 ures_getByKeyWithFallback(collations.getAlias(), "default", NULL, 177 &internalErrorCode)); 178 int32_t length; 179 const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); 180 if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) { 181 u_UCharsToChars(s, defaultType, length + 1); 182 } else { 183 uprv_strcpy(defaultType, "standard"); 184 } 185 } 186 if(typeLength == 0 || uprv_strcmp(type, "default") == 0) { 187 uprv_strcpy(type, defaultType); 188 } else { 189 T_CString_toLowerCase(type); 190 } 191 192 // Load the collations/type tailoring, with type fallback. 
193 UBool typeFallback = FALSE; 194 LocalUResourceBundlePointer data( 195 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); 196 if(errorCode == U_MISSING_RESOURCE_ERROR && 197 typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) { 198 // fall back from something like "searchjl" to "search" 199 typeFallback = TRUE; 200 type[6] = 0; 201 errorCode = U_ZERO_ERROR; 202 data.adoptInstead( 203 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); 204 } 205 if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) { 206 // fall back to the default type 207 typeFallback = TRUE; 208 uprv_strcpy(type, defaultType); 209 errorCode = U_ZERO_ERROR; 210 data.adoptInstead( 211 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); 212 } 213 if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) { 214 // fall back to the "standard" type 215 typeFallback = TRUE; 216 uprv_strcpy(type, "standard"); 217 errorCode = U_ZERO_ERROR; 218 data.adoptInstead( 219 ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); 220 } 221 if(errorCode == U_MISSING_RESOURCE_ERROR) { 222 errorCode = U_USING_DEFAULT_WARNING; 223 return root; 224 } 225 if(U_FAILURE(errorCode)) { return NULL; } 226 227 LocalPointer<CollationTailoring> t(new CollationTailoring(root->settings)); 228 if(t.isNull() || t->isBogus()) { 229 errorCode = U_MEMORY_ALLOCATION_ERROR; 230 return NULL; 231 } 232 233 // Is this the same as the root collator? If so, then use that instead. 
234 const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode); 235 if(U_FAILURE(errorCode)) { return NULL; } 236 if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) && 237 uprv_strcmp(type, "standard") == 0) { 238 if(typeFallback) { 239 errorCode = U_USING_DEFAULT_WARNING; 240 } 241 return root; 242 } 243 t->actualLocale = Locale(actualLocale); 244 245 // deserialize 246 LocalUResourceBundlePointer binary( 247 ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode)); 248 // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available 249 // but that created undesirable dependencies. 250 int32_t length; 251 const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode); 252 if(U_FAILURE(errorCode)) { return NULL; } 253 CollationDataReader::read(root, inBytes, length, *t, errorCode); 254 // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available 255 // but that created undesirable dependencies. 256 if(U_FAILURE(errorCode)) { return NULL; } 257 258 // Try to fetch the optional rules string. 259 { 260 UErrorCode internalErrorCode = U_ZERO_ERROR; 261 int32_t length; 262 const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, 263 &internalErrorCode); 264 if(U_SUCCESS(errorCode)) { 265 t->rules.setTo(TRUE, s, length); 266 } 267 } 268 269 // Set the collation types on the informational locales, 270 // except when they match the default types (for brevity and backwards compatibility). 271 // For the valid locale, suppress the default type. 272 if(uprv_strcmp(type, defaultType) != 0) { 273 validLocale.setKeywordValue("collation", type, errorCode); 274 if(U_FAILURE(errorCode)) { return NULL; } 275 } 276 277 // For the actual locale, suppress the default type *according to the actual locale*. 278 // For example, zh has default=pinyin and contains all of the Chinese tailorings. 279 // zh_Hant has default=stroke but has no other data. 
280 // For the valid locale "zh_Hant" we need to suppress stroke. 281 // For the actual locale "zh" we need to suppress pinyin instead. 282 if(uprv_strcmp(actualLocale, vLocale) != 0) { 283 // Opening a bundle for the actual locale should always succeed. 284 LocalUResourceBundlePointer actualBundle( 285 ures_open(U_ICUDATA_COLL, actualLocale, &errorCode)); 286 if(U_FAILURE(errorCode)) { return NULL; } 287 UErrorCode internalErrorCode = U_ZERO_ERROR; 288 LocalUResourceBundlePointer def( 289 ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL, 290 &internalErrorCode)); 291 int32_t length; 292 const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); 293 if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) { 294 u_UCharsToChars(s, defaultType, length + 1); 295 } else { 296 uprv_strcpy(defaultType, "standard"); 297 } 298 } 299 if(uprv_strcmp(type, defaultType) != 0) { 300 t->actualLocale.setKeywordValue("collation", type, errorCode); 301 if(U_FAILURE(errorCode)) { return NULL; } 302 } 303 304 if(typeFallback) { 305 errorCode = U_USING_DEFAULT_WARNING; 306 } 307 t->bundle = bundle.orphan(); 308 return t.orphan(); 309} 310 311U_NAMESPACE_END 312 313U_NAMESPACE_USE 314 315U_CAPI UCollator* 316ucol_open(const char *loc, 317 UErrorCode *status) 318{ 319 U_NAMESPACE_USE 320 321 UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); 322 UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); 323 UCollator *result = NULL; 324 325 Collator *coll = Collator::createInstance(loc, *status); 326 if(U_SUCCESS(*status)) { 327 result = coll->toUCollator(); 328 } 329 UTRACE_EXIT_PTR_STATUS(result, *status); 330 return result; 331} 332 333 334U_CAPI int32_t U_EXPORT2 335ucol_getDisplayName( const char *objLoc, 336 const char *dispLoc, 337 UChar *result, 338 int32_t resultLength, 339 UErrorCode *status) 340{ 341 U_NAMESPACE_USE 342 343 if(U_FAILURE(*status)) return -1; 344 UnicodeString dst; 345 if(!(result==NULL && resultLength==0)) { 346 // NULL 
destination for pure preflighting: empty dummy string 347 // otherwise, alias the destination buffer 348 dst.setTo(result, 0, resultLength); 349 } 350 Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); 351 return dst.extract(result, resultLength, *status); 352} 353 354U_CAPI const char* U_EXPORT2 355ucol_getAvailable(int32_t index) 356{ 357 int32_t count = 0; 358 const Locale *loc = Collator::getAvailableLocales(count); 359 if (loc != NULL && index < count) { 360 return loc[index].getName(); 361 } 362 return NULL; 363} 364 365U_CAPI int32_t U_EXPORT2 366ucol_countAvailable() 367{ 368 int32_t count = 0; 369 Collator::getAvailableLocales(count); 370 return count; 371} 372 373#if !UCONFIG_NO_SERVICE 374U_CAPI UEnumeration* U_EXPORT2 375ucol_openAvailableLocales(UErrorCode *status) { 376 U_NAMESPACE_USE 377 378 // This is a wrapper over Collator::getAvailableLocales() 379 if (U_FAILURE(*status)) { 380 return NULL; 381 } 382 StringEnumeration *s = icu::Collator::getAvailableLocales(); 383 if (s == NULL) { 384 *status = U_MEMORY_ALLOCATION_ERROR; 385 return NULL; 386 } 387 return uenum_openFromStringEnumeration(s, status); 388} 389#endif 390 391// Note: KEYWORDS[0] != RESOURCE_NAME - alan 392 393static const char RESOURCE_NAME[] = "collations"; 394 395static const char* const KEYWORDS[] = { "collation" }; 396 397#define KEYWORD_COUNT LENGTHOF(KEYWORDS) 398 399U_CAPI UEnumeration* U_EXPORT2 400ucol_getKeywords(UErrorCode *status) { 401 UEnumeration *result = NULL; 402 if (U_SUCCESS(*status)) { 403 return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); 404 } 405 return result; 406} 407 408U_CAPI UEnumeration* U_EXPORT2 409ucol_getKeywordValues(const char *keyword, UErrorCode *status) { 410 if (U_FAILURE(*status)) { 411 return NULL; 412 } 413 // hard-coded to accept exactly one collation keyword 414 // modify if additional collation keyword is added later 415 if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) 416 { 417 *status = 
U_ILLEGAL_ARGUMENT_ERROR; 418 return NULL; 419 } 420 return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); 421} 422 423static const UEnumeration defaultKeywordValues = { 424 NULL, 425 NULL, 426 ulist_close_keyword_values_iterator, 427 ulist_count_keyword_values, 428 uenum_unextDefault, 429 ulist_next_keyword_value, 430 ulist_reset_keyword_values_iterator 431}; 432 433#include <stdio.h> 434 435U_CAPI UEnumeration* U_EXPORT2 436ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale, 437 UBool /*commonlyUsed*/, UErrorCode* status) { 438 /* Get the locale base name. */ 439 char localeBuffer[ULOC_FULLNAME_CAPACITY] = ""; 440 uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status); 441 442 /* Create the 2 lists 443 * -values is the temp location for the keyword values 444 * -results hold the actual list used by the UEnumeration object 445 */ 446 UList *values = ulist_createEmptyList(status); 447 UList *results = ulist_createEmptyList(status); 448 UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); 449 if (U_FAILURE(*status) || en == NULL) { 450 if (en == NULL) { 451 *status = U_MEMORY_ALLOCATION_ERROR; 452 } else { 453 uprv_free(en); 454 } 455 ulist_deleteList(values); 456 ulist_deleteList(results); 457 return NULL; 458 } 459 460 memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); 461 en->context = results; 462 463 /* Open the resource bundle for collation with the given locale. 
*/ 464 UResourceBundle bundle, collations, collres, defres; 465 ures_initStackObject(&bundle); 466 ures_initStackObject(&collations); 467 ures_initStackObject(&collres); 468 ures_initStackObject(&defres); 469 470 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); 471 472 while (U_SUCCESS(*status)) { 473 ures_getByKey(&bundle, RESOURCE_NAME, &collations, status); 474 ures_resetIterator(&collations); 475 while (U_SUCCESS(*status) && ures_hasNext(&collations)) { 476 ures_getNextResource(&collations, &collres, status); 477 const char *key = ures_getKey(&collres); 478 /* If the key is default, get the string and store it in results list only 479 * if results list is empty. 480 */ 481 if (uprv_strcmp(key, "default") == 0) { 482 if (ulist_getListSize(results) == 0) { 483 char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); 484 int32_t defcollLength = ULOC_KEYWORDS_CAPACITY; 485 486 ures_getNextResource(&collres, &defres, status); 487#if U_CHARSET_FAMILY==U_ASCII_FAMILY 488 /* optimize - use the utf-8 string */ 489 ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status); 490#else 491 { 492 const UChar* defString = ures_getString(&defres, &defcollLength, status); 493 if(U_SUCCESS(*status)) { 494 if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) { 495 *status = U_BUFFER_OVERFLOW_ERROR; 496 } else { 497 u_UCharsToChars(defString, defcoll, defcollLength+1); 498 } 499 } 500 } 501#endif 502 503 ulist_addItemBeginList(results, defcoll, TRUE, status); 504 } 505 } else { 506 ulist_addItemEndList(values, key, FALSE, status); 507 } 508 } 509 510 /* If the locale is "" this is root so exit. */ 511 if (uprv_strlen(localeBuffer) == 0) { 512 break; 513 } 514 /* Get the parent locale and open a new resource bundle. 
*/ 515 uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status); 516 ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); 517 } 518 519 ures_close(&defres); 520 ures_close(&collres); 521 ures_close(&collations); 522 ures_close(&bundle); 523 524 if (U_SUCCESS(*status)) { 525 char *value = NULL; 526 ulist_resetList(values); 527 while ((value = (char *)ulist_getNext(values)) != NULL) { 528 if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) { 529 ulist_addItemEndList(results, value, FALSE, status); 530 if (U_FAILURE(*status)) { 531 break; 532 } 533 } 534 } 535 } 536 537 ulist_deleteList(values); 538 539 if (U_FAILURE(*status)){ 540 uenum_close(en); 541 en = NULL; 542 } else { 543 ulist_resetList(results); 544 } 545 546 return en; 547} 548 549U_CAPI int32_t U_EXPORT2 550ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 551 const char* keyword, const char* locale, 552 UBool* isAvailable, UErrorCode* status) 553{ 554 // N.B.: Resource name is "collations" but keyword is "collation" 555 return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, 556 "collations", keyword, locale, 557 isAvailable, TRUE, status); 558} 559 560#endif /* #if !UCONFIG_NO_COLLATION */ 561