1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4********************************************************************** 5* Copyright (c) 2001-2014, International Business Machines 6* Corporation and others. All Rights Reserved. 7********************************************************************** 8* Date Name Description 9* 08/10/2001 aliu Creation. 10********************************************************************** 11*/ 12 13#include "unicode/utypes.h" 14 15#if !UCONFIG_NO_TRANSLITERATION 16 17#include "unicode/translit.h" 18#include "unicode/resbund.h" 19#include "unicode/uniset.h" 20#include "unicode/uscript.h" 21#include "rbt.h" 22#include "cpdtrans.h" 23#include "nultrans.h" 24#include "transreg.h" 25#include "rbt_data.h" 26#include "rbt_pars.h" 27#include "tridpars.h" 28#include "charstr.h" 29#include "uassert.h" 30#include "locutil.h" 31 32// Enable the following symbol to add debugging code that tracks the 33// allocation, deletion, and use of Entry objects. BoundsChecker has 34// reported dangling pointer errors with these objects, but I have 35// been unable to confirm them. I suspect BoundsChecker is getting 36// confused with pointers going into and coming out of a UHashtable, 37// despite the hinting code that is designed to help it. 38// #define DEBUG_MEM 39#ifdef DEBUG_MEM 40#include <stdio.h> 41#endif 42 43// UChar constants 44static const UChar LOCALE_SEP = 95; // '_' 45//static const UChar ID_SEP = 0x002D; /*-*/ 46//static const UChar VARIANT_SEP = 0x002F; // '/' 47 48// String constants 49static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any 50static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat 51 52// empty string 53#define NO_VARIANT UnicodeString() 54 55// initial estimate for specDAG size 56// ICU 60 Transliterator::countAvailableSources() 57#define SPECDAG_INIT_SIZE 149 58 59// initial estimate for number of variant names 60#define VARIANT_LIST_INIT_SIZE 11 61#define VARIANT_LIST_MAX_SIZE 31 62 63// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs) 64// ICU 60 Transliterator::countAvailableIDs() 65#define AVAILABLE_IDS_INIT_SIZE 641 66 67// initial estimate for number of targets for source "Any", "Lat" 68// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn") 69#define ANY_TARGETS_INIT_SIZE 125 70#define LAT_TARGETS_INIT_SIZE 23 71 72/** 73 * Resource bundle key for the RuleBasedTransliterator rule. 74 */ 75//static const char RB_RULE[] = "Rule"; 76 77U_NAMESPACE_BEGIN 78 79//------------------------------------------------------------------ 80// Alias 81//------------------------------------------------------------------ 82 83TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID, 84 const UnicodeSet* cpdFilter) : 85 ID(), 86 aliasesOrRules(theAliasID), 87 transes(0), 88 compoundFilter(cpdFilter), 89 direction(UTRANS_FORWARD), 90 type(TransliteratorAlias::SIMPLE) { 91} 92 93TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, 94 const UnicodeString& idBlocks, 95 UVector* adoptedTransliterators, 96 const UnicodeSet* cpdFilter) : 97 ID(theID), 98 aliasesOrRules(idBlocks), 99 transes(adoptedTransliterators), 100 compoundFilter(cpdFilter), 101 direction(UTRANS_FORWARD), 102 type(TransliteratorAlias::COMPOUND) { 103} 104 105TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID, 106 const UnicodeString& rules, 107 UTransDirection dir) : 108 ID(theID), 109 aliasesOrRules(rules), 110 transes(0), 111 compoundFilter(0), 112 direction(dir), 113 type(TransliteratorAlias::RULES) { 114} 115 116TransliteratorAlias::~TransliteratorAlias() { 117 delete transes; 118} 119 120 121Transliterator* TransliteratorAlias::create(UParseError& pe, 122 UErrorCode& ec) { 123 if (U_FAILURE(ec)) { 124 return 0; 125 } 126 Transliterator *t = NULL; 127 switch (type) { 128 case SIMPLE: 129 t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec); 130 if(U_FAILURE(ec)){ 131 return 0; 132 } 133 if (compoundFilter != 0) 134 t->adoptFilter((UnicodeSet*)compoundFilter->clone()); 135 break; 136 case COMPOUND: 137 { 138 // the total number of transliterators in the compound is the total number of anonymous transliterators 139 // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID 140 // block and that each pair anonymous transliterators has an ID block between them. Then we go back 141 // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which 142 // marks the position where an anonymous transliterator goes) and adjust accordingly 143 int32_t anonymousRBTs = transes->size(); 144 int32_t transCount = anonymousRBTs * 2 + 1; 145 if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff)) 146 --transCount; 147 if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff)) 148 --transCount; 149 UnicodeString noIDBlock((UChar)(0xffff)); 150 noIDBlock += ((UChar)(0xffff)); 151 int32_t pos = aliasesOrRules.indexOf(noIDBlock); 152 while (pos >= 0) { 153 --transCount; 154 pos = aliasesOrRules.indexOf(noIDBlock, pos + 1); 155 } 156 157 UVector transliterators(ec); 158 UnicodeString idBlock; 159 int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); 160 while (blockSeparatorPos >= 0) { 161 aliasesOrRules.extract(0, blockSeparatorPos, idBlock); 162 aliasesOrRules.remove(0, blockSeparatorPos + 1); 163 if (!idBlock.isEmpty()) 164 transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec); 165 if (!transes->isEmpty()) 166 transliterators.addElement(transes->orphanElementAt(0), ec); 167 blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff)); 168 } 169 if (!aliasesOrRules.isEmpty()) 170 transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec); 171 while (!transes->isEmpty()) 172 transliterators.addElement(transes->orphanElementAt(0), ec); 173 174 if (U_SUCCESS(ec)) { 175 t = new CompoundTransliterator(ID, transliterators, 176 (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0), 177 anonymousRBTs, pe, ec); 178 if (t == 0) { 179 ec = U_MEMORY_ALLOCATION_ERROR; 180 return 0; 181 } 182 } else { 183 for (int32_t i = 0; i < transliterators.size(); i++) 184 delete (Transliterator*)(transliterators.elementAt(i)); 185 } 186 } 187 break; 188 case RULES: 189 U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE! 190 break; 191 } 192 return t; 193} 194 195UBool TransliteratorAlias::isRuleBased() const { 196 return type == RULES; 197} 198 199void TransliteratorAlias::parse(TransliteratorParser& parser, 200 UParseError& pe, UErrorCode& ec) const { 201 U_ASSERT(type == RULES); 202 if (U_FAILURE(ec)) { 203 return; 204 } 205 206 parser.parse(aliasesOrRules, direction, pe, ec); 207} 208 209//---------------------------------------------------------------------- 210// class TransliteratorSpec 211//---------------------------------------------------------------------- 212 213/** 214 * A TransliteratorSpec is a string specifying either a source or a target. In more 215 * general terms, it may also specify a variant, but we only use the 216 * Spec class for sources and targets. 217 * 218 * A Spec may be a locale or a script. If it is a locale, it has a 219 * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where 220 * ssss is the script mapping of xx_YY_ZZZ. The Spec API methods 221 * hasFallback(), next(), and reset() iterate over this fallback 222 * sequence. 223 * 224 * The Spec class canonicalizes itself, so the locale is put into 225 * canonical form, or the script is transformed from an abbreviation 226 * to a full name. 227 */ 228class TransliteratorSpec : public UMemory { 229 public: 230 TransliteratorSpec(const UnicodeString& spec); 231 ~TransliteratorSpec(); 232 233 const UnicodeString& get() const; 234 UBool hasFallback() const; 235 const UnicodeString& next(); 236 void reset(); 237 238 UBool isLocale() const; 239 ResourceBundle& getBundle() const; 240 241 operator const UnicodeString&() const { return get(); } 242 const UnicodeString& getTop() const { return top; } 243 244 private: 245 void setupNext(); 246 247 UnicodeString top; 248 UnicodeString spec; 249 UnicodeString nextSpec; 250 UnicodeString scriptName; 251 UBool isSpecLocale; // TRUE if spec is a locale 252 UBool isNextLocale; // TRUE if nextSpec is a locale 253 ResourceBundle* res; 254 255 TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class 256 TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class 257}; 258 259TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec) 260: top(theSpec), 261 res(0) 262{ 263 UErrorCode status = U_ZERO_ERROR; 264 Locale topLoc(""); 265 LocaleUtility::initLocaleFromName(theSpec, topLoc); 266 if (!topLoc.isBogus()) { 267 res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status); 268 /* test for NULL */ 269 if (res == 0) { 270 return; 271 } 272 if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { 273 delete res; 274 res = 0; 275 } 276 } 277 278 // Canonicalize script name -or- do locale->script mapping 279 status = U_ZERO_ERROR; 280 static const int32_t capacity = 10; 281 UScriptCode script[capacity]={USCRIPT_INVALID_CODE}; 282 int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(), 283 script, capacity, &status); 284 if (num > 0 && script[0] != USCRIPT_INVALID_CODE) { 285 scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV); 286 } 287 288 // Canonicalize top 289 if (res != 0) { 290 // Canonicalize locale name 291 UnicodeString locStr; 292 LocaleUtility::initNameFromLocale(topLoc, locStr); 293 if (!locStr.isBogus()) { 294 top = locStr; 295 } 296 } else if (scriptName.length() != 0) { 297 // We are a script; use canonical name 298 top = scriptName; 299 } 300 301 // assert(spec != top); 302 reset(); 303} 304 305TransliteratorSpec::~TransliteratorSpec() { 306 delete res; 307} 308 309UBool TransliteratorSpec::hasFallback() const { 310 return nextSpec.length() != 0; 311} 312 313void TransliteratorSpec::reset() { 314 if (spec != top) { 315 spec = top; 316 isSpecLocale = (res != 0); 317 setupNext(); 318 } 319} 320 321void TransliteratorSpec::setupNext() { 322 isNextLocale = FALSE; 323 if (isSpecLocale) { 324 nextSpec = spec; 325 int32_t i = nextSpec.lastIndexOf(LOCALE_SEP); 326 // If i == 0 then we have _FOO, so we fall through 327 // to the scriptName. 328 if (i > 0) { 329 nextSpec.truncate(i); 330 isNextLocale = TRUE; 331 } else { 332 nextSpec = scriptName; // scriptName may be empty 333 } 334 } else { 335 // spec is a script, so we are at the end 336 nextSpec.truncate(0); 337 } 338} 339 340// Protocol: 341// for(const UnicodeString& s(spec.get()); 342// spec.hasFallback(); s(spec.next())) { ... 343 344const UnicodeString& TransliteratorSpec::next() { 345 spec = nextSpec; 346 isSpecLocale = isNextLocale; 347 setupNext(); 348 return spec; 349} 350 351const UnicodeString& TransliteratorSpec::get() const { 352 return spec; 353} 354 355UBool TransliteratorSpec::isLocale() const { 356 return isSpecLocale; 357} 358 359ResourceBundle& TransliteratorSpec::getBundle() const { 360 return *res; 361} 362 363//---------------------------------------------------------------------- 364 365#ifdef DEBUG_MEM 366 367// Vector of Entry pointers currently in use 368static UVector* DEBUG_entries = NULL; 369 370static void DEBUG_setup() { 371 if (DEBUG_entries == NULL) { 372 UErrorCode ec = U_ZERO_ERROR; 373 DEBUG_entries = new UVector(ec); 374 } 375} 376 377// Caller must call DEBUG_setup first. Return index of given Entry, 378// if it is in use (not deleted yet), or -1 if not found. 379static int DEBUG_findEntry(TransliteratorEntry* e) { 380 for (int i=0; i<DEBUG_entries->size(); ++i) { 381 if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) { 382 return i; 383 } 384 } 385 return -1; 386} 387 388// Track object creation 389static void DEBUG_newEntry(TransliteratorEntry* e) { 390 DEBUG_setup(); 391 if (DEBUG_findEntry(e) >= 0) { 392 // This should really never happen unless the heap is broken 393 printf("ERROR DEBUG_newEntry duplicate new pointer %08X\n", e); 394 return; 395 } 396 UErrorCode ec = U_ZERO_ERROR; 397 DEBUG_entries->addElement(e, ec); 398} 399 400// Track object deletion 401static void DEBUG_delEntry(TransliteratorEntry* e) { 402 DEBUG_setup(); 403 int i = DEBUG_findEntry(e); 404 if (i < 0) { 405 printf("ERROR DEBUG_delEntry possible double deletion %08X\n", e); 406 return; 407 } 408 DEBUG_entries->removeElementAt(i); 409} 410 411// Track object usage 412static void DEBUG_useEntry(TransliteratorEntry* e) { 413 if (e == NULL) return; 414 DEBUG_setup(); 415 int i = DEBUG_findEntry(e); 416 if (i < 0) { 417 printf("ERROR DEBUG_useEntry possible dangling pointer %08X\n", e); 418 } 419} 420 421#else 422// If we're not debugging then make these macros into NOPs 423#define DEBUG_newEntry(x) 424#define DEBUG_delEntry(x) 425#define DEBUG_useEntry(x) 426#endif 427 428//---------------------------------------------------------------------- 429// class Entry 430//---------------------------------------------------------------------- 431 432/** 433 * The Entry object stores objects of different types and 434 * singleton objects as placeholders for rule-based transliterators to 435 * be built as needed. Instances of this struct can be placeholders, 436 * can represent prototype transliterators to be cloned, or can 437 * represent TransliteratorData objects. We don't support storing 438 * classes in the registry because we don't have the rtti infrastructure 439 * for it. We could easily add this if there is a need for it in the 440 * future. 441 */ 442class TransliteratorEntry : public UMemory { 443public: 444 enum Type { 445 RULES_FORWARD, 446 RULES_REVERSE, 447 LOCALE_RULES, 448 PROTOTYPE, 449 RBT_DATA, 450 COMPOUND_RBT, 451 ALIAS, 452 FACTORY, 453 NONE // Only used for uninitialized entries 454 } entryType; 455 // NOTE: stringArg cannot go inside the union because 456 // it has a copy constructor 457 UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT 458 int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES 459 UnicodeSet* compoundFilter; // For COMPOUND_RBT 460 union { 461 Transliterator* prototype; // For PROTOTYPE 462 TransliterationRuleData* data; // For RBT_DATA 463 UVector* dataVector; // For COMPOUND_RBT 464 struct { 465 Transliterator::Factory function; 466 Transliterator::Token context; 467 } factory; // For FACTORY 468 } u; 469 TransliteratorEntry(); 470 ~TransliteratorEntry(); 471 void adoptPrototype(Transliterator* adopted); 472 void setFactory(Transliterator::Factory factory, 473 Transliterator::Token context); 474 475private: 476 477 TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class 478 TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class 479}; 480 481TransliteratorEntry::TransliteratorEntry() { 482 u.prototype = 0; 483 compoundFilter = NULL; 484 entryType = NONE; 485 DEBUG_newEntry(this); 486} 487 488TransliteratorEntry::~TransliteratorEntry() { 489 DEBUG_delEntry(this); 490 if (entryType == PROTOTYPE) { 491 delete u.prototype; 492 } else if (entryType == RBT_DATA) { 493 // The data object is shared between instances of RBT. The 494 // entry object owns it. It should only be deleted when the 495 // transliterator component is being cleaned up. Doing so 496 // invalidates any RBTs that the user has instantiated. 497 delete u.data; 498 } else if (entryType == COMPOUND_RBT) { 499 while (u.dataVector != NULL && !u.dataVector->isEmpty()) 500 delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0); 501 delete u.dataVector; 502 } 503 delete compoundFilter; 504} 505 506void TransliteratorEntry::adoptPrototype(Transliterator* adopted) { 507 if (entryType == PROTOTYPE) { 508 delete u.prototype; 509 } 510 entryType = PROTOTYPE; 511 u.prototype = adopted; 512} 513 514void TransliteratorEntry::setFactory(Transliterator::Factory factory, 515 Transliterator::Token context) { 516 if (entryType == PROTOTYPE) { 517 delete u.prototype; 518 } 519 entryType = FACTORY; 520 u.factory.function = factory; 521 u.factory.context = context; 522} 523 524// UObjectDeleter for Hashtable::setValueDeleter 525U_CDECL_BEGIN 526static void U_CALLCONV 527deleteEntry(void* obj) { 528 delete (TransliteratorEntry*) obj; 529} 530U_CDECL_END 531 532//---------------------------------------------------------------------- 533// class TransliteratorRegistry: Basic public API 534//---------------------------------------------------------------------- 535 536TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) : 537 registry(TRUE, status), 538 specDAG(TRUE, SPECDAG_INIT_SIZE, status), 539 variantList(VARIANT_LIST_INIT_SIZE, status), 540 availableIDs(AVAILABLE_IDS_INIT_SIZE, status) 541{ 542 registry.setValueDeleter(deleteEntry); 543 variantList.setDeleter(uprv_deleteUObject); 544 variantList.setComparer(uhash_compareCaselessUnicodeString); 545 UnicodeString *emptyString = new UnicodeString(); 546 if (emptyString != NULL) { 547 variantList.addElement(emptyString, status); 548 } 549 availableIDs.setDeleter(uprv_deleteUObject); 550 availableIDs.setComparer(uhash_compareCaselessUnicodeString); 551 specDAG.setValueDeleter(uhash_deleteHashtable); 552} 553 554TransliteratorRegistry::~TransliteratorRegistry() { 555 // Through the magic of C++, everything cleans itself up 556} 557 558Transliterator* TransliteratorRegistry::get(const UnicodeString& ID, 559 TransliteratorAlias*& aliasReturn, 560 UErrorCode& status) { 561 U_ASSERT(aliasReturn == NULL); 562 TransliteratorEntry *entry = find(ID); 563 return (entry == 0) ? 0 564 : instantiateEntry(ID, entry, aliasReturn, status); 565} 566 567Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID, 568 TransliteratorParser& parser, 569 TransliteratorAlias*& aliasReturn, 570 UErrorCode& status) { 571 U_ASSERT(aliasReturn == NULL); 572 TransliteratorEntry *entry = find(ID); 573 574 if (entry == 0) { 575 // We get to this point if there are two threads, one of which 576 // is instantiating an ID, and another of which is removing 577 // the same ID from the registry, and the timing is just right. 578 return 0; 579 } 580 581 // The usage model for the caller is that they will first call 582 // reg->get() inside the mutex, they'll get back an alias, they call 583 // alias->isRuleBased(), and if they get TRUE, they call alias->parse() 584 // outside the mutex, then reg->reget() inside the mutex again. A real 585 // mess, but it gets things working for ICU 3.0. [alan]. 586 587 // Note: It's possible that in between the caller calling 588 // alias->parse() and reg->reget(), that another thread will have 589 // called reg->reget(), and the entry will already have been fixed up. 590 // We have to detect this so we don't stomp over existing entry 591 // data members and potentially leak memory (u.data and compoundFilter). 592 593 if (entry->entryType == TransliteratorEntry::RULES_FORWARD || 594 entry->entryType == TransliteratorEntry::RULES_REVERSE || 595 entry->entryType == TransliteratorEntry::LOCALE_RULES) { 596 597 if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) { 598 entry->u.data = 0; 599 entry->entryType = TransliteratorEntry::ALIAS; 600 entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL"); 601 } 602 else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) { 603 entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 604 entry->entryType = TransliteratorEntry::RBT_DATA; 605 } 606 else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) { 607 entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0)); 608 entry->compoundFilter = parser.orphanCompoundFilter(); 609 entry->entryType = TransliteratorEntry::ALIAS; 610 } 611 else { 612 entry->entryType = TransliteratorEntry::COMPOUND_RBT; 613 entry->compoundFilter = parser.orphanCompoundFilter(); 614 entry->u.dataVector = new UVector(status); 615 entry->stringArg.remove(); 616 617 int32_t limit = parser.idBlockVector.size(); 618 if (parser.dataVector.size() > limit) 619 limit = parser.dataVector.size(); 620 621 for (int32_t i = 0; i < limit; i++) { 622 if (i < parser.idBlockVector.size()) { 623 UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); 624 if (!idBlock->isEmpty()) 625 entry->stringArg += *idBlock; 626 } 627 if (!parser.dataVector.isEmpty()) { 628 TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 629 entry->u.dataVector->addElement(data, status); 630 entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block 631 } 632 } 633 } 634 } 635 636 Transliterator *t = 637 instantiateEntry(ID, entry, aliasReturn, status); 638 return t; 639} 640 641void TransliteratorRegistry::put(Transliterator* adoptedProto, 642 UBool visible, 643 UErrorCode& ec) 644{ 645 TransliteratorEntry *entry = new TransliteratorEntry(); 646 if (entry == NULL) { 647 ec = U_MEMORY_ALLOCATION_ERROR; 648 return; 649 } 650 entry->adoptPrototype(adoptedProto); 651 registerEntry(adoptedProto->getID(), entry, visible); 652} 653 654void TransliteratorRegistry::put(const UnicodeString& ID, 655 Transliterator::Factory factory, 656 Transliterator::Token context, 657 UBool visible, 658 UErrorCode& ec) { 659 TransliteratorEntry *entry = new TransliteratorEntry(); 660 if (entry == NULL) { 661 ec = U_MEMORY_ALLOCATION_ERROR; 662 return; 663 } 664 entry->setFactory(factory, context); 665 registerEntry(ID, entry, visible); 666} 667 668void TransliteratorRegistry::put(const UnicodeString& ID, 669 const UnicodeString& resourceName, 670 UTransDirection dir, 671 UBool readonlyResourceAlias, 672 UBool visible, 673 UErrorCode& ec) { 674 TransliteratorEntry *entry = new TransliteratorEntry(); 675 if (entry == NULL) { 676 ec = U_MEMORY_ALLOCATION_ERROR; 677 return; 678 } 679 entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD 680 : TransliteratorEntry::RULES_REVERSE; 681 if (readonlyResourceAlias) { 682 entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1); 683 } 684 else { 685 entry->stringArg = resourceName; 686 } 687 registerEntry(ID, entry, visible); 688} 689 690void TransliteratorRegistry::put(const UnicodeString& ID, 691 const UnicodeString& alias, 692 UBool readonlyAliasAlias, 693 UBool visible, 694 UErrorCode& /*ec*/) { 695 TransliteratorEntry *entry = new TransliteratorEntry(); 696 // Null pointer check 697 if (entry != NULL) { 698 entry->entryType = TransliteratorEntry::ALIAS; 699 if (readonlyAliasAlias) { 700 entry->stringArg.setTo(TRUE, alias.getBuffer(), -1); 701 } 702 else { 703 entry->stringArg = alias; 704 } 705 registerEntry(ID, entry, visible); 706 } 707} 708 709void TransliteratorRegistry::remove(const UnicodeString& ID) { 710 UnicodeString source, target, variant; 711 UBool sawSource; 712 TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); 713 // Only need to do this if ID.indexOf('-') < 0 714 UnicodeString id; 715 TransliteratorIDParser::STVtoID(source, target, variant, id); 716 registry.remove(id); 717 removeSTV(source, target, variant); 718 availableIDs.removeElement((void*) &id); 719} 720 721//---------------------------------------------------------------------- 722// class TransliteratorRegistry: Public ID and spec management 723//---------------------------------------------------------------------- 724 725/** 726 * == OBSOLETE - remove in ICU 3.4 == 727 * Return the number of IDs currently registered with the system. 728 * To retrieve the actual IDs, call getAvailableID(i) with 729 * i from 0 to countAvailableIDs() - 1. 730 */ 731int32_t TransliteratorRegistry::countAvailableIDs(void) const { 732 return availableIDs.size(); 733} 734 735/** 736 * == OBSOLETE - remove in ICU 3.4 == 737 * Return the index-th available ID. index must be between 0 738 * and countAvailableIDs() - 1, inclusive. If index is out of 739 * range, the result of getAvailableID(0) is returned. 740 */ 741const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const { 742 if (index < 0 || index >= availableIDs.size()) { 743 index = 0; 744 } 745 return *(const UnicodeString*) availableIDs[index]; 746} 747 748StringEnumeration* TransliteratorRegistry::getAvailableIDs() const { 749 return new Enumeration(*this); 750} 751 752int32_t TransliteratorRegistry::countAvailableSources(void) const { 753 return specDAG.count(); 754} 755 756UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index, 757 UnicodeString& result) const { 758 int32_t pos = UHASH_FIRST; 759 const UHashElement *e = 0; 760 while (index-- >= 0) { 761 e = specDAG.nextElement(pos); 762 if (e == 0) { 763 break; 764 } 765 } 766 if (e == 0) { 767 result.truncate(0); 768 } else { 769 result = *(UnicodeString*) e->key.pointer; 770 } 771 return result; 772} 773 774int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const { 775 Hashtable *targets = (Hashtable*) specDAG.get(source); 776 return (targets == 0) ? 0 : targets->count(); 777} 778 779UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index, 780 const UnicodeString& source, 781 UnicodeString& result) const { 782 Hashtable *targets = (Hashtable*) specDAG.get(source); 783 if (targets == 0) { 784 result.truncate(0); // invalid source 785 return result; 786 } 787 int32_t pos = UHASH_FIRST; 788 const UHashElement *e = 0; 789 while (index-- >= 0) { 790 e = targets->nextElement(pos); 791 if (e == 0) { 792 break; 793 } 794 } 795 if (e == 0) { 796 result.truncate(0); // invalid index 797 } else { 798 result = *(UnicodeString*) e->key.pointer; 799 } 800 return result; 801} 802 803int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source, 804 const UnicodeString& target) const { 805 Hashtable *targets = (Hashtable*) specDAG.get(source); 806 if (targets == 0) { 807 return 0; 808 } 809 uint32_t varMask = targets->geti(target); 810 int32_t varCount = 0; 811 while (varMask > 0) { 812 if (varMask & 1) { 813 varCount++; 814 } 815 varMask >>= 1; 816 } 817 return varCount; 818} 819 820UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index, 821 const UnicodeString& source, 822 const UnicodeString& target, 823 UnicodeString& result) const { 824 Hashtable *targets = (Hashtable*) specDAG.get(source); 825 if (targets == 0) { 826 result.truncate(0); // invalid source 827 return result; 828 } 829 uint32_t varMask = targets->geti(target); 830 int32_t varCount = 0; 831 int32_t varListIndex = 0; 832 while (varMask > 0) { 833 if (varMask & 1) { 834 if (varCount == index) { 835 UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex); 836 if (v != NULL) { 837 result = *v; 838 return result; 839 } 840 break; 841 } 842 varCount++; 843 } 844 varMask >>= 1; 845 varListIndex++; 846 } 847 result.truncate(0); // invalid target or index 848 return result; 849} 850 851//---------------------------------------------------------------------- 852// class TransliteratorRegistry::Enumeration 853//---------------------------------------------------------------------- 854 855TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) : 856 index(0), reg(_reg) { 857} 858 859TransliteratorRegistry::Enumeration::~Enumeration() { 860} 861 862int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const { 863 return reg.availableIDs.size(); 864} 865 866const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) { 867 // This is sloppy but safe -- if we get out of sync with the underlying 868 // registry, we will still return legal strings, but they might not 869 // correspond to the snapshot at construction time. So there could be 870 // duplicate IDs or omitted IDs if insertions or deletions occur in one 871 // thread while another is iterating. To be more rigorous, add a timestamp, 872 // which is incremented with any modification, and validate this iterator 873 // against the timestamp at construction time. This probably isn't worth 874 // doing as long as there is some possibility of removing this code in favor 875 // of some new code based on Doug's service framework. 876 if (U_FAILURE(status)) { 877 return NULL; 878 } 879 int32_t n = reg.availableIDs.size(); 880 if (index > n) { 881 status = U_ENUM_OUT_OF_SYNC_ERROR; 882 } 883 // index == n is okay -- this means we've reached the end 884 if (index < n) { 885 // Copy the string! This avoids lifetime problems. 886 unistr = *(const UnicodeString*)reg.availableIDs[index++]; 887 return &unistr; 888 } else { 889 return NULL; 890 } 891} 892 893void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) { 894 index = 0; 895} 896 897UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration) 898 899//---------------------------------------------------------------------- 900// class TransliteratorRegistry: internal 901//---------------------------------------------------------------------- 902 903/** 904 * Convenience method. Calls 6-arg registerEntry(). 905 */ 906void TransliteratorRegistry::registerEntry(const UnicodeString& source, 907 const UnicodeString& target, 908 const UnicodeString& variant, 909 TransliteratorEntry* adopted, 910 UBool visible) { 911 UnicodeString ID; 912 UnicodeString s(source); 913 if (s.length() == 0) { 914 s.setTo(TRUE, ANY, 3); 915 } 916 TransliteratorIDParser::STVtoID(source, target, variant, ID); 917 registerEntry(ID, s, target, variant, adopted, visible); 918} 919 920/** 921 * Convenience method. Calls 6-arg registerEntry(). 922 */ 923void TransliteratorRegistry::registerEntry(const UnicodeString& ID, 924 TransliteratorEntry* adopted, 925 UBool visible) { 926 UnicodeString source, target, variant; 927 UBool sawSource; 928 TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); 929 // Only need to do this if ID.indexOf('-') < 0 930 UnicodeString id; 931 TransliteratorIDParser::STVtoID(source, target, variant, id); 932 registerEntry(id, source, target, variant, adopted, visible); 933} 934 935/** 936 * Register an entry object (adopted) with the given ID, source, 937 * target, and variant strings. 938 */ 939void TransliteratorRegistry::registerEntry(const UnicodeString& ID, 940 const UnicodeString& source, 941 const UnicodeString& target, 942 const UnicodeString& variant, 943 TransliteratorEntry* adopted, 944 UBool visible) { 945 UErrorCode status = U_ZERO_ERROR; 946 registry.put(ID, adopted, status); 947 if (visible) { 948 registerSTV(source, target, variant); 949 if (!availableIDs.contains((void*) &ID)) { 950 UnicodeString *newID = (UnicodeString *)ID.clone(); 951 // Check to make sure newID was created. 952 if (newID != NULL) { 953 // NUL-terminate the ID string 954 newID->getTerminatedBuffer(); 955 availableIDs.addElement(newID, status); 956 } 957 } 958 } else { 959 removeSTV(source, target, variant); 960 availableIDs.removeElement((void*) &ID); 961 } 962} 963 964/** 965 * Register a source-target/variant in the specDAG. Variant may be 966 * empty, but source and target must not be. 967 */ 968void TransliteratorRegistry::registerSTV(const UnicodeString& source, 969 const UnicodeString& target, 970 const UnicodeString& variant) { 971 // assert(source.length() > 0); 972 // assert(target.length() > 0); 973 UErrorCode status = U_ZERO_ERROR; 974 Hashtable *targets = (Hashtable*) specDAG.get(source); 975 if (targets == 0) { 976 int32_t size = 3; 977 if (source.compare(ANY,3) == 0) { 978 size = ANY_TARGETS_INIT_SIZE; 979 } else if (source.compare(LAT,3) == 0) { 980 size = LAT_TARGETS_INIT_SIZE; 981 } 982 targets = new Hashtable(TRUE, size, status); 983 if (U_FAILURE(status) || targets == NULL) { 984 return; 985 } 986 specDAG.put(source, targets, status); 987 } 988 int32_t variantListIndex = variantList.indexOf((void*) &variant, 0); 989 if (variantListIndex < 0) { 990 if (variantList.size() >= VARIANT_LIST_MAX_SIZE) { 991 // can't handle any more variants 992 return; 993 } 994 UnicodeString *variantEntry = new UnicodeString(variant); 995 if (variantEntry != NULL) { 996 variantList.addElement(variantEntry, status); 997 if (U_SUCCESS(status)) { 998 variantListIndex = variantList.size() - 1; 999 } 1000 } 1001 if (variantListIndex < 0) { 1002 return; 1003 } 1004 } 1005 uint32_t addMask = 1 << variantListIndex; 1006 uint32_t varMask = targets->geti(target); 1007 targets->puti(target, varMask | addMask, status); 1008} 1009 1010/** 1011 * Remove a source-target/variant from the specDAG. 1012 */ 1013void TransliteratorRegistry::removeSTV(const UnicodeString& source, 1014 const UnicodeString& target, 1015 const UnicodeString& variant) { 1016 // assert(source.length() > 0); 1017 // assert(target.length() > 0); 1018 UErrorCode status = U_ZERO_ERROR; 1019 Hashtable *targets = (Hashtable*) specDAG.get(source); 1020 if (targets == NULL) { 1021 return; // should never happen for valid s-t/v 1022 } 1023 uint32_t varMask = targets->geti(target); 1024 if (varMask == 0) { 1025 return; // should never happen for valid s-t/v 1026 } 1027 int32_t variantListIndex = variantList.indexOf((void*) &variant, 0); 1028 if (variantListIndex < 0) { 1029 return; // should never happen for valid s-t/v 1030 } 1031 int32_t remMask = 1 << variantListIndex; 1032 varMask &= (~remMask); 1033 if (varMask != 0) { 1034 targets->puti(target, varMask, status); 1035 } else { 1036 targets->remove(target); // should delete variants 1037 if (targets->count() == 0) { 1038 specDAG.remove(source); // should delete targets 1039 } 1040 } 1041} 1042 1043/** 1044 * Attempt to find a source-target/variant in the dynamic registry 1045 * store. Return 0 on failure. 1046 * 1047 * Caller does NOT own returned object. 1048 */ 1049TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src, 1050 const TransliteratorSpec& trg, 1051 const UnicodeString& variant) const { 1052 UnicodeString ID; 1053 TransliteratorIDParser::STVtoID(src, trg, variant, ID); 1054 TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID); 1055 DEBUG_useEntry(e); 1056 return e; 1057} 1058 1059/** 1060 * Attempt to find a source-target/variant in the static locale 1061 * resource store. Do not perform fallback. Return 0 on failure. 1062 * 1063 * On success, create a new entry object, register it in the dynamic 1064 * store, and return a pointer to it, but do not make it public -- 1065 * just because someone requested something, we do not expand the 1066 * available ID list (or spec DAG). 1067 * 1068 * Caller does NOT own returned object. 1069 */ 1070TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src, 1071 const TransliteratorSpec& trg, 1072 const UnicodeString& variant) { 1073 TransliteratorEntry* entry = 0; 1074 if (src.isLocale()) { 1075 entry = findInBundle(src, trg, variant, UTRANS_FORWARD); 1076 } else if (trg.isLocale()) { 1077 entry = findInBundle(trg, src, variant, UTRANS_REVERSE); 1078 } 1079 1080 // If we found an entry, store it in the Hashtable for next 1081 // time. 1082 if (entry != 0) { 1083 registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE); 1084 } 1085 1086 return entry; 1087} 1088 1089// As of 2.0, resource bundle keys cannot contain '_' 1090static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo" 1091 1092static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom" 1093 1094static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate" 1095 1096/** 1097 * Attempt to find an entry in a single resource bundle. This is 1098 * a one-sided lookup. findInStaticStore() performs up to two such 1099 * lookups, one for the source, and one for the target. 1100 * 1101 * Do not perform fallback. Return 0 on failure. 1102 * 1103 * On success, create a new Entry object, populate it, and return it. 1104 * The caller owns the returned object. 1105 */ 1106TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen, 1107 const TransliteratorSpec& specToFind, 1108 const UnicodeString& variant, 1109 UTransDirection direction) 1110{ 1111 UnicodeString utag; 1112 UnicodeString resStr; 1113 int32_t pass; 1114 1115 for (pass=0; pass<2; ++pass) { 1116 utag.truncate(0); 1117 // First try either TransliteratorTo_xxx or 1118 // TransliterateFrom_xxx, then try the bidirectional 1119 // Transliterate_xxx. This precedence order is arbitrary 1120 // but must be consistent and documented. 1121 if (pass == 0) { 1122 utag.append(direction == UTRANS_FORWARD ? 1123 TRANSLITERATE_TO : TRANSLITERATE_FROM, -1); 1124 } else { 1125 utag.append(TRANSLITERATE, -1); 1126 } 1127 UnicodeString s(specToFind.get()); 1128 utag.append(s.toUpper("")); 1129 UErrorCode status = U_ZERO_ERROR; 1130 ResourceBundle subres(specToOpen.getBundle().get( 1131 CharString().appendInvariantChars(utag, status).data(), status)); 1132 if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) { 1133 continue; 1134 } 1135 1136 s.truncate(0); 1137 if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) { 1138 continue; 1139 } 1140 1141 if (variant.length() != 0) { 1142 status = U_ZERO_ERROR; 1143 resStr = subres.getStringEx( 1144 CharString().appendInvariantChars(variant, status).data(), status); 1145 if (U_SUCCESS(status)) { 1146 // Exit loop successfully 1147 break; 1148 } 1149 } else { 1150 // Variant is empty, which means match the first variant listed. 1151 status = U_ZERO_ERROR; 1152 resStr = subres.getStringEx(1, status); 1153 if (U_SUCCESS(status)) { 1154 // Exit loop successfully 1155 break; 1156 } 1157 } 1158 } 1159 1160 if (pass==2) { 1161 // Failed 1162 return NULL; 1163 } 1164 1165 // We have succeeded in loading a string from the locale 1166 // resources. Create a new registry entry to hold it and return it. 1167 TransliteratorEntry *entry = new TransliteratorEntry(); 1168 if (entry != 0) { 1169 // The direction is always forward for the 1170 // TransliterateTo_xxx and TransliterateFrom_xxx 1171 // items; those are unidirectional forward rules. 1172 // For the bidirectional Transliterate_xxx items, 1173 // the direction is the value passed in to this 1174 // function. 1175 int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction; 1176 entry->entryType = TransliteratorEntry::LOCALE_RULES; 1177 entry->stringArg = resStr; 1178 entry->intArg = dir; 1179 } 1180 1181 return entry; 1182} 1183 1184/** 1185 * Convenience method. Calls 3-arg find(). 1186 */ 1187TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) { 1188 UnicodeString source, target, variant; 1189 UBool sawSource; 1190 TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource); 1191 return find(source, target, variant); 1192} 1193 1194/** 1195 * Top-level find method. Attempt to find a source-target/variant in 1196 * either the dynamic or the static (locale resource) store. Perform 1197 * fallback. 1198 * 1199 * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v: 1200 * 1201 * ss_SS_SSS-tt_TT_TTT/v -- in hashtable 1202 * ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback) 1203 * 1204 * repeat with t = tt_TT_TTT, tt_TT, tt, and tscript 1205 * 1206 * ss_SS_SSS-t/ * 1207 * ss_SS-t/ * 1208 * ss-t/ * 1209 * sscript-t/ * 1210 * 1211 * Here * matches the first variant listed. 1212 * 1213 * Caller does NOT own returned object. Return 0 on failure. 1214 */ 1215TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source, 1216 UnicodeString& target, 1217 UnicodeString& variant) { 1218 1219 TransliteratorSpec src(source); 1220 TransliteratorSpec trg(target); 1221 TransliteratorEntry* entry; 1222 1223 // Seek exact match in hashtable. Temporary fix for ICU 4.6. 1224 // TODO: The general logic for finding a matching transliterator needs to be reviewed. 1225 // ICU ticket #8089 1226 UnicodeString ID; 1227 TransliteratorIDParser::STVtoID(source, target, variant, ID); 1228 entry = (TransliteratorEntry*) registry.get(ID); 1229 if (entry != 0) { 1230 // std::string ss; 1231 // std::cout << ID.toUTF8String(ss) << std::endl; 1232 return entry; 1233 } 1234 1235 if (variant.length() != 0) { 1236 1237 // Seek exact match in hashtable 1238 entry = findInDynamicStore(src, trg, variant); 1239 if (entry != 0) { 1240 return entry; 1241 } 1242 1243 // Seek exact match in locale resources 1244 entry = findInStaticStore(src, trg, variant); 1245 if (entry != 0) { 1246 return entry; 1247 } 1248 } 1249 1250 for (;;) { 1251 src.reset(); 1252 for (;;) { 1253 // Seek match in hashtable 1254 entry = findInDynamicStore(src, trg, NO_VARIANT); 1255 if (entry != 0) { 1256 return entry; 1257 } 1258 1259 // Seek match in locale resources 1260 entry = findInStaticStore(src, trg, NO_VARIANT); 1261 if (entry != 0) { 1262 return entry; 1263 } 1264 if (!src.hasFallback()) { 1265 break; 1266 } 1267 src.next(); 1268 } 1269 if (!trg.hasFallback()) { 1270 break; 1271 } 1272 trg.next(); 1273 } 1274 1275 return 0; 1276} 1277 1278/** 1279 * Given an Entry object, instantiate it. Caller owns result. Return 1280 * 0 on failure. 1281 * 1282 * Return a non-empty aliasReturn value if the ID points to an alias. 1283 * We cannot instantiate it ourselves because the alias may contain 1284 * filters or compounds, which we do not understand. Caller should 1285 * make aliasReturn empty before calling. 1286 * 1287 * The entry object is assumed to reside in the dynamic store. It may be 1288 * modified. 1289 */ 1290Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID, 1291 TransliteratorEntry *entry, 1292 TransliteratorAlias* &aliasReturn, 1293 UErrorCode& status) { 1294 Transliterator *t = 0; 1295 U_ASSERT(aliasReturn == 0); 1296 1297 switch (entry->entryType) { 1298 case TransliteratorEntry::RBT_DATA: 1299 t = new RuleBasedTransliterator(ID, entry->u.data); 1300 if (t == 0) { 1301 status = U_MEMORY_ALLOCATION_ERROR; 1302 } 1303 return t; 1304 case TransliteratorEntry::PROTOTYPE: 1305 t = entry->u.prototype->clone(); 1306 if (t == 0) { 1307 status = U_MEMORY_ALLOCATION_ERROR; 1308 } 1309 return t; 1310 case TransliteratorEntry::ALIAS: 1311 aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter); 1312 if (aliasReturn == 0) { 1313 status = U_MEMORY_ALLOCATION_ERROR; 1314 } 1315 return 0; 1316 case TransliteratorEntry::FACTORY: 1317 t = entry->u.factory.function(ID, entry->u.factory.context); 1318 if (t == 0) { 1319 status = U_MEMORY_ALLOCATION_ERROR; 1320 } 1321 return t; 1322 case TransliteratorEntry::COMPOUND_RBT: 1323 { 1324 UVector* rbts = new UVector(entry->u.dataVector->size(), status); 1325 // Check for null pointer 1326 if (rbts == NULL) { 1327 status = U_MEMORY_ALLOCATION_ERROR; 1328 return NULL; 1329 } 1330 int32_t passNumber = 1; 1331 for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) { 1332 // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? 1333 Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), 1334 (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE); 1335 if (t == 0) 1336 status = U_MEMORY_ALLOCATION_ERROR; 1337 else 1338 rbts->addElement(t, status); 1339 } 1340 if (U_FAILURE(status)) { 1341 delete rbts; 1342 return 0; 1343 } 1344 aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter); 1345 } 1346 if (aliasReturn == 0) { 1347 status = U_MEMORY_ALLOCATION_ERROR; 1348 } 1349 return 0; 1350 case TransliteratorEntry::LOCALE_RULES: 1351 aliasReturn = new TransliteratorAlias(ID, entry->stringArg, 1352 (UTransDirection) entry->intArg); 1353 if (aliasReturn == 0) { 1354 status = U_MEMORY_ALLOCATION_ERROR; 1355 } 1356 return 0; 1357 case TransliteratorEntry::RULES_FORWARD: 1358 case TransliteratorEntry::RULES_REVERSE: 1359 // Process the rule data into a TransliteratorRuleData object, 1360 // and possibly also into an ::id header and/or footer. Then 1361 // we modify the registry with the parsed data and retry. 1362 { 1363 TransliteratorParser parser(status); 1364 1365 // We use the file name, taken from another resource bundle 1366 // 2-d array at static init time, as a locale language. We're 1367 // just using the locale mechanism to map through to a file 1368 // name; this in no way represents an actual locale. 1369 //CharString ch(entry->stringArg); 1370 //UResourceBundle *bundle = ures_openDirect(0, ch, &status); 1371 UnicodeString rules = entry->stringArg; 1372 //ures_close(bundle); 1373 1374 //if (U_FAILURE(status)) { 1375 // We have a failure of some kind. Remove the ID from the 1376 // registry so we don't keep trying. NOTE: This will throw off 1377 // anyone who is, at the moment, trying to iterate over the 1378 // available IDs. That's acceptable since we should never 1379 // really get here except under installation, configuration, 1380 // or unrecoverable run time memory failures. 1381 // remove(ID); 1382 //} else { 1383 1384 // If the status indicates a failure, then we don't have any 1385 // rules -- there is probably an installation error. The list 1386 // in the root locale should correspond to all the installed 1387 // transliterators; if it lists something that's not 1388 // installed, we'll get an error from ResourceBundle. 1389 aliasReturn = new TransliteratorAlias(ID, rules, 1390 ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ? 1391 UTRANS_REVERSE : UTRANS_FORWARD)); 1392 if (aliasReturn == 0) { 1393 status = U_MEMORY_ALLOCATION_ERROR; 1394 } 1395 //} 1396 } 1397 return 0; 1398 default: 1399 U_ASSERT(FALSE); // can't get here 1400 return 0; 1401 } 1402} 1403U_NAMESPACE_END 1404 1405#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1406 1407//eof 1408