1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (c) 2001-2014, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   08/10/2001  aliu        Creation.
10**********************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "unicode/translit.h"
18#include "unicode/resbund.h"
19#include "unicode/uniset.h"
20#include "unicode/uscript.h"
21#include "rbt.h"
22#include "cpdtrans.h"
23#include "nultrans.h"
24#include "transreg.h"
25#include "rbt_data.h"
26#include "rbt_pars.h"
27#include "tridpars.h"
28#include "charstr.h"
29#include "uassert.h"
30#include "locutil.h"
31
32// Enable the following symbol to add debugging code that tracks the
33// allocation, deletion, and use of Entry objects.  BoundsChecker has
34// reported dangling pointer errors with these objects, but I have
35// been unable to confirm them.  I suspect BoundsChecker is getting
36// confused with pointers going into and coming out of a UHashtable,
37// despite the hinting code that is designed to help it.
38// #define DEBUG_MEM
39#ifdef DEBUG_MEM
40#include <stdio.h>
41#endif
42
// UChar constants
// Separator searched for by TransliteratorSpec::setupNext() when it strips
// the last component of a locale name to obtain the next fallback.
static const UChar LOCALE_SEP  = 95; // '_'
//static const UChar ID_SEP      = 0x002D; /*-*/
//static const UChar VARIANT_SEP = 0x002F; // '/'

// String constants (zero-terminated arrays of UTF-16 code units)
static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat

// empty string
// Each use expands to a new default-constructed UnicodeString temporary.
#define NO_VARIANT UnicodeString()

// initial estimate for specDAG size
// ICU 60 Transliterator::countAvailableSources()
// Passed to the specDAG constructor in TransliteratorRegistry's constructor.
#define SPECDAG_INIT_SIZE 149

// initial estimate for number of variant names
// VARIANT_LIST_INIT_SIZE is passed to the variantList constructor in
// TransliteratorRegistry's constructor.
#define VARIANT_LIST_INIT_SIZE 11
// NOTE(review): presumably an upper bound tied to the 32-bit variant mask
// read in countAvailableVariants(); its use is not visible in this part of
// the file -- confirm.
#define VARIANT_LIST_MAX_SIZE 31

// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
// ICU 60 Transliterator::countAvailableIDs()
// Passed to the availableIDs constructor in TransliteratorRegistry's constructor.
#define AVAILABLE_IDS_INIT_SIZE 641

// initial estimate for number of targets for source "Any", "Lat"
// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn")
#define ANY_TARGETS_INIT_SIZE 125
#define LAT_TARGETS_INIT_SIZE 23
71
72/**
73 * Resource bundle key for the RuleBasedTransliterator rule.
74 */
75//static const char RB_RULE[] = "Rule";
76
77U_NAMESPACE_BEGIN
78
79//------------------------------------------------------------------
80// Alias
81//------------------------------------------------------------------
82
83TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
84                                         const UnicodeSet* cpdFilter) :
85    ID(),
86    aliasesOrRules(theAliasID),
87    transes(0),
88    compoundFilter(cpdFilter),
89    direction(UTRANS_FORWARD),
90    type(TransliteratorAlias::SIMPLE) {
91}
92
93TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
94                                         const UnicodeString& idBlocks,
95                                         UVector* adoptedTransliterators,
96                                         const UnicodeSet* cpdFilter) :
97    ID(theID),
98    aliasesOrRules(idBlocks),
99    transes(adoptedTransliterators),
100    compoundFilter(cpdFilter),
101    direction(UTRANS_FORWARD),
102    type(TransliteratorAlias::COMPOUND) {
103}
104
105TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
106                                         const UnicodeString& rules,
107                                         UTransDirection dir) :
108    ID(theID),
109    aliasesOrRules(rules),
110    transes(0),
111    compoundFilter(0),
112    direction(dir),
113    type(TransliteratorAlias::RULES) {
114}
115
116TransliteratorAlias::~TransliteratorAlias() {
117    delete transes;
118}
119
120
121Transliterator* TransliteratorAlias::create(UParseError& pe,
122                                            UErrorCode& ec) {
123    if (U_FAILURE(ec)) {
124        return 0;
125    }
126    Transliterator *t = NULL;
127    switch (type) {
128    case SIMPLE:
129        t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
130        if(U_FAILURE(ec)){
131            return 0;
132        }
133        if (compoundFilter != 0)
134            t->adoptFilter((UnicodeSet*)compoundFilter->clone());
135        break;
136    case COMPOUND:
137        {
138            // the total number of transliterators in the compound is the total number of anonymous transliterators
139            // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
140            // block and that each pair anonymous transliterators has an ID block between them.  Then we go back
141            // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
142            // marks the position where an anonymous transliterator goes) and adjust accordingly
143            int32_t anonymousRBTs = transes->size();
144            int32_t transCount = anonymousRBTs * 2 + 1;
145            if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
146                --transCount;
147            if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
148                --transCount;
149            UnicodeString noIDBlock((UChar)(0xffff));
150            noIDBlock += ((UChar)(0xffff));
151            int32_t pos = aliasesOrRules.indexOf(noIDBlock);
152            while (pos >= 0) {
153                --transCount;
154                pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
155            }
156
157            UVector transliterators(ec);
158            UnicodeString idBlock;
159            int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
160            while (blockSeparatorPos >= 0) {
161                aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
162                aliasesOrRules.remove(0, blockSeparatorPos + 1);
163                if (!idBlock.isEmpty())
164                    transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
165                if (!transes->isEmpty())
166                    transliterators.addElement(transes->orphanElementAt(0), ec);
167                blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
168            }
169            if (!aliasesOrRules.isEmpty())
170                transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
171            while (!transes->isEmpty())
172                transliterators.addElement(transes->orphanElementAt(0), ec);
173
174            if (U_SUCCESS(ec)) {
175                t = new CompoundTransliterator(ID, transliterators,
176                    (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
177                    anonymousRBTs, pe, ec);
178                if (t == 0) {
179                    ec = U_MEMORY_ALLOCATION_ERROR;
180                    return 0;
181                }
182            } else {
183                for (int32_t i = 0; i < transliterators.size(); i++)
184                    delete (Transliterator*)(transliterators.elementAt(i));
185            }
186        }
187        break;
188    case RULES:
189        U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
190        break;
191    }
192    return t;
193}
194
195UBool TransliteratorAlias::isRuleBased() const {
196    return type == RULES;
197}
198
199void TransliteratorAlias::parse(TransliteratorParser& parser,
200                                UParseError& pe, UErrorCode& ec) const {
201    U_ASSERT(type == RULES);
202    if (U_FAILURE(ec)) {
203        return;
204    }
205
206    parser.parse(aliasesOrRules, direction, pe, ec);
207}
208
209//----------------------------------------------------------------------
210// class TransliteratorSpec
211//----------------------------------------------------------------------
212
213/**
214 * A TransliteratorSpec is a string specifying either a source or a target.  In more
215 * general terms, it may also specify a variant, but we only use the
216 * Spec class for sources and targets.
217 *
218 * A Spec may be a locale or a script.  If it is a locale, it has a
219 * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
220 * ssss is the script mapping of xx_YY_ZZZ.  The Spec API methods
221 * hasFallback(), next(), and reset() iterate over this fallback
222 * sequence.
223 *
224 * The Spec class canonicalizes itself, so the locale is put into
225 * canonical form, or the script is transformed from an abbreviation
226 * to a full name.
227 */
228class TransliteratorSpec : public UMemory {
229 public:
230    TransliteratorSpec(const UnicodeString& spec);
231    ~TransliteratorSpec();
232
233    const UnicodeString& get() const;
234    UBool hasFallback() const;
235    const UnicodeString& next();
236    void reset();
237
238    UBool isLocale() const;
239    ResourceBundle& getBundle() const;
240
241    operator const UnicodeString&() const { return get(); }
242    const UnicodeString& getTop() const { return top; }
243
244 private:
245    void setupNext();
246
247    UnicodeString top;
248    UnicodeString spec;
249    UnicodeString nextSpec;
250    UnicodeString scriptName;
251    UBool isSpecLocale; // TRUE if spec is a locale
252    UBool isNextLocale; // TRUE if nextSpec is a locale
253    ResourceBundle* res;
254
255    TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
256    TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
257};
258
259TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
260: top(theSpec),
261  res(0)
262{
263    UErrorCode status = U_ZERO_ERROR;
264    Locale topLoc("");
265    LocaleUtility::initLocaleFromName(theSpec, topLoc);
266    if (!topLoc.isBogus()) {
267        res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status);
268        /* test for NULL */
269        if (res == 0) {
270            return;
271        }
272        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
273            delete res;
274            res = 0;
275        }
276    }
277
278    // Canonicalize script name -or- do locale->script mapping
279    status = U_ZERO_ERROR;
280    static const int32_t capacity = 10;
281    UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
282    int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(),
283                                  script, capacity, &status);
284    if (num > 0 && script[0] != USCRIPT_INVALID_CODE) {
285        scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV);
286    }
287
288    // Canonicalize top
289    if (res != 0) {
290        // Canonicalize locale name
291        UnicodeString locStr;
292        LocaleUtility::initNameFromLocale(topLoc, locStr);
293        if (!locStr.isBogus()) {
294            top = locStr;
295        }
296    } else if (scriptName.length() != 0) {
297        // We are a script; use canonical name
298        top = scriptName;
299    }
300
301    // assert(spec != top);
302    reset();
303}
304
305TransliteratorSpec::~TransliteratorSpec() {
306    delete res;
307}
308
309UBool TransliteratorSpec::hasFallback() const {
310    return nextSpec.length() != 0;
311}
312
313void TransliteratorSpec::reset() {
314    if (spec != top) {
315        spec = top;
316        isSpecLocale = (res != 0);
317        setupNext();
318    }
319}
320
321void TransliteratorSpec::setupNext() {
322    isNextLocale = FALSE;
323    if (isSpecLocale) {
324        nextSpec = spec;
325        int32_t i = nextSpec.lastIndexOf(LOCALE_SEP);
326        // If i == 0 then we have _FOO, so we fall through
327        // to the scriptName.
328        if (i > 0) {
329            nextSpec.truncate(i);
330            isNextLocale = TRUE;
331        } else {
332            nextSpec = scriptName; // scriptName may be empty
333        }
334    } else {
335        // spec is a script, so we are at the end
336        nextSpec.truncate(0);
337    }
338}
339
340// Protocol:
341// for(const UnicodeString& s(spec.get());
342//     spec.hasFallback(); s(spec.next())) { ...
343
344const UnicodeString& TransliteratorSpec::next() {
345    spec = nextSpec;
346    isSpecLocale = isNextLocale;
347    setupNext();
348    return spec;
349}
350
351const UnicodeString& TransliteratorSpec::get() const {
352    return spec;
353}
354
355UBool TransliteratorSpec::isLocale() const {
356    return isSpecLocale;
357}
358
359ResourceBundle& TransliteratorSpec::getBundle() const {
360    return *res;
361}
362
363//----------------------------------------------------------------------
364
365#ifdef DEBUG_MEM
366
367// Vector of Entry pointers currently in use
368static UVector* DEBUG_entries = NULL;
369
370static void DEBUG_setup() {
371    if (DEBUG_entries == NULL) {
372        UErrorCode ec = U_ZERO_ERROR;
373        DEBUG_entries = new UVector(ec);
374    }
375}
376
377// Caller must call DEBUG_setup first.  Return index of given Entry,
378// if it is in use (not deleted yet), or -1 if not found.
379static int DEBUG_findEntry(TransliteratorEntry* e) {
380    for (int i=0; i<DEBUG_entries->size(); ++i) {
381        if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
382            return i;
383        }
384    }
385    return -1;
386}
387
388// Track object creation
389static void DEBUG_newEntry(TransliteratorEntry* e) {
390    DEBUG_setup();
391    if (DEBUG_findEntry(e) >= 0) {
392        // This should really never happen unless the heap is broken
393        printf("ERROR DEBUG_newEntry duplicate new pointer %08X\n", e);
394        return;
395    }
396    UErrorCode ec = U_ZERO_ERROR;
397    DEBUG_entries->addElement(e, ec);
398}
399
400// Track object deletion
401static void DEBUG_delEntry(TransliteratorEntry* e) {
402    DEBUG_setup();
403    int i = DEBUG_findEntry(e);
404    if (i < 0) {
405        printf("ERROR DEBUG_delEntry possible double deletion %08X\n", e);
406        return;
407    }
408    DEBUG_entries->removeElementAt(i);
409}
410
411// Track object usage
412static void DEBUG_useEntry(TransliteratorEntry* e) {
413    if (e == NULL) return;
414    DEBUG_setup();
415    int i = DEBUG_findEntry(e);
416    if (i < 0) {
417        printf("ERROR DEBUG_useEntry possible dangling pointer %08X\n", e);
418    }
419}
420
421#else
422// If we're not debugging then make these macros into NOPs
423#define DEBUG_newEntry(x)
424#define DEBUG_delEntry(x)
425#define DEBUG_useEntry(x)
426#endif
427
428//----------------------------------------------------------------------
429// class Entry
430//----------------------------------------------------------------------
431
432/**
433 * The Entry object stores objects of different types and
434 * singleton objects as placeholders for rule-based transliterators to
435 * be built as needed.  Instances of this struct can be placeholders,
436 * can represent prototype transliterators to be cloned, or can
437 * represent TransliteratorData objects.  We don't support storing
438 * classes in the registry because we don't have the rtti infrastructure
439 * for it.  We could easily add this if there is a need for it in the
440 * future.
441 */
442class TransliteratorEntry : public UMemory {
443public:
444    enum Type {
445        RULES_FORWARD,
446        RULES_REVERSE,
447        LOCALE_RULES,
448        PROTOTYPE,
449        RBT_DATA,
450        COMPOUND_RBT,
451        ALIAS,
452        FACTORY,
453        NONE // Only used for uninitialized entries
454    } entryType;
455    // NOTE: stringArg cannot go inside the union because
456    // it has a copy constructor
457    UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT
458    int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES
459    UnicodeSet* compoundFilter; // For COMPOUND_RBT
460    union {
461        Transliterator* prototype; // For PROTOTYPE
462        TransliterationRuleData* data; // For RBT_DATA
463        UVector* dataVector;    // For COMPOUND_RBT
464        struct {
465            Transliterator::Factory function;
466            Transliterator::Token   context;
467        } factory; // For FACTORY
468    } u;
469    TransliteratorEntry();
470    ~TransliteratorEntry();
471    void adoptPrototype(Transliterator* adopted);
472    void setFactory(Transliterator::Factory factory,
473                    Transliterator::Token context);
474
475private:
476
477    TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
478    TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
479};
480
481TransliteratorEntry::TransliteratorEntry() {
482    u.prototype = 0;
483    compoundFilter = NULL;
484    entryType = NONE;
485    DEBUG_newEntry(this);
486}
487
488TransliteratorEntry::~TransliteratorEntry() {
489    DEBUG_delEntry(this);
490    if (entryType == PROTOTYPE) {
491        delete u.prototype;
492    } else if (entryType == RBT_DATA) {
493        // The data object is shared between instances of RBT.  The
494        // entry object owns it.  It should only be deleted when the
495        // transliterator component is being cleaned up.  Doing so
496        // invalidates any RBTs that the user has instantiated.
497        delete u.data;
498    } else if (entryType == COMPOUND_RBT) {
499        while (u.dataVector != NULL && !u.dataVector->isEmpty())
500            delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
501        delete u.dataVector;
502    }
503    delete compoundFilter;
504}
505
506void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
507    if (entryType == PROTOTYPE) {
508        delete u.prototype;
509    }
510    entryType = PROTOTYPE;
511    u.prototype = adopted;
512}
513
514void TransliteratorEntry::setFactory(Transliterator::Factory factory,
515                       Transliterator::Token context) {
516    if (entryType == PROTOTYPE) {
517        delete u.prototype;
518    }
519    entryType = FACTORY;
520    u.factory.function = factory;
521    u.factory.context = context;
522}
523
524// UObjectDeleter for Hashtable::setValueDeleter
525U_CDECL_BEGIN
526static void U_CALLCONV
527deleteEntry(void* obj) {
528    delete (TransliteratorEntry*) obj;
529}
530U_CDECL_END
531
532//----------------------------------------------------------------------
533// class TransliteratorRegistry: Basic public API
534//----------------------------------------------------------------------
535
536TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
537    registry(TRUE, status),
538    specDAG(TRUE, SPECDAG_INIT_SIZE, status),
539    variantList(VARIANT_LIST_INIT_SIZE, status),
540    availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
541{
542    registry.setValueDeleter(deleteEntry);
543    variantList.setDeleter(uprv_deleteUObject);
544    variantList.setComparer(uhash_compareCaselessUnicodeString);
545    UnicodeString *emptyString = new UnicodeString();
546    if (emptyString != NULL) {
547        variantList.addElement(emptyString, status);
548    }
549    availableIDs.setDeleter(uprv_deleteUObject);
550    availableIDs.setComparer(uhash_compareCaselessUnicodeString);
551    specDAG.setValueDeleter(uhash_deleteHashtable);
552}
553
554TransliteratorRegistry::~TransliteratorRegistry() {
555    // Through the magic of C++, everything cleans itself up
556}
557
558Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
559                                            TransliteratorAlias*& aliasReturn,
560                                            UErrorCode& status) {
561    U_ASSERT(aliasReturn == NULL);
562    TransliteratorEntry *entry = find(ID);
563    return (entry == 0) ? 0
564        : instantiateEntry(ID, entry, aliasReturn, status);
565}
566
567Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
568                                              TransliteratorParser& parser,
569                                              TransliteratorAlias*& aliasReturn,
570                                              UErrorCode& status) {
571    U_ASSERT(aliasReturn == NULL);
572    TransliteratorEntry *entry = find(ID);
573
574    if (entry == 0) {
575        // We get to this point if there are two threads, one of which
576        // is instantiating an ID, and another of which is removing
577        // the same ID from the registry, and the timing is just right.
578        return 0;
579    }
580
581    // The usage model for the caller is that they will first call
582    // reg->get() inside the mutex, they'll get back an alias, they call
583    // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
584    // outside the mutex, then reg->reget() inside the mutex again.  A real
585    // mess, but it gets things working for ICU 3.0. [alan].
586
587    // Note: It's possible that in between the caller calling
588    // alias->parse() and reg->reget(), that another thread will have
589    // called reg->reget(), and the entry will already have been fixed up.
590    // We have to detect this so we don't stomp over existing entry
591    // data members and potentially leak memory (u.data and compoundFilter).
592
593    if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
594        entry->entryType == TransliteratorEntry::RULES_REVERSE ||
595        entry->entryType == TransliteratorEntry::LOCALE_RULES) {
596
597        if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
598            entry->u.data = 0;
599            entry->entryType = TransliteratorEntry::ALIAS;
600            entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
601        }
602        else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
603            entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
604            entry->entryType = TransliteratorEntry::RBT_DATA;
605        }
606        else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
607            entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
608            entry->compoundFilter = parser.orphanCompoundFilter();
609            entry->entryType = TransliteratorEntry::ALIAS;
610        }
611        else {
612            entry->entryType = TransliteratorEntry::COMPOUND_RBT;
613            entry->compoundFilter = parser.orphanCompoundFilter();
614            entry->u.dataVector = new UVector(status);
615            entry->stringArg.remove();
616
617            int32_t limit = parser.idBlockVector.size();
618            if (parser.dataVector.size() > limit)
619                limit = parser.dataVector.size();
620
621            for (int32_t i = 0; i < limit; i++) {
622                if (i < parser.idBlockVector.size()) {
623                    UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
624                    if (!idBlock->isEmpty())
625                        entry->stringArg += *idBlock;
626                }
627                if (!parser.dataVector.isEmpty()) {
628                    TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
629                    entry->u.dataVector->addElement(data, status);
630                    entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
631                }
632            }
633        }
634    }
635
636    Transliterator *t =
637        instantiateEntry(ID, entry, aliasReturn, status);
638    return t;
639}
640
641void TransliteratorRegistry::put(Transliterator* adoptedProto,
642                                 UBool visible,
643                                 UErrorCode& ec)
644{
645    TransliteratorEntry *entry = new TransliteratorEntry();
646    if (entry == NULL) {
647        ec = U_MEMORY_ALLOCATION_ERROR;
648        return;
649    }
650    entry->adoptPrototype(adoptedProto);
651    registerEntry(adoptedProto->getID(), entry, visible);
652}
653
654void TransliteratorRegistry::put(const UnicodeString& ID,
655                                 Transliterator::Factory factory,
656                                 Transliterator::Token context,
657                                 UBool visible,
658                                 UErrorCode& ec) {
659    TransliteratorEntry *entry = new TransliteratorEntry();
660    if (entry == NULL) {
661        ec = U_MEMORY_ALLOCATION_ERROR;
662        return;
663    }
664    entry->setFactory(factory, context);
665    registerEntry(ID, entry, visible);
666}
667
668void TransliteratorRegistry::put(const UnicodeString& ID,
669                                 const UnicodeString& resourceName,
670                                 UTransDirection dir,
671                                 UBool readonlyResourceAlias,
672                                 UBool visible,
673                                 UErrorCode& ec) {
674    TransliteratorEntry *entry = new TransliteratorEntry();
675    if (entry == NULL) {
676        ec = U_MEMORY_ALLOCATION_ERROR;
677        return;
678    }
679    entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD
680        : TransliteratorEntry::RULES_REVERSE;
681    if (readonlyResourceAlias) {
682        entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1);
683    }
684    else {
685        entry->stringArg = resourceName;
686    }
687    registerEntry(ID, entry, visible);
688}
689
690void TransliteratorRegistry::put(const UnicodeString& ID,
691                                 const UnicodeString& alias,
692                                 UBool readonlyAliasAlias,
693                                 UBool visible,
694                                 UErrorCode& /*ec*/) {
695    TransliteratorEntry *entry = new TransliteratorEntry();
696    // Null pointer check
697    if (entry != NULL) {
698        entry->entryType = TransliteratorEntry::ALIAS;
699        if (readonlyAliasAlias) {
700            entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);
701        }
702        else {
703            entry->stringArg = alias;
704        }
705        registerEntry(ID, entry, visible);
706    }
707}
708
709void TransliteratorRegistry::remove(const UnicodeString& ID) {
710    UnicodeString source, target, variant;
711    UBool sawSource;
712    TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
713    // Only need to do this if ID.indexOf('-') < 0
714    UnicodeString id;
715    TransliteratorIDParser::STVtoID(source, target, variant, id);
716    registry.remove(id);
717    removeSTV(source, target, variant);
718    availableIDs.removeElement((void*) &id);
719}
720
721//----------------------------------------------------------------------
722// class TransliteratorRegistry: Public ID and spec management
723//----------------------------------------------------------------------
724
725/**
726 * == OBSOLETE - remove in ICU 3.4 ==
727 * Return the number of IDs currently registered with the system.
728 * To retrieve the actual IDs, call getAvailableID(i) with
729 * i from 0 to countAvailableIDs() - 1.
730 */
731int32_t TransliteratorRegistry::countAvailableIDs(void) const {
732    return availableIDs.size();
733}
734
735/**
736 * == OBSOLETE - remove in ICU 3.4 ==
737 * Return the index-th available ID.  index must be between 0
738 * and countAvailableIDs() - 1, inclusive.  If index is out of
739 * range, the result of getAvailableID(0) is returned.
740 */
741const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
742    if (index < 0 || index >= availableIDs.size()) {
743        index = 0;
744    }
745    return *(const UnicodeString*) availableIDs[index];
746}
747
748StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
749    return new Enumeration(*this);
750}
751
752int32_t TransliteratorRegistry::countAvailableSources(void) const {
753    return specDAG.count();
754}
755
756UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
757                                                          UnicodeString& result) const {
758    int32_t pos = UHASH_FIRST;
759    const UHashElement *e = 0;
760    while (index-- >= 0) {
761        e = specDAG.nextElement(pos);
762        if (e == 0) {
763            break;
764        }
765    }
766    if (e == 0) {
767        result.truncate(0);
768    } else {
769        result = *(UnicodeString*) e->key.pointer;
770    }
771    return result;
772}
773
774int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const {
775    Hashtable *targets = (Hashtable*) specDAG.get(source);
776    return (targets == 0) ? 0 : targets->count();
777}
778
779UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index,
780                                                          const UnicodeString& source,
781                                                          UnicodeString& result) const {
782    Hashtable *targets = (Hashtable*) specDAG.get(source);
783    if (targets == 0) {
784        result.truncate(0); // invalid source
785        return result;
786    }
787    int32_t pos = UHASH_FIRST;
788    const UHashElement *e = 0;
789    while (index-- >= 0) {
790        e = targets->nextElement(pos);
791        if (e == 0) {
792            break;
793        }
794    }
795    if (e == 0) {
796        result.truncate(0); // invalid index
797    } else {
798        result = *(UnicodeString*) e->key.pointer;
799    }
800    return result;
801}
802
803int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source,
804                                                       const UnicodeString& target) const {
805    Hashtable *targets = (Hashtable*) specDAG.get(source);
806    if (targets == 0) {
807        return 0;
808    }
809    uint32_t varMask = targets->geti(target);
810    int32_t varCount = 0;
811    while (varMask > 0) {
812        if (varMask & 1) {
813            varCount++;
814        }
815        varMask >>= 1;
816    }
817    return varCount;
818}
819
820UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
821                                                           const UnicodeString& source,
822                                                           const UnicodeString& target,
823                                                           UnicodeString& result) const {
824    Hashtable *targets = (Hashtable*) specDAG.get(source);
825    if (targets == 0) {
826        result.truncate(0); // invalid source
827        return result;
828    }
829    uint32_t varMask = targets->geti(target);
830    int32_t varCount = 0;
831    int32_t varListIndex = 0;
832    while (varMask > 0) {
833        if (varMask & 1) {
834            if (varCount == index) {
835                UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex);
836                if (v != NULL) {
837                    result = *v;
838                    return result;
839                }
840                break;
841            }
842            varCount++;
843        }
844        varMask >>= 1;
845        varListIndex++;
846    }
847    result.truncate(0); // invalid target or index
848    return result;
849}
850
851//----------------------------------------------------------------------
852// class TransliteratorRegistry::Enumeration
853//----------------------------------------------------------------------
854
855TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) :
856    index(0), reg(_reg) {
857}
858
859TransliteratorRegistry::Enumeration::~Enumeration() {
860}
861
862int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const {
863    return reg.availableIDs.size();
864}
865
866const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
867    // This is sloppy but safe -- if we get out of sync with the underlying
868    // registry, we will still return legal strings, but they might not
869    // correspond to the snapshot at construction time.  So there could be
870    // duplicate IDs or omitted IDs if insertions or deletions occur in one
871    // thread while another is iterating.  To be more rigorous, add a timestamp,
872    // which is incremented with any modification, and validate this iterator
873    // against the timestamp at construction time.  This probably isn't worth
874    // doing as long as there is some possibility of removing this code in favor
875    // of some new code based on Doug's service framework.
876    if (U_FAILURE(status)) {
877        return NULL;
878    }
879    int32_t n = reg.availableIDs.size();
880    if (index > n) {
881        status = U_ENUM_OUT_OF_SYNC_ERROR;
882    }
883    // index == n is okay -- this means we've reached the end
884    if (index < n) {
885        // Copy the string! This avoids lifetime problems.
886        unistr = *(const UnicodeString*)reg.availableIDs[index++];
887        return &unistr;
888    } else {
889        return NULL;
890    }
891}
892
893void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) {
894    index = 0;
895}
896
// Expands the ICU RTTI implementation macro for the Enumeration class
// (presumably its class-ID accessors -- confirm against the macro definition).
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
898
899//----------------------------------------------------------------------
900// class TransliteratorRegistry: internal
901//----------------------------------------------------------------------
902
903/**
904 * Convenience method.  Calls 6-arg registerEntry().
905 */
906void TransliteratorRegistry::registerEntry(const UnicodeString& source,
907                                           const UnicodeString& target,
908                                           const UnicodeString& variant,
909                                           TransliteratorEntry* adopted,
910                                           UBool visible) {
911    UnicodeString ID;
912    UnicodeString s(source);
913    if (s.length() == 0) {
914        s.setTo(TRUE, ANY, 3);
915    }
916    TransliteratorIDParser::STVtoID(source, target, variant, ID);
917    registerEntry(ID, s, target, variant, adopted, visible);
918}
919
920/**
921 * Convenience method.  Calls 6-arg registerEntry().
922 */
923void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
924                                           TransliteratorEntry* adopted,
925                                           UBool visible) {
926    UnicodeString source, target, variant;
927    UBool sawSource;
928    TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
929    // Only need to do this if ID.indexOf('-') < 0
930    UnicodeString id;
931    TransliteratorIDParser::STVtoID(source, target, variant, id);
932    registerEntry(id, source, target, variant, adopted, visible);
933}
934
935/**
936 * Register an entry object (adopted) with the given ID, source,
937 * target, and variant strings.
938 */
939void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
940                                           const UnicodeString& source,
941                                           const UnicodeString& target,
942                                           const UnicodeString& variant,
943                                           TransliteratorEntry* adopted,
944                                           UBool visible) {
945    UErrorCode status = U_ZERO_ERROR;
946    registry.put(ID, adopted, status);
947    if (visible) {
948        registerSTV(source, target, variant);
949        if (!availableIDs.contains((void*) &ID)) {
950            UnicodeString *newID = (UnicodeString *)ID.clone();
951            // Check to make sure newID was created.
952            if (newID != NULL) {
953                // NUL-terminate the ID string
954                newID->getTerminatedBuffer();
955                availableIDs.addElement(newID, status);
956            }
957        }
958    } else {
959        removeSTV(source, target, variant);
960        availableIDs.removeElement((void*) &ID);
961    }
962}
963
964/**
965 * Register a source-target/variant in the specDAG.  Variant may be
966 * empty, but source and target must not be.
967 */
968void TransliteratorRegistry::registerSTV(const UnicodeString& source,
969                                         const UnicodeString& target,
970                                         const UnicodeString& variant) {
971    // assert(source.length() > 0);
972    // assert(target.length() > 0);
973    UErrorCode status = U_ZERO_ERROR;
974    Hashtable *targets = (Hashtable*) specDAG.get(source);
975    if (targets == 0) {
976        int32_t size = 3;
977        if (source.compare(ANY,3) == 0) {
978            size = ANY_TARGETS_INIT_SIZE;
979        } else if (source.compare(LAT,3) == 0) {
980            size = LAT_TARGETS_INIT_SIZE;
981        }
982        targets = new Hashtable(TRUE, size, status);
983        if (U_FAILURE(status) || targets == NULL) {
984            return;
985        }
986        specDAG.put(source, targets, status);
987    }
988    int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
989    if (variantListIndex < 0) {
990        if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
991            // can't handle any more variants
992            return;
993        }
994        UnicodeString *variantEntry = new UnicodeString(variant);
995        if (variantEntry != NULL) {
996            variantList.addElement(variantEntry, status);
997            if (U_SUCCESS(status)) {
998                variantListIndex = variantList.size() - 1;
999            }
1000        }
1001        if (variantListIndex < 0) {
1002            return;
1003        }
1004    }
1005    uint32_t addMask = 1 << variantListIndex;
1006    uint32_t varMask = targets->geti(target);
1007    targets->puti(target, varMask | addMask, status);
1008}
1009
1010/**
1011 * Remove a source-target/variant from the specDAG.
1012 */
1013void TransliteratorRegistry::removeSTV(const UnicodeString& source,
1014                                       const UnicodeString& target,
1015                                       const UnicodeString& variant) {
1016    // assert(source.length() > 0);
1017    // assert(target.length() > 0);
1018    UErrorCode status = U_ZERO_ERROR;
1019    Hashtable *targets = (Hashtable*) specDAG.get(source);
1020    if (targets == NULL) {
1021        return; // should never happen for valid s-t/v
1022    }
1023    uint32_t varMask = targets->geti(target);
1024    if (varMask == 0) {
1025        return; // should never happen for valid s-t/v
1026    }
1027    int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
1028    if (variantListIndex < 0) {
1029        return; // should never happen for valid s-t/v
1030    }
1031    int32_t remMask = 1 << variantListIndex;
1032    varMask &= (~remMask);
1033    if (varMask != 0) {
1034        targets->puti(target, varMask, status);
1035    } else {
1036        targets->remove(target); // should delete variants
1037        if (targets->count() == 0) {
1038            specDAG.remove(source); // should delete targets
1039        }
1040    }
1041}
1042
1043/**
1044 * Attempt to find a source-target/variant in the dynamic registry
1045 * store.  Return 0 on failure.
1046 *
1047 * Caller does NOT own returned object.
1048 */
1049TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
1050                                                  const TransliteratorSpec& trg,
1051                                                  const UnicodeString& variant) const {
1052    UnicodeString ID;
1053    TransliteratorIDParser::STVtoID(src, trg, variant, ID);
1054    TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID);
1055    DEBUG_useEntry(e);
1056    return e;
1057}
1058
1059/**
1060 * Attempt to find a source-target/variant in the static locale
1061 * resource store.  Do not perform fallback.  Return 0 on failure.
1062 *
1063 * On success, create a new entry object, register it in the dynamic
1064 * store, and return a pointer to it, but do not make it public --
1065 * just because someone requested something, we do not expand the
1066 * available ID list (or spec DAG).
1067 *
1068 * Caller does NOT own returned object.
1069 */
1070TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
1071                                                 const TransliteratorSpec& trg,
1072                                                 const UnicodeString& variant) {
1073    TransliteratorEntry* entry = 0;
1074    if (src.isLocale()) {
1075        entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
1076    } else if (trg.isLocale()) {
1077        entry = findInBundle(trg, src, variant, UTRANS_REVERSE);
1078    }
1079
1080    // If we found an entry, store it in the Hashtable for next
1081    // time.
1082    if (entry != 0) {
1083        registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE);
1084    }
1085
1086    return entry;
1087}
1088
1089// As of 2.0, resource bundle keys cannot contain '_'
1090static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo"
1091
1092static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom"
1093
1094static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate"
1095
1096/**
1097 * Attempt to find an entry in a single resource bundle.  This is
1098 * a one-sided lookup.  findInStaticStore() performs up to two such
1099 * lookups, one for the source, and one for the target.
1100 *
1101 * Do not perform fallback.  Return 0 on failure.
1102 *
1103 * On success, create a new Entry object, populate it, and return it.
1104 * The caller owns the returned object.
1105 */
1106TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
1107                                            const TransliteratorSpec& specToFind,
1108                                            const UnicodeString& variant,
1109                                            UTransDirection direction)
1110{
1111    UnicodeString utag;
1112    UnicodeString resStr;
1113    int32_t pass;
1114
1115    for (pass=0; pass<2; ++pass) {
1116        utag.truncate(0);
1117        // First try either TransliteratorTo_xxx or
1118        // TransliterateFrom_xxx, then try the bidirectional
1119        // Transliterate_xxx.  This precedence order is arbitrary
1120        // but must be consistent and documented.
1121        if (pass == 0) {
1122            utag.append(direction == UTRANS_FORWARD ?
1123                        TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
1124        } else {
1125            utag.append(TRANSLITERATE, -1);
1126        }
1127        UnicodeString s(specToFind.get());
1128        utag.append(s.toUpper(""));
1129        UErrorCode status = U_ZERO_ERROR;
1130        ResourceBundle subres(specToOpen.getBundle().get(
1131            CharString().appendInvariantChars(utag, status).data(), status));
1132        if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
1133            continue;
1134        }
1135
1136        s.truncate(0);
1137        if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
1138            continue;
1139        }
1140
1141        if (variant.length() != 0) {
1142            status = U_ZERO_ERROR;
1143            resStr = subres.getStringEx(
1144                CharString().appendInvariantChars(variant, status).data(), status);
1145            if (U_SUCCESS(status)) {
1146                // Exit loop successfully
1147                break;
1148            }
1149        } else {
1150            // Variant is empty, which means match the first variant listed.
1151            status = U_ZERO_ERROR;
1152            resStr = subres.getStringEx(1, status);
1153            if (U_SUCCESS(status)) {
1154                // Exit loop successfully
1155                break;
1156            }
1157        }
1158    }
1159
1160    if (pass==2) {
1161        // Failed
1162        return NULL;
1163    }
1164
1165    // We have succeeded in loading a string from the locale
1166    // resources.  Create a new registry entry to hold it and return it.
1167    TransliteratorEntry *entry = new TransliteratorEntry();
1168    if (entry != 0) {
1169        // The direction is always forward for the
1170        // TransliterateTo_xxx and TransliterateFrom_xxx
1171        // items; those are unidirectional forward rules.
1172        // For the bidirectional Transliterate_xxx items,
1173        // the direction is the value passed in to this
1174        // function.
1175        int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
1176        entry->entryType = TransliteratorEntry::LOCALE_RULES;
1177        entry->stringArg = resStr;
1178        entry->intArg = dir;
1179    }
1180
1181    return entry;
1182}
1183
1184/**
1185 * Convenience method.  Calls 3-arg find().
1186 */
1187TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
1188    UnicodeString source, target, variant;
1189    UBool sawSource;
1190    TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
1191    return find(source, target, variant);
1192}
1193
1194/**
1195 * Top-level find method.  Attempt to find a source-target/variant in
1196 * either the dynamic or the static (locale resource) store.  Perform
1197 * fallback.
1198 *
1199 * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
1200 *
1201 *   ss_SS_SSS-tt_TT_TTT/v -- in hashtable
1202 *   ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
1203 *
1204 *     repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
1205 *
1206 *     ss_SS_SSS-t/ *
1207 *     ss_SS-t/ *
1208 *     ss-t/ *
1209 *     sscript-t/ *
1210 *
1211 * Here * matches the first variant listed.
1212 *
1213 * Caller does NOT own returned object.  Return 0 on failure.
1214 */
1215TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
1216                                    UnicodeString& target,
1217                                    UnicodeString& variant) {
1218
1219    TransliteratorSpec src(source);
1220    TransliteratorSpec trg(target);
1221    TransliteratorEntry* entry;
1222
1223    // Seek exact match in hashtable.  Temporary fix for ICU 4.6.
1224    // TODO: The general logic for finding a matching transliterator needs to be reviewed.
1225    // ICU ticket #8089
1226    UnicodeString ID;
1227    TransliteratorIDParser::STVtoID(source, target, variant, ID);
1228    entry = (TransliteratorEntry*) registry.get(ID);
1229    if (entry != 0) {
1230        // std::string ss;
1231        // std::cout << ID.toUTF8String(ss) << std::endl;
1232        return entry;
1233    }
1234
1235    if (variant.length() != 0) {
1236
1237        // Seek exact match in hashtable
1238        entry = findInDynamicStore(src, trg, variant);
1239        if (entry != 0) {
1240            return entry;
1241        }
1242
1243        // Seek exact match in locale resources
1244        entry = findInStaticStore(src, trg, variant);
1245        if (entry != 0) {
1246            return entry;
1247        }
1248    }
1249
1250    for (;;) {
1251        src.reset();
1252        for (;;) {
1253            // Seek match in hashtable
1254            entry = findInDynamicStore(src, trg, NO_VARIANT);
1255            if (entry != 0) {
1256                return entry;
1257            }
1258
1259            // Seek match in locale resources
1260            entry = findInStaticStore(src, trg, NO_VARIANT);
1261            if (entry != 0) {
1262                return entry;
1263            }
1264            if (!src.hasFallback()) {
1265                break;
1266            }
1267            src.next();
1268        }
1269        if (!trg.hasFallback()) {
1270            break;
1271        }
1272        trg.next();
1273    }
1274
1275    return 0;
1276}
1277
1278/**
1279 * Given an Entry object, instantiate it.  Caller owns result.  Return
1280 * 0 on failure.
1281 *
1282 * Return a non-empty aliasReturn value if the ID points to an alias.
1283 * We cannot instantiate it ourselves because the alias may contain
1284 * filters or compounds, which we do not understand.  Caller should
1285 * make aliasReturn empty before calling.
1286 *
1287 * The entry object is assumed to reside in the dynamic store.  It may be
1288 * modified.
1289 */
1290Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
1291                                                         TransliteratorEntry *entry,
1292                                                         TransliteratorAlias* &aliasReturn,
1293                                                         UErrorCode& status) {
1294    Transliterator *t = 0;
1295    U_ASSERT(aliasReturn == 0);
1296
1297    switch (entry->entryType) {
1298    case TransliteratorEntry::RBT_DATA:
1299        t = new RuleBasedTransliterator(ID, entry->u.data);
1300        if (t == 0) {
1301            status = U_MEMORY_ALLOCATION_ERROR;
1302        }
1303        return t;
1304    case TransliteratorEntry::PROTOTYPE:
1305        t = entry->u.prototype->clone();
1306        if (t == 0) {
1307            status = U_MEMORY_ALLOCATION_ERROR;
1308        }
1309        return t;
1310    case TransliteratorEntry::ALIAS:
1311        aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
1312        if (aliasReturn == 0) {
1313            status = U_MEMORY_ALLOCATION_ERROR;
1314        }
1315        return 0;
1316    case TransliteratorEntry::FACTORY:
1317        t = entry->u.factory.function(ID, entry->u.factory.context);
1318        if (t == 0) {
1319            status = U_MEMORY_ALLOCATION_ERROR;
1320        }
1321        return t;
1322    case TransliteratorEntry::COMPOUND_RBT:
1323        {
1324            UVector* rbts = new UVector(entry->u.dataVector->size(), status);
1325            // Check for null pointer
1326            if (rbts == NULL) {
1327                status = U_MEMORY_ALLOCATION_ERROR;
1328                return NULL;
1329            }
1330            int32_t passNumber = 1;
1331            for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
1332                // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
1333                Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
1334                    (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
1335                if (t == 0)
1336                    status = U_MEMORY_ALLOCATION_ERROR;
1337                else
1338                    rbts->addElement(t, status);
1339            }
1340            if (U_FAILURE(status)) {
1341                delete rbts;
1342                return 0;
1343            }
1344            aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
1345        }
1346        if (aliasReturn == 0) {
1347            status = U_MEMORY_ALLOCATION_ERROR;
1348        }
1349        return 0;
1350    case TransliteratorEntry::LOCALE_RULES:
1351        aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
1352                                              (UTransDirection) entry->intArg);
1353        if (aliasReturn == 0) {
1354            status = U_MEMORY_ALLOCATION_ERROR;
1355        }
1356        return 0;
1357    case TransliteratorEntry::RULES_FORWARD:
1358    case TransliteratorEntry::RULES_REVERSE:
1359        // Process the rule data into a TransliteratorRuleData object,
1360        // and possibly also into an ::id header and/or footer.  Then
1361        // we modify the registry with the parsed data and retry.
1362        {
1363            TransliteratorParser parser(status);
1364
1365            // We use the file name, taken from another resource bundle
1366            // 2-d array at static init time, as a locale language.  We're
1367            // just using the locale mechanism to map through to a file
1368            // name; this in no way represents an actual locale.
1369            //CharString ch(entry->stringArg);
1370            //UResourceBundle *bundle = ures_openDirect(0, ch, &status);
1371            UnicodeString rules = entry->stringArg;
1372            //ures_close(bundle);
1373
1374            //if (U_FAILURE(status)) {
1375                // We have a failure of some kind.  Remove the ID from the
1376                // registry so we don't keep trying.  NOTE: This will throw off
1377                // anyone who is, at the moment, trying to iterate over the
1378                // available IDs.  That's acceptable since we should never
1379                // really get here except under installation, configuration,
1380                // or unrecoverable run time memory failures.
1381            //    remove(ID);
1382            //} else {
1383
1384                // If the status indicates a failure, then we don't have any
1385                // rules -- there is probably an installation error.  The list
1386                // in the root locale should correspond to all the installed
1387                // transliterators; if it lists something that's not
1388                // installed, we'll get an error from ResourceBundle.
1389                aliasReturn = new TransliteratorAlias(ID, rules,
1390                    ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
1391                     UTRANS_REVERSE : UTRANS_FORWARD));
1392                if (aliasReturn == 0) {
1393                    status = U_MEMORY_ALLOCATION_ERROR;
1394                }
1395            //}
1396        }
1397        return 0;
1398    default:
1399        U_ASSERT(FALSE); // can't get here
1400        return 0;
1401    }
1402}
1403U_NAMESPACE_END
1404
1405#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1406
1407//eof
1408