1/*
2**********************************************************************
3*   Copyright (c) 2001-2008, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   08/10/2001  aliu        Creation.
8**********************************************************************
9*/
10#ifndef _TRANSREG_H
11#define _TRANSREG_H
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "unicode/uobject.h"
18#include "unicode/translit.h"
19#include "hash.h"
20#include "uvector.h"
21
22U_NAMESPACE_BEGIN
23
// Forward declarations.  TransliteratorEntry and TransliteratorSpec appear
// in this header only behind pointers and references, so a declaration is
// enough for them.  UnicodeString is also held by value further down (the
// ID and aliasesOrRules members of TransliteratorAlias), so its full
// definition must already be visible -- presumably through
// unicode/translit.h; TODO confirm.
24class TransliteratorEntry;
25class TransliteratorSpec;
26class UnicodeString;
27
28//------------------------------------------------------------------
29// TransliteratorAlias
30//------------------------------------------------------------------
31
32/**
33 * A TransliteratorAlias object is returned by get() if the given ID
34 * actually translates into something else.  The caller then invokes
35 * the create() method on the alias to create the actual
36 * transliterator, and deletes the alias.
37 *
38 * Why all the shenanigans?  To prevent circular calls between
39 * the registry code and the transliterator code, a cycle that would deadlock.
40 */
41class TransliteratorAlias : public UMemory {
42 public:
43    /**
44     * Construct a simple alias (type == SIMPLE).
45     * @param aliasID the alias ID string, i.e. the ID to instantiate instead.
     * @param compoundFilter filter to associate with the alias.  The
     *        compoundFilter member is a non-owning alias, so the set is
     *        presumably not adopted and must outlive this object --
     *        TODO confirm.
46     */
47    TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
48
49    /**
50     * Construct a compound RBT alias (type == COMPOUND).
     * @param ID the transliterator ID.
     * @param idBlocks the ID blocks string.
     * @param adoptedTransliterators vector handed over to this object;
     *        the transes member that holds such a vector is marked owned.
     * @param compoundFilter the compound filter; it is not owned.
51     */
52    TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
53                        UVector* adoptedTransliterators,
54                        const UnicodeSet* compoundFilter);
55
56    /**
57     * Construct a rules alias (type == RULES).
     * @param theID the transliterator ID.
     * @param rules the rules string.
     * @param dir   the direction recorded for the rules.
58     */
59    TransliteratorAlias(const UnicodeString& theID,
60                        const UnicodeString& rules,
61                        UTransDirection dir);
62
    // Destructor.  NOTE(review): presumably releases the owned transes
    // vector; the definition is not visible in this header -- confirm.
63    ~TransliteratorAlias();
64
65    /**
66     * The whole point of create() is that the caller must invoke
67     * it when the registry mutex is NOT held, to prevent deadlock.
68     * It may only be called once.
69     *
70     * Note: Only call create() if isRuleBased() returns FALSE.
71     *
72     * This method must be called *outside* of the TransliteratorRegistry
73     * mutex.
     *
     * The two parameters are unnamed in this declaration; judging by
     * their types they presumably receive parse-error details and the
     * error code -- TODO confirm.
     * @return the actual transliterator that this alias stands for
     *         (presumably owned by the caller -- TODO confirm).
74     */
75    Transliterator* create(UParseError&, UErrorCode&);
76
77    /**
78     * Return TRUE if this alias is rule-based.  If so, the caller
79     * must call parse() on it, then call TransliteratorRegistry::reget().
80     */
81    UBool isRuleBased() const;
82
83    /**
84     * If isRuleBased() returns TRUE, then the caller must call this
85     * method, followed by TransliteratorRegistry::reget().  The latter
86     * method must be called inside the TransliteratorRegistry mutex.
87     *
88     * Note: Only call parse() if isRuleBased() returns TRUE.
89     *
90     * This method must be called *outside* of the TransliteratorRegistry
91     * mutex, because it can instantiate Transliterators embedded in
92     * the rules via the "&Latin-Arabic()" syntax.
     *
     * @param parser the parser to run (presumably over the stored rules
     *               string -- TODO confirm); the same parser is later
     *               handed to TransliteratorRegistry::reget().
     * @param pe     presumably receives parse-error details.
     * @param ec     presumably receives the success/failure code.
93     */
94    void parse(TransliteratorParser& parser,
95               UParseError& pe, UErrorCode& ec) const;
96
97 private:
98    // We actually come in three flavors:
99    // 1. Simple alias
100    //    Here aliasesOrRules is the alias string.  Everything else is
101    //    null, zero, empty.
102    // 2. CompoundRBT
103    //    Here ID is the ID, aliasesOrRules is the idBlocks string, and
104    //    transes holds the contained transliterators that were adopted
105    //    at construction.  compoundFilter is the
106    //    compound filter, and it is _not_ owned.
107    // 3. Rules
108    //    Here ID is the ID, aliasesOrRules is the rules string, and
109    //    direction is the UTransDirection.
    //
    // NOTE(review): an earlier wording of this comment named members
    // (aliasID, trans, idSplitPoint) that are not declared in this class.
    // The mapping onto the members below is inferred from their names and
    // from the constructor signatures -- confirm against the .cpp.  The
    // simple-alias constructor also accepts a compoundFilter, so
    // "everything else is null" may be out of date for flavor 1.
110    UnicodeString ID;
111    UnicodeString aliasesOrRules;
112    UVector* transes; // owned
113    const UnicodeSet* compoundFilter; // alias
114    UTransDirection direction; // used by the RULES flavor
115    enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this object is
116
117    TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
118    TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
119};
120
121
122/**
123 * A registry of system transliterators.  This is the data structure
124 * that implements the mapping between transliterator IDs and the data
125 * or function pointers used to create the corresponding
126 * transliterators.  There is one instance of the registry that is
127 * created statically.
128 *
129 * The registry consists of a dynamic component -- a hashtable -- and
130 * a static component -- locale resource bundles.  The dynamic store
131 * is semantically overlaid on the static store, so the static mapping
132 * can be dynamically overridden.
133 *
134 * This is an internal class that is only used by Transliterator.
135 * Transliterator maintains one static instance of this class and
136 * delegates all registry-related operations to it.
137 *
138 * @author Alan Liu
139 */
140class TransliteratorRegistry : public UMemory {
141
142 public:
143
144    /**
145     * Constructor.
146     * @param status Output param set to success/failure code.
147     */
148    TransliteratorRegistry(UErrorCode& status);
149
150    /**
151     * Nonvirtual destructor -- this class is not subclassable.
152     */
153    ~TransliteratorRegistry();
154
155    //------------------------------------------------------------------
156    // Basic public API
157    //------------------------------------------------------------------
158
159    /**
160     * Given a simple ID (forward direction, no inline filter, not
161     * compound) attempt to instantiate it from the registry.  Return
162     * 0 on failure.
163     *
164     * Return a non-NULL aliasReturn value if the ID points to an alias.
165     * We cannot instantiate it ourselves because the alias may contain
166     * filters or compounds, which we do not understand.  Caller should
167     * make aliasReturn NULL before calling.
168     * @param ID          the given ID
169     * @param aliasReturn output param to receive TransliteratorAlias;
170     *                    should be NULL on entry
171     * @param status      Output param set to success/failure code.
172     * @return            the instantiated transliterator, or 0 on
173     *                    failure.
174     */
175    Transliterator* get(const UnicodeString& ID,
176                        TransliteratorAlias*& aliasReturn,
177                        UErrorCode& status);
178
179    /**
180     * The caller must call this after calling get(), if [a] calling get()
181     * returns an alias, and [b] the alias is rule based.  In that
182     * situation the caller must call alias->parse() to do the parsing
183     * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
184     * instantiating the transliterator.
185     *
186     * Note: Another alias might be returned by this method.
187     *
188     * This method (like all public methods of this class) must be called
189     * from within the TransliteratorRegistry mutex.
190     *
     * @param ID          presumably the same ID that was passed to
     *                    get() -- TODO confirm
     * @param parser      the parser that alias->parse() was run on
191     * @param aliasReturn output param to receive TransliteratorAlias;
192     *                    should be NULL on entry
     * @param status      Output param set to success/failure code.
193     */
194    Transliterator* reget(const UnicodeString& ID,
195                          TransliteratorParser& parser,
196                          TransliteratorAlias*& aliasReturn,
197                          UErrorCode& status);
198
199    /**
200     * Register a prototype (adopted).  This adds an entry to the
201     * dynamic store, or replaces an existing entry.  Any entry in the
202     * underlying static locale resource store is masked.
     * @param adoptedProto the prototype; adopted by the registry
     * @param visible      presumably whether the ID is also listed in
     *                     availableIDs and specDAG, which is what the
     *                     member notes say controls visibility -- TODO
     *                     confirm
     * @param ec           Output param set to success/failure code.
203     */
204    void put(Transliterator* adoptedProto,
205             UBool visible,
206             UErrorCode& ec);
207
208    /**
209     * Register an ID and a factory function pointer.  This adds an
210     * entry to the dynamic store, or replaces an existing entry.  Any
211     * entry in the underlying static locale resource store is masked.
     * @param ID      the ID to register
     * @param factory the factory function pointer
     * @param context presumably passed back to the factory when it is
     *                invoked -- TODO confirm
     * @param visible presumably whether the ID is publicly listed --
     *                TODO confirm
     * @param ec      Output param set to success/failure code.
212     */
213    void put(const UnicodeString& ID,
214             Transliterator::Factory factory,
215             Transliterator::Token context,
216             UBool visible,
217             UErrorCode& ec);
218
219    /**
220     * Register an ID and a resource name.  This adds an entry to the
221     * dynamic store, or replaces an existing entry.  Any entry in the
222     * underlying static locale resource store is masked.
     * @param ID           the ID to register
     * @param resourceName the resource name
     * @param dir          the direction associated with the resource
     * @param readonlyResourceAlias NOTE(review): meaning not visible
     *                     from this header; the name suggests that
     *                     resourceName may be aliased read-only rather
     *                     than copied -- confirm
     * @param visible      presumably whether the ID is publicly listed
     *                     -- TODO confirm
     * @param ec           Output param set to success/failure code.
223     */
224    void put(const UnicodeString& ID,
225             const UnicodeString& resourceName,
226             UTransDirection dir,
227             UBool readonlyResourceAlias,
228             UBool visible,
229             UErrorCode& ec);
230
231    /**
232     * Register an ID and an alias ID.  This adds an entry to the
233     * dynamic store, or replaces an existing entry.  Any entry in the
234     * underlying static locale resource store is masked.
     * @param ID      the ID to register
     * @param alias   the alias ID
     * @param readonlyAliasAlias NOTE(review): meaning not visible from
     *                this header; the name suggests that alias may be
     *                aliased read-only rather than copied -- confirm
     * @param visible presumably whether the ID is publicly listed --
     *                TODO confirm
     * @param ec      Output param set to success/failure code.
235     */
236    void put(const UnicodeString& ID,
237             const UnicodeString& alias,
238             UBool readonlyAliasAlias,
239             UBool visible,
240             UErrorCode& ec);
241
242    /**
243     * Unregister an ID.  This removes an entry from the dynamic store
244     * if there is one.  The static locale resource store is
245     * unaffected.
246     * @param ID    the given ID.
247     */
248    void remove(const UnicodeString& ID);
249
250    //------------------------------------------------------------------
251    // Public ID and spec management
252    //------------------------------------------------------------------
253
254    /**
255     * Return a StringEnumeration over the IDs currently registered
256     * with the system.
     * @return the enumeration (presumably newly allocated and owned by
     *         the caller -- TODO confirm)
257     * @internal
258     */
259    StringEnumeration* getAvailableIDs() const;
260
261    /**
262     * == OBSOLETE - remove in ICU 3.4 ==
263     * Return the number of IDs currently registered with the system.
264     * To retrieve the actual IDs, call getAvailableID(i) with
265     * i from 0 to countAvailableIDs() - 1.
266     * @return the number of IDs currently registered with the system.
267     * @internal
268     */
269    int32_t countAvailableIDs(void) const;
270
271    /**
272     * == OBSOLETE - remove in ICU 3.4 ==
273     * Return the index-th available ID.  index must be between 0
274     * and countAvailableIDs() - 1, inclusive.  If index is out of
275     * range, the result of getAvailableID(0) is returned.
276     * @param index the given index.
277     * @return the index-th available ID.  index must be between 0
278     *         and countAvailableIDs() - 1, inclusive.  If index is out of
279     *         range, the result of getAvailableID(0) is returned.
280     * @internal
281     */
282    const UnicodeString& getAvailableID(int32_t index) const;
283
284    /**
285     * Return the number of registered source specifiers.
286     * @return the number of registered source specifiers.
287     */
288    int32_t countAvailableSources(void) const;
289
290    /**
291     * Return a registered source specifier.
292     * @param index which specifier to return, from 0 to n-1, where
293     * n = countAvailableSources()
294     * @param result fill-in parameter to receive the source specifier.
295     * If index is out of range, result will be empty.
296     * @return reference to result
297     */
298    UnicodeString& getAvailableSource(int32_t index,
299                                      UnicodeString& result) const;
300
301    /**
302     * Return the number of registered target specifiers for a given
303     * source specifier.
304     * @param source the given source specifier.
305     * @return the number of registered target specifiers for a given
306     *         source specifier.
307     */
308    int32_t countAvailableTargets(const UnicodeString& source) const;
309
310    /**
311     * Return a registered target specifier for a given source.
312     * @param index which specifier to return, from 0 to n-1, where
313     * n = countAvailableTargets(source)
314     * @param source the source specifier
315     * @param result fill-in parameter to receive the target specifier.
316     * If source is invalid or if index is out of range, result will
317     * be empty.
318     * @return reference to result
319     */
320    UnicodeString& getAvailableTarget(int32_t index,
321                                      const UnicodeString& source,
322                                      UnicodeString& result) const;
323
324    /**
325     * Return the number of registered variant specifiers for a given
326     * source-target pair.  There is always at least one variant: If
327     * just source-target is registered, then the single variant
328     * NO_VARIANT is returned.  If source-target/variant is registered
329     * then that variant is returned.
330     * @param source the source specifier
331     * @param target the target specifier
332     * @return the number of registered variant specifiers for a given
333     *         source-target pair.
334     */
335    int32_t countAvailableVariants(const UnicodeString& source,
336                                   const UnicodeString& target) const;
337
338    /**
339     * Return a registered variant specifier for a given source-target
340     * pair.  If NO_VARIANT is one of the variants, then it will be
341     * at index 0.
342     * @param index which specifier to return, from 0 to n-1, where
343     * n = countAvailableVariants(source, target)
344     * @param source the source specifier
345     * @param target the target specifier
346     * @param result fill-in parameter to receive the variant
347     * specifier.  If source is invalid or if target is invalid or if
348     * index is out of range, result will be empty.
349     * @return reference to result
350     */
351    UnicodeString& getAvailableVariant(int32_t index,
352                                       const UnicodeString& source,
353                                       const UnicodeString& target,
354                                       UnicodeString& result) const;
355
356 private:
357
358    //----------------------------------------------------------------
359    // Private implementation
360    //----------------------------------------------------------------
361
    // Lookup helpers.  Each returns a TransliteratorEntry pointer.
    // NOTE(review): presumably NULL means "no such entry" and the entry
    // stays owned by the registry -- confirm against the .cpp.  The
    // three-argument find() takes its strings by non-const reference, so
    // it may modify them; what it writes back is not visible here.
362    TransliteratorEntry* find(const UnicodeString& ID);
363
364    TransliteratorEntry* find(UnicodeString& source,
365                UnicodeString& target,
366                UnicodeString& variant);
367
    // Looks only in the dynamic store (presumably the registry hashtable
    // below); declared const, so it does not modify the registry.
368    TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
369                              const TransliteratorSpec& trg,
370                              const UnicodeString& variant) const;
371
    // Looks in the static (locale resource bundle) store.  Unlike
    // findInDynamicStore() this is non-const -- NOTE(review): presumably
    // because a hit is cached into the dynamic store; confirm.
372    TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
373                             const TransliteratorSpec& trg,
374                             const UnicodeString& variant);
375
    // Static helper: needs no registry instance.  Presumably opens the
    // bundle named by specToOpen and searches it for specToFind/variant
    // in the given direction -- TODO confirm.
376    static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
377                               const TransliteratorSpec& specToFind,
378                               const UnicodeString& variant,
379                               UTransDirection direction);
380
    // Registration helpers.  The three overloads differ only in whether
    // the caller supplies the full ID, the source/target/variant parts,
    // or both.  The "adopted" entry is presumably owned by the registry
    // after the call -- TODO confirm.
381    void registerEntry(const UnicodeString& source,
382                       const UnicodeString& target,
383                       const UnicodeString& variant,
384                       TransliteratorEntry* adopted,
385                       UBool visible);
386
387    void registerEntry(const UnicodeString& ID,
388                       TransliteratorEntry* adopted,
389                       UBool visible);
390
391    void registerEntry(const UnicodeString& ID,
392                       const UnicodeString& source,
393                       const UnicodeString& target,
394                       const UnicodeString& variant,
395                       TransliteratorEntry* adopted,
396                       UBool visible);
397
    // Presumably add / remove a source-target-variant triple in specDAG
    // -- TODO confirm.
398    void registerSTV(const UnicodeString& source,
399                     const UnicodeString& target,
400                     const UnicodeString& variant);
401
402    void removeSTV(const UnicodeString& source,
403                   const UnicodeString& target,
404                   const UnicodeString& variant);
405
    // Takes the same ID/aliasReturn/status parameters as get(), plus the
    // entry to instantiate.  NOTE(review): presumably the shared back end
    // of get() and reget() -- confirm.
406    Transliterator* instantiateEntry(const UnicodeString& ID,
407                                     TransliteratorEntry *entry,
408                                     TransliteratorAlias*& aliasReturn,
409                                     UErrorCode& status);
410
411    /**
412     * A StringEnumeration over the registered IDs in this object.
413     */
414    class Enumeration : public StringEnumeration {
415    public:
416        Enumeration(const TransliteratorRegistry& reg);
417        virtual ~Enumeration();
418        virtual int32_t count(UErrorCode& status) const;
419        virtual const UnicodeString* snext(UErrorCode& status);
420        virtual void reset(UErrorCode& status);
421        static UClassID U_EXPORT2 getStaticClassID();
422        virtual UClassID getDynamicClassID() const;
423    private:
424        int32_t index; // presumably the position of the next ID to hand out -- TODO confirm
425        const TransliteratorRegistry& reg; // held by reference, not owned; presumably the registry given to the constructor
426    };
427    friend class Enumeration; // grants Enumeration access to the private members of this class
428
429 private:
430
431    /**
432     * Dynamic registry mapping full IDs to Entry objects.  This
433     * contains both public and internal entities.  The visibility is
434     * controlled by whether an entry is listed in availableIDs and
435     * specDAG or not.
436     */
437    Hashtable registry;
438
439    /**
440     * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
441     * target => (UVector: variant)) The UVector of variants is never
442     * empty.  For a source-target with no variant, the special
443     * variant NO_VARIANT (the empty string) is stored in slot zero of
444     * the UVector.
445     */
446    Hashtable specDAG;
447
448    /**
449     * Vector of public full IDs.
450     */
451    UVector availableIDs;
452
453    TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
454    TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
455};
456
457U_NAMESPACE_END
458
459#endif /* #if !UCONFIG_NO_TRANSLITERATION */
460
461#endif
462//eof
463