1/*
2**********************************************************************
3* Copyright (C) 1999-2007, International Business Machines Corporation
4* and others. All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/17/99    aliu        Creation.
8**********************************************************************
9*/
10#ifndef RBT_DATA_H
11#define RBT_DATA_H
12
13#include "unicode/utypes.h"
14#include "unicode/uclean.h"
15
16#if !UCONFIG_NO_TRANSLITERATION
17
18#include "unicode/uobject.h"
19#include "rbt_set.h"
20#include "hash.h"
21
22U_NAMESPACE_BEGIN
23
24class UnicodeFunctor;
25class UnicodeMatcher;
26class UnicodeReplacer;
27
28/**
 * The rule data for a RuleBasedTransliterator.  RBT objects hold
30 * a const pointer to a TRD object that they do not own.  TRD objects
31 * are essentially the parsed rules in compact, usable form.  The
32 * TRD objects themselves are held for the life of the process in
33 * a static cache owned by Transliterator.
34 *
35 * This class' API is a little asymmetric.  There is a method to
36 * define a variable, but no way to define a set.  This is because the
37 * sets are defined by the parser in a UVector, and the vector is
38 * copied into a fixed-size array here.  Once this is done, no new
39 * sets may be defined.  In practice, there is no need to do so, since
40 * generating the data and using it are discrete phases.  When there
41 * is a need to access the set data during the parse phase, another
42 * data structure handles this.  See the parsing code for more
43 * details.
44 */
45class TransliterationRuleData : public UMemory {
46
47public:
48
49    // PUBLIC DATA MEMBERS
50
51    /**
52     * Rule table.  May be empty.
53     */
54    TransliterationRuleSet ruleSet;
55
56    /**
57     * Map variable name (String) to variable (UnicodeString).  A variable name
58     * corresponds to zero or more characters, stored in a UnicodeString in
59     * this hash.  One or more of these chars may also correspond to a
60     * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
61     * a stand-in: it is an index for a secondary lookup in
62     * data.variables.  The stand-in also represents the UnicodeMatcher in
63     * the stored rules.
64     */
65    Hashtable variableNames;
66
67    /**
68     * Map category variable (UChar) to set (UnicodeFunctor).
69     * Variables that correspond to a set of characters are mapped
70     * from variable name to a stand-in character in data.variableNames.
71     * The stand-in then serves as a key in this hash to lookup the
72     * actual UnicodeFunctor object.  In addition, the stand-in is
73     * stored in the rule text to represent the set of characters.
74     * variables[i] represents character (variablesBase + i).
75     */
76    UnicodeFunctor** variables;
77
78    /**
79     * Flag that indicates whether the variables are owned (if a single
80     * call to Transliterator::createFromRules() produces a CompoundTransliterator
81     * with more than one RuleBasedTransliterator as children, they all share
82     * the same variables list, so only the first one is considered to own
83     * the variables)
84     */
85    UBool variablesAreOwned;
86
87    /**
88     * The character that represents variables[0].  Characters
89     * variablesBase through variablesBase +
90     * variablesLength - 1 represent UnicodeFunctor objects.
91     */
92    UChar variablesBase;
93
94    /**
95     * The length of variables.
96     */
97    int32_t variablesLength;
98
99public:
100
101    /**
102     * Constructor
103     * @param status Output param set to success/failure code on exit.
104     */
105    TransliterationRuleData(UErrorCode& status);
106
107    /**
108     * Copy Constructor
109     */
110    TransliterationRuleData(const TransliterationRuleData&);
111
112    /**
113     * destructor
114     */
115    ~TransliterationRuleData();
116
117    /**
118     * Given a stand-in character, return the UnicodeFunctor that it
119     * represents, or NULL if it doesn't represent anything.
120     * @param standIn    the given stand-in character.
121     * @return           the UnicodeFunctor that 'standIn' represents
122     */
123    UnicodeFunctor* lookup(UChar32 standIn) const;
124
125    /**
126     * Given a stand-in character, return the UnicodeMatcher that it
127     * represents, or NULL if it doesn't represent anything or if it
128     * represents something that is not a matcher.
129     * @param standIn    the given stand-in character.
130     * @return           return the UnicodeMatcher that 'standIn' represents
131     */
132    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
133
134    /**
135     * Given a stand-in character, return the UnicodeReplacer that it
136     * represents, or NULL if it doesn't represent anything or if it
137     * represents something that is not a replacer.
138     * @param standIn    the given stand-in character.
139     * @return           return the UnicodeReplacer that 'standIn' represents
140     */
141    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
142
143
144private:
145    TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
146};
147
148U_NAMESPACE_END
149
150#endif /* #if !UCONFIG_NO_TRANSLITERATION */
151
152#endif
153