/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef RBT_DATA_H
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define RBT_DATA_H
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uclean.h"
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_set.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeFunctor;
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeMatcher;
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass UnicodeReplacer;
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The rule data for a RuleBasedTransliterators.  RBT objects hold
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a const pointer to a TRD object that they do not own.  TRD objects
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are essentially the parsed rules in compact, usable form.  The
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TRD objects themselves are held for the life of the process in
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a static cache owned by Transliterator.
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This class' API is a little asymmetric.  There is a method to
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * define a variable, but no way to define a set.  This is because the
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sets are defined by the parser in a UVector, and the vector is
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * copied into a fixed-size array here.  Once this is done, no new
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sets may be defined.  In practice, there is no need to do so, since
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * generating the data and using it are discrete phases.  When there
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is a need to access the set data during the parse phase, another
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * data structure handles this.  See the parsing code for more
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * details.
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass TransliterationRuleData : public UMemory {
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // PUBLIC DATA MEMBERS
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Rule table.  May be empty.
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleSet ruleSet;
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Map variable name (String) to variable (UnicodeString).  A variable name
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * corresponds to zero or more characters, stored in a UnicodeString in
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * this hash.  One or more of these chars may also correspond to a
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * a stand-in: it is an index for a secondary lookup in
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * data.variables.  The stand-in also represents the UnicodeMatcher in
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the stored rules.
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Hashtable variableNames;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Map category variable (UChar) to set (UnicodeFunctor).
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Variables that correspond to a set of characters are mapped
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * from variable name to a stand-in character in data.variableNames.
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The stand-in then serves as a key in this hash to lookup the
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * actual UnicodeFunctor object.  In addition, the stand-in is
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * stored in the rule text to represent the set of characters.
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variables[i] represents character (variablesBase + i).
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeFunctor** variables;
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Flag that indicates whether the variables are owned (if a single
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * call to Transliterator::createFromRules() produces a CompoundTransliterator
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * with more than one RuleBasedTransliterator as children, they all share
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the same variables list, so only the first one is considered to own
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the variables)
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool variablesAreOwned;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The character that represents variables[0].  Characters
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variablesBase through variablesBase +
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * variablesLength - 1 represent UnicodeFunctor objects.
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar variablesBase;
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The length of variables.
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t variablesLength;
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Constructor
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param status Output param set to success/failure code on exit.
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleData(UErrorCode& status);
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Copy Constructor
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleData(const TransliterationRuleData&);
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * destructor
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ~TransliterationRuleData();
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Given a stand-in character, return the UnicodeFunctor that it
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * represents, or NULL if it doesn't represent anything.
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param standIn    the given stand-in character.
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return           the UnicodeFunctor that 'standIn' represents
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeFunctor* lookup(UChar32 standIn) const;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Given a stand-in character, return the UnicodeMatcher that it
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * represents, or NULL if it doesn't represent anything or if it
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * represents something that is not a matcher.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param standIn    the given stand-in character.
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return           return the UnicodeMatcher that 'standIn' represents
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Given a stand-in character, return the UnicodeReplacer that it
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * represents, or NULL if it doesn't represent anything or if it
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * represents something that is not a replacer.
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param standIn    the given stand-in character.
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return           return the UnicodeReplacer that 'standIn' represents
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
153