1/*
2**********************************************************************
3*   Copyright (C) 1999-2013, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/17/99    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/rep.h"
16#include "unicode/uniset.h"
17#include "rbt_pars.h"
18#include "rbt_data.h"
19#include "rbt_rule.h"
20#include "rbt.h"
21#include "umutex.h"
22
23U_NAMESPACE_BEGIN
24
25UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
26
27static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
28static Replaceable *gLockedText = NULL;
29
30void RuleBasedTransliterator::_construct(const UnicodeString& rules,
31                                         UTransDirection direction,
32                                         UParseError& parseError,
33                                         UErrorCode& status) {
34    fData = 0;
35    isDataOwned = TRUE;
36    if (U_FAILURE(status)) {
37        return;
38    }
39
40    TransliteratorParser parser(status);
41    parser.parse(rules, direction, parseError, status);
42    if (U_FAILURE(status)) {
43        return;
44    }
45
46    if (parser.idBlockVector.size() != 0 ||
47        parser.compoundFilter != NULL ||
48        parser.dataVector.size() == 0) {
49        status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
50        return;
51    }
52
53    fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
54    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
55}
56
57/**
58 * Constructs a new transliterator from the given rules.
59 * @param id            the id for the transliterator.
60 * @param rules         rules, separated by ';'
61 * @param direction     either FORWARD or REVERSE.
62 * @param adoptedFilter the filter for this transliterator.
63 * @param parseError    Struct to recieve information on position
64 *                      of error if an error is encountered
65 * @param status        Output param set to success/failure code.
66 * @exception IllegalArgumentException if rules are malformed
67 * or direction is invalid.
68 */
69RuleBasedTransliterator::RuleBasedTransliterator(
70                            const UnicodeString& id,
71                            const UnicodeString& rules,
72                            UTransDirection direction,
73                            UnicodeFilter* adoptedFilter,
74                            UParseError& parseError,
75                            UErrorCode& status) :
76    Transliterator(id, adoptedFilter) {
77    _construct(rules, direction,parseError,status);
78}
79
80/**
81 * Constructs a new transliterator from the given rules.
82 * @param id            the id for the transliterator.
83 * @param rules         rules, separated by ';'
84 * @param direction     either FORWARD or REVERSE.
85 * @param adoptedFilter the filter for this transliterator.
86 * @param status        Output param set to success/failure code.
87 * @exception IllegalArgumentException if rules are malformed
88 * or direction is invalid.
89 */
90/*RuleBasedTransliterator::RuleBasedTransliterator(
91                            const UnicodeString& id,
92                            const UnicodeString& rules,
93                            UTransDirection direction,
94                            UnicodeFilter* adoptedFilter,
95                            UErrorCode& status) :
96    Transliterator(id, adoptedFilter) {
97    UParseError parseError;
98    _construct(rules, direction,parseError, status);
99}*/
100
101/**
 * Convenience constructor with no filter.
103 */
104/*RuleBasedTransliterator::RuleBasedTransliterator(
105                            const UnicodeString& id,
106                            const UnicodeString& rules,
107                            UTransDirection direction,
108                            UErrorCode& status) :
109    Transliterator(id, 0) {
110    UParseError parseError;
111    _construct(rules, direction,parseError, status);
112}*/
113
114/**
 * Convenience constructor with no filter and FORWARD direction.
116 */
117/*RuleBasedTransliterator::RuleBasedTransliterator(
118                            const UnicodeString& id,
119                            const UnicodeString& rules,
120                            UErrorCode& status) :
121    Transliterator(id, 0) {
122    UParseError parseError;
123    _construct(rules, UTRANS_FORWARD, parseError, status);
124}*/
125
126/**
 * Convenience constructor with FORWARD direction.
128 */
129/*RuleBasedTransliterator::RuleBasedTransliterator(
130                            const UnicodeString& id,
131                            const UnicodeString& rules,
132                            UnicodeFilter* adoptedFilter,
133                            UErrorCode& status) :
134    Transliterator(id, adoptedFilter) {
135    UParseError parseError;
136    _construct(rules, UTRANS_FORWARD,parseError, status);
137}*/
138
139RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
140                                 const TransliterationRuleData* theData,
141                                 UnicodeFilter* adoptedFilter) :
142    Transliterator(id, adoptedFilter),
143    fData((TransliterationRuleData*)theData), // cast away const
144    isDataOwned(FALSE) {
145    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
146}
147
148/**
149 * Internal constructor.
150 */
151RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
152                                                 TransliterationRuleData* theData,
153                                                 UBool isDataAdopted) :
154    Transliterator(id, 0),
155    fData(theData),
156    isDataOwned(isDataAdopted) {
157    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
158}
159
160/**
161 * Copy constructor.
162 */
163RuleBasedTransliterator::RuleBasedTransliterator(
164        const RuleBasedTransliterator& other) :
165    Transliterator(other), fData(other.fData),
166    isDataOwned(other.isDataOwned) {
167
168    // The data object may or may not be owned.  If it is not owned we
169    // share it; it is invariant.  If it is owned, it's still
170    // invariant, but we need to copy it to prevent double-deletion.
171    // If this becomes a performance issue (if people do a lot of RBT
172    // copying -- unlikely) we can reference count the data object.
173
174    // Only do a deep copy if this is owned data, that is, data that
175    // will be later deleted.  System transliterators contain
176    // non-owned data.
177    if (isDataOwned) {
178        fData = new TransliterationRuleData(*other.fData);
179    }
180}
181
182/**
183 * Destructor.
184 */
185RuleBasedTransliterator::~RuleBasedTransliterator() {
186    // Delete the data object only if we own it.
187    if (isDataOwned) {
188        delete fData;
189    }
190}
191
192Transliterator* // Covariant return NOT ALLOWED (for portability)
193RuleBasedTransliterator::clone(void) const {
194    return new RuleBasedTransliterator(*this);
195}
196
197/**
198 * Implements {@link Transliterator#handleTransliterate}.
199 */
200void
201RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
202                                             UBool isIncremental) const {
203    /* We keep contextStart and contextLimit fixed the entire time,
204     * relative to the text -- contextLimit may move numerically if
205     * text is inserted or removed.  The start offset moves toward
206     * limit, with replacements happening under it.
207     *
208     * Example: rules 1. ab>x|y
209     *                2. yc>z
210     *
211     * |eabcd   begin - no match, advance start
212     * e|abcd   match rule 1 - change text & adjust start
213     * ex|ycd   match rule 2 - change text & adjust start
214     * exz|d    no match, advance start
215     * exzd|    done
216     */
217
218    /* A rule like
219     *   a>b|a
220     * creates an infinite loop. To prevent that, we put an arbitrary
221     * limit on the number of iterations that we take, one that is
222     * high enough that any reasonable rules are ok, but low enough to
223     * prevent a server from hanging.  The limit is 16 times the
224     * number of characters n, unless n is so large that 16n exceeds a
225     * uint32_t.
226     */
227    uint32_t loopCount = 0;
228    uint32_t loopLimit = index.limit - index.start;
229    if (loopLimit >= 0x10000000) {
230        loopLimit = 0xFFFFFFFF;
231    } else {
232        loopLimit <<= 4;
233    }
234
235    // Transliterator locking.  Rule-based Transliterators are not thread safe; concurrent
236    //   operations must be prevented.
237    // A Complication: compound transliterators can result in recursive entries to this
238    //   function, sometimes with different "This" objects, always with the same text.
239    //   Double-locking must be prevented in these cases.
240    //
241
242    // If the transliteration data is exclusively owned by this transliterator object,
243    //   we don't need to do any locking.  No sharing between transliterators is possible,
244    //   so no concurrent access from multiple threads is possible.
245    UBool    lockedMutexAtThisLevel = FALSE;
246    if (isDataOwned == FALSE) {
247        // Test whether this request is operating on the same text string as some
248        //   some other transliteration that is still in progress and holding the
249        //   transliteration mutex.  If so, do not lock the transliteration
250        //    mutex again.
251        // TODO(andy): Need a better scheme for handling this.
252        UBool needToLock;
253        umtx_lock(NULL);
254        needToLock = (&text != gLockedText);
255        umtx_unlock(NULL);
256        if (needToLock) {
257            umtx_lock(&transliteratorDataMutex);
258            gLockedText = &text;
259            lockedMutexAtThisLevel = TRUE;
260        }
261    }
262
263    // Check to make sure we don't dereference a null pointer.
264    if (fData != NULL) {
265	    while (index.start < index.limit &&
266	           loopCount <= loopLimit &&
267	           fData->ruleSet.transliterate(text, index, isIncremental)) {
268	        ++loopCount;
269	    }
270    }
271    if (lockedMutexAtThisLevel) {
272        gLockedText = NULL;
273        umtx_unlock(&transliteratorDataMutex);
274    }
275}
276
277UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
278                                                UBool escapeUnprintable) const {
279    return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
280}
281
282/**
283 * Implement Transliterator framework
284 */
285void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
286    fData->ruleSet.getSourceTargetSet(result, FALSE);
287}
288
289/**
290 * Override Transliterator framework
291 */
292UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
293    return fData->ruleSet.getSourceTargetSet(result, TRUE);
294}
295
296U_NAMESPACE_END
297
298#endif /* #if !UCONFIG_NO_TRANSLITERATION */
299