1/*
2**********************************************************************
3*   Copyright (C) 1999-2015, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   11/17/99    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/rep.h"
16#include "unicode/uniset.h"
17#include "rbt_pars.h"
18#include "rbt_data.h"
19#include "rbt_rule.h"
20#include "rbt.h"
21#include "mutex.h"
22#include "umutex.h"
23
24U_NAMESPACE_BEGIN
25
// Expands the ICU-provided RTTI boilerplate macro for RuleBasedTransliterator.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)

// Held by handleTransliterate() for the duration of a transliteration pass
// whenever the rule data is not owned by the transliterator (isDataOwned is
// FALSE).
static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
// Address of the text being processed by the handleTransliterate() call that
// currently holds transliteratorDataMutex, or NULL when no call holds it.
// Used to recognise re-entrant calls on the same text so the mutex is not
// locked a second time.  Read and written only while a default-constructed
// Mutex object (the global ICU mutex, per the comments in
// handleTransliterate()) is in scope.
static Replaceable *gLockedText = NULL;
30
31void RuleBasedTransliterator::_construct(const UnicodeString& rules,
32                                         UTransDirection direction,
33                                         UParseError& parseError,
34                                         UErrorCode& status) {
35    fData = 0;
36    isDataOwned = TRUE;
37    if (U_FAILURE(status)) {
38        return;
39    }
40
41    TransliteratorParser parser(status);
42    parser.parse(rules, direction, parseError, status);
43    if (U_FAILURE(status)) {
44        return;
45    }
46
47    if (parser.idBlockVector.size() != 0 ||
48        parser.compoundFilter != NULL ||
49        parser.dataVector.size() == 0) {
50        status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
51        return;
52    }
53
54    fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
55    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
56}
57
58/**
59 * Constructs a new transliterator from the given rules.
60 * @param id            the id for the transliterator.
61 * @param rules         rules, separated by ';'
62 * @param direction     either FORWARD or REVERSE.
63 * @param adoptedFilter the filter for this transliterator.
64 * @param parseError    Struct to recieve information on position
65 *                      of error if an error is encountered
66 * @param status        Output param set to success/failure code.
67 * @exception IllegalArgumentException if rules are malformed
68 * or direction is invalid.
69 */
70RuleBasedTransliterator::RuleBasedTransliterator(
71                            const UnicodeString& id,
72                            const UnicodeString& rules,
73                            UTransDirection direction,
74                            UnicodeFilter* adoptedFilter,
75                            UParseError& parseError,
76                            UErrorCode& status) :
77    Transliterator(id, adoptedFilter) {
78    _construct(rules, direction,parseError,status);
79}
80
81/**
82 * Constructs a new transliterator from the given rules.
83 * @param id            the id for the transliterator.
84 * @param rules         rules, separated by ';'
85 * @param direction     either FORWARD or REVERSE.
86 * @param adoptedFilter the filter for this transliterator.
87 * @param status        Output param set to success/failure code.
88 * @exception IllegalArgumentException if rules are malformed
89 * or direction is invalid.
90 */
91/*RuleBasedTransliterator::RuleBasedTransliterator(
92                            const UnicodeString& id,
93                            const UnicodeString& rules,
94                            UTransDirection direction,
95                            UnicodeFilter* adoptedFilter,
96                            UErrorCode& status) :
97    Transliterator(id, adoptedFilter) {
98    UParseError parseError;
99    _construct(rules, direction,parseError, status);
100}*/
101
102/**
 * Convenience constructor with no filter.
104 */
105/*RuleBasedTransliterator::RuleBasedTransliterator(
106                            const UnicodeString& id,
107                            const UnicodeString& rules,
108                            UTransDirection direction,
109                            UErrorCode& status) :
110    Transliterator(id, 0) {
111    UParseError parseError;
112    _construct(rules, direction,parseError, status);
113}*/
114
115/**
 * Convenience constructor with no filter and FORWARD direction.
117 */
118/*RuleBasedTransliterator::RuleBasedTransliterator(
119                            const UnicodeString& id,
120                            const UnicodeString& rules,
121                            UErrorCode& status) :
122    Transliterator(id, 0) {
123    UParseError parseError;
124    _construct(rules, UTRANS_FORWARD, parseError, status);
125}*/
126
127/**
 * Convenience constructor with FORWARD direction.
129 */
130/*RuleBasedTransliterator::RuleBasedTransliterator(
131                            const UnicodeString& id,
132                            const UnicodeString& rules,
133                            UnicodeFilter* adoptedFilter,
134                            UErrorCode& status) :
135    Transliterator(id, adoptedFilter) {
136    UParseError parseError;
137    _construct(rules, UTRANS_FORWARD,parseError, status);
138}*/
139
140RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
141                                 const TransliterationRuleData* theData,
142                                 UnicodeFilter* adoptedFilter) :
143    Transliterator(id, adoptedFilter),
144    fData((TransliterationRuleData*)theData), // cast away const
145    isDataOwned(FALSE) {
146    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
147}
148
149/**
150 * Internal constructor.
151 */
152RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
153                                                 TransliterationRuleData* theData,
154                                                 UBool isDataAdopted) :
155    Transliterator(id, 0),
156    fData(theData),
157    isDataOwned(isDataAdopted) {
158    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
159}
160
161/**
162 * Copy constructor.
163 */
164RuleBasedTransliterator::RuleBasedTransliterator(
165        const RuleBasedTransliterator& other) :
166    Transliterator(other), fData(other.fData),
167    isDataOwned(other.isDataOwned) {
168
169    // The data object may or may not be owned.  If it is not owned we
170    // share it; it is invariant.  If it is owned, it's still
171    // invariant, but we need to copy it to prevent double-deletion.
172    // If this becomes a performance issue (if people do a lot of RBT
173    // copying -- unlikely) we can reference count the data object.
174
175    // Only do a deep copy if this is owned data, that is, data that
176    // will be later deleted.  System transliterators contain
177    // non-owned data.
178    if (isDataOwned) {
179        fData = new TransliterationRuleData(*other.fData);
180    }
181}
182
183/**
184 * Destructor.
185 */
186RuleBasedTransliterator::~RuleBasedTransliterator() {
187    // Delete the data object only if we own it.
188    if (isDataOwned) {
189        delete fData;
190    }
191}
192
193Transliterator* // Covariant return NOT ALLOWED (for portability)
194RuleBasedTransliterator::clone(void) const {
195    return new RuleBasedTransliterator(*this);
196}
197
198/**
199 * Implements {@link Transliterator#handleTransliterate}.
200 */
201void
202RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
203                                             UBool isIncremental) const {
204    /* We keep contextStart and contextLimit fixed the entire time,
205     * relative to the text -- contextLimit may move numerically if
206     * text is inserted or removed.  The start offset moves toward
207     * limit, with replacements happening under it.
208     *
209     * Example: rules 1. ab>x|y
210     *                2. yc>z
211     *
212     * |eabcd   begin - no match, advance start
213     * e|abcd   match rule 1 - change text & adjust start
214     * ex|ycd   match rule 2 - change text & adjust start
215     * exz|d    no match, advance start
216     * exzd|    done
217     */
218
219    /* A rule like
220     *   a>b|a
221     * creates an infinite loop. To prevent that, we put an arbitrary
222     * limit on the number of iterations that we take, one that is
223     * high enough that any reasonable rules are ok, but low enough to
224     * prevent a server from hanging.  The limit is 16 times the
225     * number of characters n, unless n is so large that 16n exceeds a
226     * uint32_t.
227     */
228    uint32_t loopCount = 0;
229    uint32_t loopLimit = index.limit - index.start;
230    if (loopLimit >= 0x10000000) {
231        loopLimit = 0xFFFFFFFF;
232    } else {
233        loopLimit <<= 4;
234    }
235
236    // Transliterator locking.  Rule-based Transliterators are not thread safe; concurrent
237    //   operations must be prevented.
238    // A Complication: compound transliterators can result in recursive entries to this
239    //   function, sometimes with different "This" objects, always with the same text.
240    //   Double-locking must be prevented in these cases.
241    //
242
243    // If the transliteration data is exclusively owned by this transliterator object,
244    //   we don't need to do any locking.  No sharing between transliterators is possible,
245    //   so no concurrent access from multiple threads is possible.
246    UBool    lockedMutexAtThisLevel = FALSE;
247    if (isDataOwned == FALSE) {
248        // Test whether this request is operating on the same text string as
249        //   some other transliteration that is still in progress and holding the
250        //   transliteration mutex.  If so, do not lock the transliteration
251        //    mutex again.
252        //
253        //  gLockedText variable is protected by the global ICU mutex.
254        //  Shared RBT data protected by transliteratorDataMutex.
255        //
256        // TODO(andy): Need a better scheme for handling this.
257        UBool needToLock;
258        {
259            Mutex m;
260            needToLock = (&text != gLockedText);
261        }
262        if (needToLock) {
263            umtx_lock(&transliteratorDataMutex);  // Contention, longish waits possible here.
264            Mutex m;
265            gLockedText = &text;
266            lockedMutexAtThisLevel = TRUE;
267        }
268    }
269
270    // Check to make sure we don't dereference a null pointer.
271    if (fData != NULL) {
272	    while (index.start < index.limit &&
273	           loopCount <= loopLimit &&
274	           fData->ruleSet.transliterate(text, index, isIncremental)) {
275	        ++loopCount;
276	    }
277    }
278    if (lockedMutexAtThisLevel) {
279        {
280            Mutex m;
281            gLockedText = NULL;
282        }
283        umtx_unlock(&transliteratorDataMutex);
284    }
285}
286
287UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
288                                                UBool escapeUnprintable) const {
289    return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
290}
291
292/**
293 * Implement Transliterator framework
294 */
295void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
296    fData->ruleSet.getSourceTargetSet(result, FALSE);
297}
298
299/**
300 * Override Transliterator framework
301 */
302UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
303    return fData->ruleSet.getSourceTargetSet(result, TRUE);
304}
305
306U_NAMESPACE_END
307
308#endif /* #if !UCONFIG_NO_TRANSLITERATION */
309