1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (C) 1999-2015, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   11/17/99    aliu        Creation.
10**********************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "unicode/rep.h"
18#include "unicode/uniset.h"
19#include "rbt_pars.h"
20#include "rbt_data.h"
21#include "rbt_rule.h"
22#include "rbt.h"
23#include "mutex.h"
24#include "umutex.h"
25
26U_NAMESPACE_BEGIN
27
// RTTI boilerplate for RuleBasedTransliterator.  The macro is defined
// outside this file.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)

// Mutex taken by handleTransliterate() around the rule-set transliteration
// loop.  It is acquired only by the outermost call for a given text; nested
// calls on the same text skip it (see gLockedText).
static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
// Address of the Replaceable currently being processed under
// transliteratorDataMutex, or NULL when no call holds that mutex.
// handleTransliterate() reads and writes it only while a
// default-constructed Mutex object is alive.
static Replaceable *gLockedText = NULL;
32
33void RuleBasedTransliterator::_construct(const UnicodeString& rules,
34                                         UTransDirection direction,
35                                         UParseError& parseError,
36                                         UErrorCode& status) {
37    fData = 0;
38    isDataOwned = TRUE;
39    if (U_FAILURE(status)) {
40        return;
41    }
42
43    TransliteratorParser parser(status);
44    parser.parse(rules, direction, parseError, status);
45    if (U_FAILURE(status)) {
46        return;
47    }
48
49    if (parser.idBlockVector.size() != 0 ||
50        parser.compoundFilter != NULL ||
51        parser.dataVector.size() == 0) {
52        status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
53        return;
54    }
55
56    fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
57    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
58}
59
60/**
61 * Constructs a new transliterator from the given rules.
62 * @param id            the id for the transliterator.
63 * @param rules         rules, separated by ';'
64 * @param direction     either FORWARD or REVERSE.
65 * @param adoptedFilter the filter for this transliterator.
66 * @param parseError    Struct to recieve information on position
67 *                      of error if an error is encountered
68 * @param status        Output param set to success/failure code.
69 * @exception IllegalArgumentException if rules are malformed
70 * or direction is invalid.
71 */
72RuleBasedTransliterator::RuleBasedTransliterator(
73                            const UnicodeString& id,
74                            const UnicodeString& rules,
75                            UTransDirection direction,
76                            UnicodeFilter* adoptedFilter,
77                            UParseError& parseError,
78                            UErrorCode& status) :
79    Transliterator(id, adoptedFilter) {
80    _construct(rules, direction,parseError,status);
81}
82
83/**
84 * Constructs a new transliterator from the given rules.
85 * @param id            the id for the transliterator.
86 * @param rules         rules, separated by ';'
87 * @param direction     either FORWARD or REVERSE.
88 * @param adoptedFilter the filter for this transliterator.
89 * @param status        Output param set to success/failure code.
90 * @exception IllegalArgumentException if rules are malformed
91 * or direction is invalid.
92 */
93/*RuleBasedTransliterator::RuleBasedTransliterator(
94                            const UnicodeString& id,
95                            const UnicodeString& rules,
96                            UTransDirection direction,
97                            UnicodeFilter* adoptedFilter,
98                            UErrorCode& status) :
99    Transliterator(id, adoptedFilter) {
100    UParseError parseError;
101    _construct(rules, direction,parseError, status);
102}*/
103
104/**
 * Convenience constructor with no filter.
106 */
107/*RuleBasedTransliterator::RuleBasedTransliterator(
108                            const UnicodeString& id,
109                            const UnicodeString& rules,
110                            UTransDirection direction,
111                            UErrorCode& status) :
112    Transliterator(id, 0) {
113    UParseError parseError;
114    _construct(rules, direction,parseError, status);
115}*/
116
117/**
 * Convenience constructor with no filter and FORWARD direction.
119 */
120/*RuleBasedTransliterator::RuleBasedTransliterator(
121                            const UnicodeString& id,
122                            const UnicodeString& rules,
123                            UErrorCode& status) :
124    Transliterator(id, 0) {
125    UParseError parseError;
126    _construct(rules, UTRANS_FORWARD, parseError, status);
127}*/
128
129/**
 * Convenience constructor with FORWARD direction.
131 */
132/*RuleBasedTransliterator::RuleBasedTransliterator(
133                            const UnicodeString& id,
134                            const UnicodeString& rules,
135                            UnicodeFilter* adoptedFilter,
136                            UErrorCode& status) :
137    Transliterator(id, adoptedFilter) {
138    UParseError parseError;
139    _construct(rules, UTRANS_FORWARD,parseError, status);
140}*/
141
142RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
143                                 const TransliterationRuleData* theData,
144                                 UnicodeFilter* adoptedFilter) :
145    Transliterator(id, adoptedFilter),
146    fData((TransliterationRuleData*)theData), // cast away const
147    isDataOwned(FALSE) {
148    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
149}
150
151/**
152 * Internal constructor.
153 */
154RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
155                                                 TransliterationRuleData* theData,
156                                                 UBool isDataAdopted) :
157    Transliterator(id, 0),
158    fData(theData),
159    isDataOwned(isDataAdopted) {
160    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
161}
162
163/**
164 * Copy constructor.
165 */
166RuleBasedTransliterator::RuleBasedTransliterator(
167        const RuleBasedTransliterator& other) :
168    Transliterator(other), fData(other.fData),
169    isDataOwned(other.isDataOwned) {
170
171    // The data object may or may not be owned.  If it is not owned we
172    // share it; it is invariant.  If it is owned, it's still
173    // invariant, but we need to copy it to prevent double-deletion.
174    // If this becomes a performance issue (if people do a lot of RBT
175    // copying -- unlikely) we can reference count the data object.
176
177    // Only do a deep copy if this is owned data, that is, data that
178    // will be later deleted.  System transliterators contain
179    // non-owned data.
180    if (isDataOwned) {
181        fData = new TransliterationRuleData(*other.fData);
182    }
183}
184
185/**
186 * Destructor.
187 */
188RuleBasedTransliterator::~RuleBasedTransliterator() {
189    // Delete the data object only if we own it.
190    if (isDataOwned) {
191        delete fData;
192    }
193}
194
195Transliterator* // Covariant return NOT ALLOWED (for portability)
196RuleBasedTransliterator::clone(void) const {
197    return new RuleBasedTransliterator(*this);
198}
199
200/**
201 * Implements {@link Transliterator#handleTransliterate}.
202 */
203void
204RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
205                                             UBool isIncremental) const {
206    /* We keep contextStart and contextLimit fixed the entire time,
207     * relative to the text -- contextLimit may move numerically if
208     * text is inserted or removed.  The start offset moves toward
209     * limit, with replacements happening under it.
210     *
211     * Example: rules 1. ab>x|y
212     *                2. yc>z
213     *
214     * |eabcd   begin - no match, advance start
215     * e|abcd   match rule 1 - change text & adjust start
216     * ex|ycd   match rule 2 - change text & adjust start
217     * exz|d    no match, advance start
218     * exzd|    done
219     */
220
221    /* A rule like
222     *   a>b|a
223     * creates an infinite loop. To prevent that, we put an arbitrary
224     * limit on the number of iterations that we take, one that is
225     * high enough that any reasonable rules are ok, but low enough to
226     * prevent a server from hanging.  The limit is 16 times the
227     * number of characters n, unless n is so large that 16n exceeds a
228     * uint32_t.
229     */
230    uint32_t loopCount = 0;
231    uint32_t loopLimit = index.limit - index.start;
232    if (loopLimit >= 0x10000000) {
233        loopLimit = 0xFFFFFFFF;
234    } else {
235        loopLimit <<= 4;
236    }
237
238    // Transliterator locking.  Rule-based Transliterators are not thread safe; concurrent
239    //   operations must be prevented.
240    // A Complication: compound transliterators can result in recursive entries to this
241    //   function, sometimes with different "This" objects, always with the same text.
242    //   Double-locking must be prevented in these cases.
243    //
244
245    UBool    lockedMutexAtThisLevel = FALSE;
246
247    // Test whether this request is operating on the same text string as
248    //   some other transliteration that is still in progress and holding the
249    //   transliteration mutex.  If so, do not lock the transliteration
250    //    mutex again.
251    //
252    //  gLockedText variable is protected by the global ICU mutex.
253    //  Shared RBT data protected by transliteratorDataMutex.
254    //
255    // TODO(andy): Need a better scheme for handling this.
256    UBool needToLock;
257    {
258        Mutex m;
259        needToLock = (&text != gLockedText);
260    }
261    if (needToLock) {
262        umtx_lock(&transliteratorDataMutex);  // Contention, longish waits possible here.
263        Mutex m;
264        gLockedText = &text;
265        lockedMutexAtThisLevel = TRUE;
266    }
267
268    // Check to make sure we don't dereference a null pointer.
269    if (fData != NULL) {
270	    while (index.start < index.limit &&
271	           loopCount <= loopLimit &&
272	           fData->ruleSet.transliterate(text, index, isIncremental)) {
273	        ++loopCount;
274	    }
275    }
276    if (lockedMutexAtThisLevel) {
277        {
278            Mutex m;
279            gLockedText = NULL;
280        }
281        umtx_unlock(&transliteratorDataMutex);
282    }
283}
284
285UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
286                                                UBool escapeUnprintable) const {
287    return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
288}
289
290/**
291 * Implement Transliterator framework
292 */
293void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
294    fData->ruleSet.getSourceTargetSet(result, FALSE);
295}
296
297/**
298 * Override Transliterator framework
299 */
300UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
301    return fData->ruleSet.getSourceTargetSet(result, TRUE);
302}
303
304U_NAMESPACE_END
305
306#endif /* #if !UCONFIG_NO_TRANSLITERATION */
307