164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
5c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert*   Copyright (C) 1999-2015, International Business Machines
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Date        Name        Description
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   11/17/99    aliu        Creation.
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru**********************************************************************
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_pars.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_data.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_rule.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt.h"
23c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#include "mutex.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
3054dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Replaceable *gLockedText = NULL;
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleBasedTransliterator::_construct(const UnicodeString& rules,
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         UTransDirection direction,
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         UParseError& parseError,
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         UErrorCode& status) {
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fData = 0;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isDataOwned = TRUE;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    TransliteratorParser parser(status);
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    parser.parse(rules, direction, parseError, status);
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(status)) {
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (parser.idBlockVector.size() != 0 ||
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        parser.compoundFilter != NULL ||
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        parser.dataVector.size() == 0) {
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a new transliterator from the given rules.
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param id            the id for the transliterator.
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules         rules, separated by ';'
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction     either FORWARD or REVERSE.
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adoptedFilter the filter for this transliterator.
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param parseError    Struct to recieve information on position
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                      of error if an error is encountered
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status        Output param set to success/failure code.
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @exception IllegalArgumentException if rules are malformed
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or direction is invalid.
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator(
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& id,
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& rules,
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UTransDirection direction,
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UnicodeFilter* adoptedFilter,
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UParseError& parseError,
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode& status) :
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, adoptedFilter) {
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _construct(rules, direction,parseError,status);
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a new transliterator from the given rules.
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param id            the id for the transliterator.
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules         rules, separated by ';'
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction     either FORWARD or REVERSE.
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adoptedFilter the filter for this transliterator.
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status        Output param set to success/failure code.
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @exception IllegalArgumentException if rules are malformed
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or direction is invalid.
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator(
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& id,
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& rules,
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UTransDirection direction,
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UnicodeFilter* adoptedFilter,
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode& status) :
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, adoptedFilter) {
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UParseError parseError;
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _construct(rules, direction,parseError, status);
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Convenience constructor with no filter.
 */
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator(
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& id,
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& rules,
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UTransDirection direction,
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode& status) :
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, 0) {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UParseError parseError;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _construct(rules, direction,parseError, status);
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Convenience constructor with no filter and FORWARD direction.
 */
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator(
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& id,
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& rules,
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode& status) :
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, 0) {
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UParseError parseError;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _construct(rules, UTRANS_FORWARD, parseError, status);
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/**
 * Convenience constructor with FORWARD direction.
 */
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator(
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& id,
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const UnicodeString& rules,
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UnicodeFilter* adoptedFilter,
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode& status) :
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, adoptedFilter) {
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UParseError parseError;
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    _construct(rules, UTRANS_FORWARD,parseError, status);
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 const TransliterationRuleData* theData,
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                 UnicodeFilter* adoptedFilter) :
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, adoptedFilter),
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fData((TransliterationRuleData*)theData), // cast away const
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isDataOwned(FALSE) {
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Internal constructor.
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                 TransliterationRuleData* theData,
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                 UBool isDataAdopted) :
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, 0),
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fData(theData),
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isDataOwned(isDataAdopted) {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor.
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator(
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const RuleBasedTransliterator& other) :
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(other), fData(other.fData),
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isDataOwned(other.isDataOwned) {
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // The data object may or may not be owned.  If it is not owned we
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // share it; it is invariant.  If it is owned, it's still
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // invariant, but we need to copy it to prevent double-deletion.
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // If this becomes a performance issue (if people do a lot of RBT
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // copying -- unlikely) we can reference count the data object.
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Only do a deep copy if this is owned data, that is, data that
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // will be later deleted.  System transliterators contain
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // non-owned data.
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (isDataOwned) {
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fData = new TransliterationRuleData(*other.fData);
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor.
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::~RuleBasedTransliterator() {
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Delete the data object only if we own it.
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (isDataOwned) {
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        delete fData;
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* // Covariant return NOT ALLOWED (for portability)
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::clone(void) const {
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new RuleBasedTransliterator(*this);
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}.
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                             UBool isIncremental) const {
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* We keep contextStart and contextLimit fixed the entire time,
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * relative to the text -- contextLimit may move numerically if
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * text is inserted or removed.  The start offset moves toward
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * limit, with replacements happening under it.
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Example: rules 1. ab>x|y
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *                2. yc>z
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * |eabcd   begin - no match, advance start
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * e|abcd   match rule 1 - change text & adjust start
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * ex|ycd   match rule 2 - change text & adjust start
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * exz|d    no match, advance start
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * exzd|    done
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* A rule like
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *   a>b|a
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * creates an infinite loop. To prevent that, we put an arbitrary
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * limit on the number of iterations that we take, one that is
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * high enough that any reasonable rules are ok, but low enough to
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * prevent a server from hanging.  The limit is 16 times the
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * number of characters n, unless n is so large that 16n exceeds a
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * uint32_t.
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t loopCount = 0;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t loopLimit = index.limit - index.start;
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (loopLimit >= 0x10000000) {
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        loopLimit = 0xFFFFFFFF;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        loopLimit <<= 4;
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Transliterator locking.  Rule-based Transliterators are not thread safe; concurrent
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   operations must be prevented.
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // A Complication: compound transliterators can result in recursive entries to this
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   function, sometimes with different "This" objects, always with the same text.
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   Double-locking must be prevented in these cases.
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool    lockedMutexAtThisLevel = FALSE;
2468de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert
2478de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    // Test whether this request is operating on the same text string as
2488de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //   some other transliteration that is still in progress and holding the
2498de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //   transliteration mutex.  If so, do not lock the transliteration
2508de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //    mutex again.
2518de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //
2528de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //  gLockedText variable is protected by the global ICU mutex.
2538de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //  Shared RBT data protected by transliteratorDataMutex.
2548de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    //
2558de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    // TODO(andy): Need a better scheme for handling this.
2568de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    UBool needToLock;
2578de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    {
2588de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        Mutex m;
2598de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        needToLock = (&text != gLockedText);
2608de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    }
2618de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert    if (needToLock) {
2628de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        umtx_lock(&transliteratorDataMutex);  // Contention, longish waits possible here.
2638de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        Mutex m;
2648de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        gLockedText = &text;
2658de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert        lockedMutexAtThisLevel = TRUE;
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    // Check to make sure we don't dereference a null pointer.
26985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if (fData != NULL) {
27085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	    while (index.start < index.limit &&
27185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	           loopCount <= loopLimit &&
27285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	           fData->ruleSet.transliterate(text, index, isIncremental)) {
27385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	        ++loopCount;
27485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	    }
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (lockedMutexAtThisLevel) {
277c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        {
278c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            Mutex m;
279c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            gLockedText = NULL;
280c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        }
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        umtx_unlock(&transliteratorDataMutex);
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                UBool escapeUnprintable) const {
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement Transliterator framework
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fData->ruleSet.getSourceTargetSet(result, FALSE);
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Override Transliterator framework
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return fData->ruleSet.getSourceTargetSet(result, TRUE);
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
307