1/* 2********************************************************************** 3* Copyright (C) 1999-2015, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 11/17/99 aliu Creation. 8********************************************************************** 9*/ 10 11#include "unicode/utypes.h" 12 13#if !UCONFIG_NO_TRANSLITERATION 14 15#include "unicode/rep.h" 16#include "unicode/uniset.h" 17#include "rbt_pars.h" 18#include "rbt_data.h" 19#include "rbt_rule.h" 20#include "rbt.h" 21#include "mutex.h" 22#include "umutex.h" 23 24U_NAMESPACE_BEGIN 25 26UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) 27 28static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER; 29static Replaceable *gLockedText = NULL; 30 31void RuleBasedTransliterator::_construct(const UnicodeString& rules, 32 UTransDirection direction, 33 UParseError& parseError, 34 UErrorCode& status) { 35 fData = 0; 36 isDataOwned = TRUE; 37 if (U_FAILURE(status)) { 38 return; 39 } 40 41 TransliteratorParser parser(status); 42 parser.parse(rules, direction, parseError, status); 43 if (U_FAILURE(status)) { 44 return; 45 } 46 47 if (parser.idBlockVector.size() != 0 || 48 parser.compoundFilter != NULL || 49 parser.dataVector.size() == 0) { 50 status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT 51 return; 52 } 53 54 fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 55 setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 56} 57 58/** 59 * Constructs a new transliterator from the given rules. 60 * @param id the id for the transliterator. 61 * @param rules rules, separated by ';' 62 * @param direction either FORWARD or REVERSE. 63 * @param adoptedFilter the filter for this transliterator. 64 * @param parseError Struct to recieve information on position 65 * of error if an error is encountered 66 * @param status Output param set to success/failure code. 67 * @exception IllegalArgumentException if rules are malformed 68 * or direction is invalid. 69 */ 70RuleBasedTransliterator::RuleBasedTransliterator( 71 const UnicodeString& id, 72 const UnicodeString& rules, 73 UTransDirection direction, 74 UnicodeFilter* adoptedFilter, 75 UParseError& parseError, 76 UErrorCode& status) : 77 Transliterator(id, adoptedFilter) { 78 _construct(rules, direction,parseError,status); 79} 80 81/** 82 * Constructs a new transliterator from the given rules. 83 * @param id the id for the transliterator. 84 * @param rules rules, separated by ';' 85 * @param direction either FORWARD or REVERSE. 86 * @param adoptedFilter the filter for this transliterator. 87 * @param status Output param set to success/failure code. 88 * @exception IllegalArgumentException if rules are malformed 89 * or direction is invalid. 90 */ 91/*RuleBasedTransliterator::RuleBasedTransliterator( 92 const UnicodeString& id, 93 const UnicodeString& rules, 94 UTransDirection direction, 95 UnicodeFilter* adoptedFilter, 96 UErrorCode& status) : 97 Transliterator(id, adoptedFilter) { 98 UParseError parseError; 99 _construct(rules, direction,parseError, status); 100}*/ 101 102/** 103 * Covenience constructor with no filter. 104 */ 105/*RuleBasedTransliterator::RuleBasedTransliterator( 106 const UnicodeString& id, 107 const UnicodeString& rules, 108 UTransDirection direction, 109 UErrorCode& status) : 110 Transliterator(id, 0) { 111 UParseError parseError; 112 _construct(rules, direction,parseError, status); 113}*/ 114 115/** 116 * Covenience constructor with no filter and FORWARD direction. 117 */ 118/*RuleBasedTransliterator::RuleBasedTransliterator( 119 const UnicodeString& id, 120 const UnicodeString& rules, 121 UErrorCode& status) : 122 Transliterator(id, 0) { 123 UParseError parseError; 124 _construct(rules, UTRANS_FORWARD, parseError, status); 125}*/ 126 127/** 128 * Covenience constructor with FORWARD direction. 129 */ 130/*RuleBasedTransliterator::RuleBasedTransliterator( 131 const UnicodeString& id, 132 const UnicodeString& rules, 133 UnicodeFilter* adoptedFilter, 134 UErrorCode& status) : 135 Transliterator(id, adoptedFilter) { 136 UParseError parseError; 137 _construct(rules, UTRANS_FORWARD,parseError, status); 138}*/ 139 140RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, 141 const TransliterationRuleData* theData, 142 UnicodeFilter* adoptedFilter) : 143 Transliterator(id, adoptedFilter), 144 fData((TransliterationRuleData*)theData), // cast away const 145 isDataOwned(FALSE) { 146 setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 147} 148 149/** 150 * Internal constructor. 151 */ 152RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, 153 TransliterationRuleData* theData, 154 UBool isDataAdopted) : 155 Transliterator(id, 0), 156 fData(theData), 157 isDataOwned(isDataAdopted) { 158 setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 159} 160 161/** 162 * Copy constructor. 163 */ 164RuleBasedTransliterator::RuleBasedTransliterator( 165 const RuleBasedTransliterator& other) : 166 Transliterator(other), fData(other.fData), 167 isDataOwned(other.isDataOwned) { 168 169 // The data object may or may not be owned. If it is not owned we 170 // share it; it is invariant. If it is owned, it's still 171 // invariant, but we need to copy it to prevent double-deletion. 172 // If this becomes a performance issue (if people do a lot of RBT 173 // copying -- unlikely) we can reference count the data object. 174 175 // Only do a deep copy if this is owned data, that is, data that 176 // will be later deleted. System transliterators contain 177 // non-owned data. 178 if (isDataOwned) { 179 fData = new TransliterationRuleData(*other.fData); 180 } 181} 182 183/** 184 * Destructor. 185 */ 186RuleBasedTransliterator::~RuleBasedTransliterator() { 187 // Delete the data object only if we own it. 188 if (isDataOwned) { 189 delete fData; 190 } 191} 192 193Transliterator* // Covariant return NOT ALLOWED (for portability) 194RuleBasedTransliterator::clone(void) const { 195 return new RuleBasedTransliterator(*this); 196} 197 198/** 199 * Implements {@link Transliterator#handleTransliterate}. 200 */ 201void 202RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, 203 UBool isIncremental) const { 204 /* We keep contextStart and contextLimit fixed the entire time, 205 * relative to the text -- contextLimit may move numerically if 206 * text is inserted or removed. The start offset moves toward 207 * limit, with replacements happening under it. 208 * 209 * Example: rules 1. ab>x|y 210 * 2. yc>z 211 * 212 * |eabcd begin - no match, advance start 213 * e|abcd match rule 1 - change text & adjust start 214 * ex|ycd match rule 2 - change text & adjust start 215 * exz|d no match, advance start 216 * exzd| done 217 */ 218 219 /* A rule like 220 * a>b|a 221 * creates an infinite loop. To prevent that, we put an arbitrary 222 * limit on the number of iterations that we take, one that is 223 * high enough that any reasonable rules are ok, but low enough to 224 * prevent a server from hanging. The limit is 16 times the 225 * number of characters n, unless n is so large that 16n exceeds a 226 * uint32_t. 227 */ 228 uint32_t loopCount = 0; 229 uint32_t loopLimit = index.limit - index.start; 230 if (loopLimit >= 0x10000000) { 231 loopLimit = 0xFFFFFFFF; 232 } else { 233 loopLimit <<= 4; 234 } 235 236 // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent 237 // operations must be prevented. 238 // A Complication: compound transliterators can result in recursive entries to this 239 // function, sometimes with different "This" objects, always with the same text. 240 // Double-locking must be prevented in these cases. 241 // 242 243 // If the transliteration data is exclusively owned by this transliterator object, 244 // we don't need to do any locking. No sharing between transliterators is possible, 245 // so no concurrent access from multiple threads is possible. 246 UBool lockedMutexAtThisLevel = FALSE; 247 if (isDataOwned == FALSE) { 248 // Test whether this request is operating on the same text string as 249 // some other transliteration that is still in progress and holding the 250 // transliteration mutex. If so, do not lock the transliteration 251 // mutex again. 252 // 253 // gLockedText variable is protected by the global ICU mutex. 254 // Shared RBT data protected by transliteratorDataMutex. 255 // 256 // TODO(andy): Need a better scheme for handling this. 257 UBool needToLock; 258 { 259 Mutex m; 260 needToLock = (&text != gLockedText); 261 } 262 if (needToLock) { 263 umtx_lock(&transliteratorDataMutex); // Contention, longish waits possible here. 264 Mutex m; 265 gLockedText = &text; 266 lockedMutexAtThisLevel = TRUE; 267 } 268 } 269 270 // Check to make sure we don't dereference a null pointer. 271 if (fData != NULL) { 272 while (index.start < index.limit && 273 loopCount <= loopLimit && 274 fData->ruleSet.transliterate(text, index, isIncremental)) { 275 ++loopCount; 276 } 277 } 278 if (lockedMutexAtThisLevel) { 279 { 280 Mutex m; 281 gLockedText = NULL; 282 } 283 umtx_unlock(&transliteratorDataMutex); 284 } 285} 286 287UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, 288 UBool escapeUnprintable) const { 289 return fData->ruleSet.toRules(rulesSource, escapeUnprintable); 290} 291 292/** 293 * Implement Transliterator framework 294 */ 295void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { 296 fData->ruleSet.getSourceTargetSet(result, FALSE); 297} 298 299/** 300 * Override Transliterator framework 301 */ 302UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { 303 return fData->ruleSet.getSourceTargetSet(result, TRUE); 304} 305 306U_NAMESPACE_END 307 308#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 309