164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 5c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert* Copyright (C) 1999-2015, International Business Machines 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/17/99 aliu Creation. 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/rep.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_pars.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_data.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_rule.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt.h" 23c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#include "mutex.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "umutex.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 3054dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER; 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic Replaceable *gLockedText = NULL; 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleBasedTransliterator::_construct(const UnicodeString& rules, 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& parseError, 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) { 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fData = 0; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isDataOwned = TRUE; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliteratorParser parser(status); 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru parser.parse(rules, direction, parseError, status); 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (parser.idBlockVector.size() != 0 || 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru parser.compoundFilter != NULL || 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru parser.dataVector.size() == 0) { 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a new transliterator from the given rules. 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param id the id for the transliterator. 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules rules, separated by ';' 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction either FORWARD or REVERSE. 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adoptedFilter the filter for this transliterator. 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param parseError Struct to recieve information on position 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of error if an error is encountered 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Output param set to success/failure code. 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @exception IllegalArgumentException if rules are malformed 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or direction is invalid. 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator( 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& id, 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& rules, 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter, 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& parseError, 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, adoptedFilter) { 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _construct(rules, direction,parseError,status); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a new transliterator from the given rules. 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param id the id for the transliterator. 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param rules rules, separated by ';' 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction either FORWARD or REVERSE. 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adoptedFilter the filter for this transliterator. 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status Output param set to success/failure code. 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @exception IllegalArgumentException if rules are malformed 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or direction is invalid. 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator( 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& id, 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& rules, 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter, 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, adoptedFilter) { 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError parseError; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _construct(rules, direction,parseError, status); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Covenience constructor with no filter. 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator( 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& id, 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& rules, 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, 0) { 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError parseError; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _construct(rules, direction,parseError, status); 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Covenience constructor with no filter and FORWARD direction. 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator( 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& id, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& rules, 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, 0) { 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError parseError; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _construct(rules, UTRANS_FORWARD, parseError, status); 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Covenience constructor with FORWARD direction. 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*RuleBasedTransliterator::RuleBasedTransliterator( 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& id, 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& rules, 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter, 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, adoptedFilter) { 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError parseError; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _construct(rules, UTRANS_FORWARD,parseError, status); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}*/ 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const TransliterationRuleData* theData, 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter) : 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, adoptedFilter), 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fData((TransliterationRuleData*)theData), // cast away const 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isDataOwned(FALSE) { 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Internal constructor. 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliterationRuleData* theData, 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isDataAdopted) : 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, 0), 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fData(theData), 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isDataOwned(isDataAdopted) { 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::RuleBasedTransliterator( 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const RuleBasedTransliterator& other) : 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(other), fData(other.fData), 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isDataOwned(other.isDataOwned) { 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The data object may or may not be owned. If it is not owned we 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // share it; it is invariant. If it is owned, it's still 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // invariant, but we need to copy it to prevent double-deletion. 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If this becomes a performance issue (if people do a lot of RBT 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // copying -- unlikely) we can reference count the data object. 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Only do a deep copy if this is owned data, that is, data that 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // will be later deleted. System transliterators contain 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // non-owned data. 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isDataOwned) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fData = new TransliterationRuleData(*other.fData); 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor. 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::~RuleBasedTransliterator() { 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Delete the data object only if we own it. 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isDataOwned) { 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete fData; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* // Covariant return NOT ALLOWED (for portability) 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::clone(void) const { 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new RuleBasedTransliterator(*this); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruRuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental) const { 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* We keep contextStart and contextLimit fixed the entire time, 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * relative to the text -- contextLimit may move numerically if 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text is inserted or removed. The start offset moves toward 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * limit, with replacements happening under it. 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Example: rules 1. ab>x|y 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. yc>z 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * |eabcd begin - no match, advance start 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * e|abcd match rule 1 - change text & adjust start 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ex|ycd match rule 2 - change text & adjust start 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * exz|d no match, advance start 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * exzd| done 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* A rule like 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a>b|a 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * creates an infinite loop. To prevent that, we put an arbitrary 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * limit on the number of iterations that we take, one that is 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * high enough that any reasonable rules are ok, but low enough to 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * prevent a server from hanging. The limit is 16 times the 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * number of characters n, unless n is so large that 16n exceeds a 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * uint32_t. 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t loopCount = 0; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t loopLimit = index.limit - index.start; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (loopLimit >= 0x10000000) { 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru loopLimit = 0xFFFFFFFF; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru loopLimit <<= 4; 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // operations must be prevented. 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // A Complication: compound transliterators can result in recursive entries to this 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // function, sometimes with different "This" objects, always with the same text. 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Double-locking must be prevented in these cases. 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool lockedMutexAtThisLevel = FALSE; 2468de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert 2478de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // Test whether this request is operating on the same text string as 2488de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // some other transliteration that is still in progress and holding the 2498de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // transliteration mutex. If so, do not lock the transliteration 2508de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // mutex again. 2518de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // 2528de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // gLockedText variable is protected by the global ICU mutex. 2538de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // Shared RBT data protected by transliteratorDataMutex. 2548de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // 2558de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert // TODO(andy): Need a better scheme for handling this. 2568de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert UBool needToLock; 2578de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert { 2588de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert Mutex m; 2598de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert needToLock = (&text != gLockedText); 2608de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert } 2618de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert if (needToLock) { 2628de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert umtx_lock(&transliteratorDataMutex); // Contention, longish waits possible here. 2638de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert Mutex m; 2648de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert gLockedText = &text; 2658de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert lockedMutexAtThisLevel = TRUE; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // Check to make sure we don't dereference a null pointer. 26985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (fData != NULL) { 27085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while (index.start < index.limit && 27185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho loopCount <= loopLimit && 27285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fData->ruleSet.transliterate(text, index, isIncremental)) { 27385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++loopCount; 27485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (lockedMutexAtThisLevel) { 277c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert { 278c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert Mutex m; 279c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert gLockedText = NULL; 280c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert } 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru umtx_unlock(&transliteratorDataMutex); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const { 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fData->ruleSet.toRules(rulesSource, escapeUnprintable); 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement Transliterator framework 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fData->ruleSet.getSourceTargetSet(result, FALSE); 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Override Transliterator framework 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return fData->ruleSet.getSourceTargetSet(result, TRUE); 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 307