16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 1999-2013, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Date Name Description 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 11/17/99 aliu Creation. 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org********************************************************************** 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_TRANSLITERATION 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/rep.h" 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_pars.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_data.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_rule.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "umutex.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator) 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER; 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic Replaceable *gLockedText = NULL; 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleBasedTransliterator::_construct(const UnicodeString& rules, 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTransDirection direction, 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError& parseError, 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) { 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData = 0; 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isDataOwned = TRUE; 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org TransliteratorParser parser(status); 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parser.parse(rules, direction, parseError, status); 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (parser.idBlockVector.size() != 0 || 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parser.compoundFilter != NULL || 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org parser.dataVector.size() == 0) { 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Constructs a new transliterator from the given rules. 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param id the id for the transliterator. 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param rules rules, separated by ';' 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param direction either FORWARD or REVERSE. 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param adoptedFilter the filter for this transliterator. 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param parseError Struct to recieve information on position 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of error if an error is encountered 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Output param set to success/failure code. 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @exception IllegalArgumentException if rules are malformed 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or direction is invalid. 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator( 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& id, 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& rules, 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTransDirection direction, 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeFilter* adoptedFilter, 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError& parseError, 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, adoptedFilter) { 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _construct(rules, direction,parseError,status); 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Constructs a new transliterator from the given rules. 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param id the id for the transliterator. 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param rules rules, separated by ';' 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param direction either FORWARD or REVERSE. 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param adoptedFilter the filter for this transliterator. 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status Output param set to success/failure code. 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @exception IllegalArgumentException if rules are malformed 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or direction is invalid. 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator( 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& id, 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& rules, 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTransDirection direction, 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeFilter* adoptedFilter, 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, adoptedFilter) { 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError parseError; 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _construct(rules, direction,parseError, status); 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Covenience constructor with no filter. 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator( 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& id, 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& rules, 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTransDirection direction, 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, 0) { 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError parseError; 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _construct(rules, direction,parseError, status); 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Covenience constructor with no filter and FORWARD direction. 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator( 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& id, 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& rules, 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, 0) { 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError parseError; 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _construct(rules, UTRANS_FORWARD, parseError, status); 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Covenience constructor with FORWARD direction. 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator( 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& id, 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeString& rules, 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeFilter* adoptedFilter, 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode& status) : 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, adoptedFilter) { 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UParseError parseError; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _construct(rules, UTRANS_FORWARD,parseError, status); 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/ 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const TransliterationRuleData* theData, 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeFilter* adoptedFilter) : 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, adoptedFilter), 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData((TransliterationRuleData*)theData), // cast away const 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isDataOwned(FALSE) { 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Internal constructor. 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id, 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org TransliterationRuleData* theData, 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isDataAdopted) : 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(id, 0), 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData(theData), 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isDataOwned(isDataAdopted) { 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org setMaximumContextLength(fData->ruleSet.getMaximumContextLength()); 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copy constructor. 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator( 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const RuleBasedTransliterator& other) : 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Transliterator(other), fData(other.fData), 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isDataOwned(other.isDataOwned) { 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // The data object may or may not be owned. If it is not owned we 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // share it; it is invariant. If it is owned, it's still 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // invariant, but we need to copy it to prevent double-deletion. 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If this becomes a performance issue (if people do a lot of RBT 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // copying -- unlikely) we can reference count the data object. 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Only do a deep copy if this is owned data, that is, data that 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // will be later deleted. System transliterators contain 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // non-owned data. 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isDataOwned) { 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData = new TransliterationRuleData(*other.fData); 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Destructor. 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::~RuleBasedTransliterator() { 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Delete the data object only if we own it. 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isDataOwned) { 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fData; 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTransliterator* // Covariant return NOT ALLOWED (for portability) 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::clone(void) const { 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return new RuleBasedTransliterator(*this); 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Implements {@link Transliterator#handleTransliterate}. 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isIncremental) const { 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* We keep contextStart and contextLimit fixed the entire time, 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * relative to the text -- contextLimit may move numerically if 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * text is inserted or removed. The start offset moves toward 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * limit, with replacements happening under it. 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Example: rules 1. ab>x|y 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2. yc>z 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * |eabcd begin - no match, advance start 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * e|abcd match rule 1 - change text & adjust start 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ex|ycd match rule 2 - change text & adjust start 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * exz|d no match, advance start 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * exzd| done 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* A rule like 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a>b|a 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * creates an infinite loop. To prevent that, we put an arbitrary 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * limit on the number of iterations that we take, one that is 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * high enough that any reasonable rules are ok, but low enough to 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * prevent a server from hanging. The limit is 16 times the 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * number of characters n, unless n is so large that 16n exceeds a 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * uint32_t. 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t loopCount = 0; 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t loopLimit = index.limit - index.start; 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (loopLimit >= 0x10000000) { 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org loopLimit = 0xFFFFFFFF; 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org loopLimit <<= 4; 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // operations must be prevented. 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // A Complication: compound transliterators can result in recursive entries to this 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // function, sometimes with different "This" objects, always with the same text. 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Double-locking must be prevented in these cases. 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the transliteration data is exclusively owned by this transliterator object, 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // we don't need to do any locking. No sharing between transliterators is possible, 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // so no concurrent access from multiple threads is possible. 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool lockedMutexAtThisLevel = FALSE; 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (isDataOwned == FALSE) { 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Test whether this request is operating on the same text string as some 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // some other transliteration that is still in progress and holding the 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // transliteration mutex. If so, do not lock the transliteration 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // mutex again. 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO(andy): Need a better scheme for handling this. 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool needToLock; 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_lock(NULL); 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org needToLock = (&text != gLockedText); 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_unlock(NULL); 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (needToLock) { 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_lock(&transliteratorDataMutex); 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gLockedText = &text; 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lockedMutexAtThisLevel = TRUE; 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Check to make sure we don't dereference a null pointer. 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (fData != NULL) { 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while (index.start < index.limit && 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org loopCount <= loopLimit && 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData->ruleSet.transliterate(text, index, isIncremental)) { 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++loopCount; 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (lockedMutexAtThisLevel) { 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org gLockedText = NULL; 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_unlock(&transliteratorDataMutex); 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool escapeUnprintable) const { 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fData->ruleSet.toRules(rulesSource, escapeUnprintable); 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Implement Transliterator framework 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fData->ruleSet.getSourceTargetSet(result, FALSE); 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Override Transliterator framework 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fData->ruleSet.getSourceTargetSet(result, TRUE); 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 299