16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 1999-2013, International Business Machines
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Date        Name        Description
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   11/17/99    aliu        Creation.
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org**********************************************************************
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_TRANSLITERATION
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/rep.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_pars.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_data.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt_rule.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "rbt.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "umutex.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic Replaceable *gLockedText = NULL;
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleBasedTransliterator::_construct(const UnicodeString& rules,
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UTransDirection direction,
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UParseError& parseError,
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                         UErrorCode& status) {
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fData = 0;
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isDataOwned = TRUE;
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    TransliteratorParser parser(status);
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    parser.parse(rules, direction, parseError, status);
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (parser.idBlockVector.size() != 0 ||
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        parser.compoundFilter != NULL ||
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        parser.dataVector.size() == 0) {
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Constructs a new transliterator from the given rules.
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param id            the id for the transliterator.
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param rules         rules, separated by ';'
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param direction     either FORWARD or REVERSE.
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param adoptedFilter the filter for this transliterator.
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param parseError    Struct to recieve information on position
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                      of error if an error is encountered
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status        Output param set to success/failure code.
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @exception IllegalArgumentException if rules are malformed
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or direction is invalid.
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator(
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& id,
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& rules,
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UTransDirection direction,
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UnicodeFilter* adoptedFilter,
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UParseError& parseError,
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UErrorCode& status) :
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, adoptedFilter) {
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    _construct(rules, direction,parseError,status);
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Constructs a new transliterator from the given rules.
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param id            the id for the transliterator.
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param rules         rules, separated by ';'
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param direction     either FORWARD or REVERSE.
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param adoptedFilter the filter for this transliterator.
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param status        Output param set to success/failure code.
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @exception IllegalArgumentException if rules are malformed
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or direction is invalid.
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator(
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& id,
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& rules,
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UTransDirection direction,
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UnicodeFilter* adoptedFilter,
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UErrorCode& status) :
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, adoptedFilter) {
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError parseError;
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    _construct(rules, direction,parseError, status);
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
 * Convenience constructor with no filter.
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator(
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& id,
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& rules,
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UTransDirection direction,
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UErrorCode& status) :
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, 0) {
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError parseError;
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    _construct(rules, direction,parseError, status);
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
 * Convenience constructor with no filter and FORWARD direction.
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator(
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& id,
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& rules,
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UErrorCode& status) :
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, 0) {
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError parseError;
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    _construct(rules, UTRANS_FORWARD, parseError, status);
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
 * Convenience constructor with FORWARD direction.
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*RuleBasedTransliterator::RuleBasedTransliterator(
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& id,
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            const UnicodeString& rules,
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UnicodeFilter* adoptedFilter,
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                            UErrorCode& status) :
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, adoptedFilter) {
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UParseError parseError;
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    _construct(rules, UTRANS_FORWARD,parseError, status);
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}*/
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                 const TransliterationRuleData* theData,
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                 UnicodeFilter* adoptedFilter) :
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, adoptedFilter),
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fData((TransliterationRuleData*)theData), // cast away const
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isDataOwned(FALSE) {
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Internal constructor.
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                 TransliterationRuleData* theData,
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                 UBool isDataAdopted) :
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(id, 0),
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fData(theData),
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isDataOwned(isDataAdopted) {
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copy constructor.
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::RuleBasedTransliterator(
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const RuleBasedTransliterator& other) :
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    Transliterator(other), fData(other.fData),
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isDataOwned(other.isDataOwned) {
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // The data object may or may not be owned.  If it is not owned we
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // share it; it is invariant.  If it is owned, it's still
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // invariant, but we need to copy it to prevent double-deletion.
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // If this becomes a performance issue (if people do a lot of RBT
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // copying -- unlikely) we can reference count the data object.
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Only do a deep copy if this is owned data, that is, data that
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // will be later deleted.  System transliterators contain
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // non-owned data.
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isDataOwned) {
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fData = new TransliterationRuleData(*other.fData);
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Destructor.
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::~RuleBasedTransliterator() {
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Delete the data object only if we own it.
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isDataOwned) {
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete fData;
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgTransliterator* // Covariant return NOT ALLOWED (for portability)
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::clone(void) const {
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return new RuleBasedTransliterator(*this);
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Implements {@link Transliterator#handleTransliterate}.
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgRuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                             UBool isIncremental) const {
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* We keep contextStart and contextLimit fixed the entire time,
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * relative to the text -- contextLimit may move numerically if
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * text is inserted or removed.  The start offset moves toward
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * limit, with replacements happening under it.
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Example: rules 1. ab>x|y
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                2. yc>z
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * |eabcd   begin - no match, advance start
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * e|abcd   match rule 1 - change text & adjust start
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * ex|ycd   match rule 2 - change text & adjust start
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * exz|d    no match, advance start
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * exzd|    done
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* A rule like
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *   a>b|a
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * creates an infinite loop. To prevent that, we put an arbitrary
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * limit on the number of iterations that we take, one that is
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * high enough that any reasonable rules are ok, but low enough to
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * prevent a server from hanging.  The limit is 16 times the
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * number of characters n, unless n is so large that 16n exceeds a
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * uint32_t.
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t loopCount = 0;
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t loopLimit = index.limit - index.start;
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (loopLimit >= 0x10000000) {
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        loopLimit = 0xFFFFFFFF;
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        loopLimit <<= 4;
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Transliterator locking.  Rule-based Transliterators are not thread safe; concurrent
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   operations must be prevented.
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // A Complication: compound transliterators can result in recursive entries to this
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   function, sometimes with different "This" objects, always with the same text.
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   Double-locking must be prevented in these cases.
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // If the transliteration data is exclusively owned by this transliterator object,
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   we don't need to do any locking.  No sharing between transliterators is possible,
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    //   so no concurrent access from multiple threads is possible.
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool    lockedMutexAtThisLevel = FALSE;
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (isDataOwned == FALSE) {
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // Test whether this request is operating on the same text string as some
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   some other transliteration that is still in progress and holding the
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //   transliteration mutex.  If so, do not lock the transliteration
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        //    mutex again.
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        // TODO(andy): Need a better scheme for handling this.
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UBool needToLock;
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        umtx_lock(NULL);
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        needToLock = (&text != gLockedText);
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        umtx_unlock(NULL);
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if (needToLock) {
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            umtx_lock(&transliteratorDataMutex);
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            gLockedText = &text;
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            lockedMutexAtThisLevel = TRUE;
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Check to make sure we don't dereference a null pointer.
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (fData != NULL) {
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	    while (index.start < index.limit &&
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	           loopCount <= loopLimit &&
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	           fData->ruleSet.transliterate(text, index, isIncremental)) {
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	        ++loopCount;
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	    }
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (lockedMutexAtThisLevel) {
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        gLockedText = NULL;
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        umtx_unlock(&transliteratorDataMutex);
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                                                UBool escapeUnprintable) const {
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Implement Transliterator framework
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fData->ruleSet.getSourceTargetSet(result, FALSE);
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Override Transliterator framework
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fData->ruleSet.getSourceTargetSet(result, TRUE);
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_TRANSLITERATION */
299