1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************** 354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * Copyright (C) 1999-2012, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Date Name Description 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 11/17/99 aliu Creation. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius#include "utypeinfo.h" // for 'typeid' to work 1227f654740f2a26ad62a5c155af9199af9e69b889claireho 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/translit.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/locid.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/msgfmt.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rep.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/resbund.h" 
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unifilt.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uscript.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/strenum.h" 27103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cpdtrans.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "nultrans.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbt_data.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbt_pars.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbt.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "transreg.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "name2uni.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "nortrans.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "remtrans.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "titletrn.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tolowtrn.h" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "toupptrn.h" 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uni2name.h" 41c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "brktrans.h" 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "esctrn.h" 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unesctrn.h" 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tridpars.h" 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "anytrans.h" 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include 
"util.h" 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h" 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "mutex.h" 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h" 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uinvchar.h" 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar TARGET_SEP = 0x002D; /*-*/ 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ID_DELIM = 0x003B; /*;*/ 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar VARIANT_SEP = 0x002F; // '/' 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prefix for resource bundle key for the display name for a 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator. The ID is appended to this to form the key. 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String. 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_DISPLAY_NAME_PREFIX[] = "%Translit%%"; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prefix for resource bundle key for the display name for a 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator SCRIPT. 
The ID is appended to this to form the key. 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String. 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_SCRIPT_DISPLAY_NAME_PREFIX[] = "%Translit%"; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resource bundle key for display name pattern. 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String forming a 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * MessageFormat pattern, e.g.: 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}". 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_DISPLAY_NAME_PATTERN[] = "TransliteratorNamePattern"; 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resource bundle key for the list of RuleBasedTransliterator IDs. 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String[] with each element 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * being a valid ID. The ID will be appended to RB_RULE_BASED_PREFIX 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to obtain the class name in which the RB_RULE key will be sought. 
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The mutex controlling access to registry object. 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 9254dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic UMutex registryMutex = U_MUTEX_INITIALIZER; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * System transliterator registry; non-null when initialized. 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 97103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic icu::TransliteratorRegistry* registry = 0; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Macro to check/initialize the registry. ONLY USE WITHIN 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// MUTEX. Avoids function call when registry is initialized. 
101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define HAVE_REGISTRY(status) (registry!=0 || initializeRegistry(status)) 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator) 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return TRUE if the given UTransPosition is valid for text of 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the given length. 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool positionIsValid(UTransPosition& index, int32_t len) { 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(index.contextStart < 0 || 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start < index.contextStart || 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit < index.start || 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextLimit < index.limit || 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len < index.contextLimit); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Default constructor. 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param theID the string identifier for this transliterator 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param theFilter the filter. 
Any character for which 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>filter.contains()</tt> returns <tt>FALSE</tt> will not be 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * altered by this transliterator. If <tt>filter</tt> is 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>null</tt> then no filtering is applied. 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::Transliterator(const UnicodeString& theID, 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter* adoptedFilter) : 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UObject(), ID(theID), filter(adoptedFilter), 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength(0) 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string, which is a non-aliased copy. 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.append((UChar)0); 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.truncate(ID.length()-1); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructor. 
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::~Transliterator() { 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter) { 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Copy constructor. 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::Transliterator(const Transliterator& other) : 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UObject(other), ID(other.ID), filter(0), 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength(other.maximumContextLength) 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string, which is a non-aliased copy. 
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.append((UChar)0); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.truncate(ID.length()-1); 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (other.filter != 0) { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We own the filter, so we must have our own copy 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = (UnicodeFilter*) other.filter->clone(); 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* Transliterator::clone() const { 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assignment operator. 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator& Transliterator::operator=(const Transliterator& other) { 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID = other.ID; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.getTerminatedBuffer(); 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength = other.maximumContextLength; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru adoptFilter((other.filter == 0) ? 
0 : (UnicodeFilter*) other.filter->clone()); 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates a segment of a string. <code>Transliterator</code> API. 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the string to be transliterated 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start the beginning index, inclusive; <code>0 <= start 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <= limit</code>. 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param limit the ending index, exclusive; <code>start <= limit 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <= text.length()</code>. 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the new limit index, or -1 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::transliterate(Replaceable& text, 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, int32_t limit) const { 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start < 0 || 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit < start || 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.length() < limit) { 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition offsets; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.contextStart= 
start; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.contextLimit = limit; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.start = start; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.limit = limit; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, offsets, FALSE, TRUE); 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return offsets.limit; 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates an entire string in place. Convenience method. 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the string to be transliterated 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text) const { 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transliterate(text, 0, text.length()); 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates the portion of the text buffer that can be 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated unambiguosly after new text has been inserted, 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * typically as a result of a keyboard event. 
The new text in 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>insertion</code> will be inserted into <code>text</code> 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at <code>index.contextLimit</code>, advancing 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code> by <code>insertion.length()</code>. 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Then the transliterator will try to transliterate characters of 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>text</code> between <code>index.start</code> and 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. Characters before 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.start</code> will not be changed. 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Upon return, values in <code>index</code> will be updated. 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextStart</code> will be advanced to the first 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character that future calls to this method will read. 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.start</code> and <code>index.contextLimit</code> will 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * be adjusted to delimit the range of text that future calls to 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this method may change. 
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Typical usage of this method begins with an initial call 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with <code>index.contextStart</code> and <code>index.contextLimit</code> 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set to indicate the portion of <code>text</code> to be 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated, and <code>index.start == index.contextStart</code>. 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Thereafter, <code>index</code> can be used without 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * modification in future calls, provided that all changes to 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>text</code> are made via this method. 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>This method assumes that future calls may be made that will 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * insert new text into the buffer. As a result, it only performs 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * unambiguous transliterations. After the last call to this 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * method, there may be untransliterated text that is waiting for 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * more input to resolve an ambiguity. In order to perform these 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pending transliterations, clients should call {@link 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #finishKeyboardTransliteration} after the last call to this 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * method has been made. 
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and untransliterated text 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an array of three integers. 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <ul><li><code>index.contextStart</code>: the beginning index, 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive; <code>0 <= index.contextStart <= index.contextLimit</code>. 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <li><code>index.contextLimit</code>: the ending index, exclusive; 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextStart <= index.contextLimit <= text.length()</code>. 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>insertion</code> is inserted at 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <li><code>index.start</code>: the next character to be 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * considered for transliteration; <code>index.contextStart <= 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * index.start <= index.contextLimit</code>. 
Characters before 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.start</code> will not be changed by future calls 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this method.</ul> 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param insertion text to be inserted and possibly 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated into the translation buffer at 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. If <code>null</code> then no text 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is inserted. 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #START 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #LIMIT 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #CURSOR 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #handleTransliterate 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @exception IllegalArgumentException if <code>index</code> 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is invalid 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text, 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& insertion, 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _transliterate(text, index, &insertion, status); 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
/**
 * Transliterates the portion of the text buffer that can be
 * transliterated unambiguously after a new character has been
 * inserted, typically as a result of a keyboard event.  This is a
 * convenience method; see {@link
 * #transliterate(Replaceable, int[], String)} for details.
 * @param text the buffer holding transliterated and
 * untransliterated text
 * @param index an array of three integers.  See {@link
 * #transliterate(Replaceable, int[], String)}.
 * @param insertion text to be inserted and possibly
 * transliterated into the translation buffer at
 * <code>index.contextLimit</code>.
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #transliterate(Replaceable, int[], String) 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text, 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 insertion, 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) const { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(insertion); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _transliterate(text, index, &str, status); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates the portion of the text buffer that can be 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated unambiguosly. This is a convenience method; see 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * {@link #transliterate(Replaceable, int[], String)} for 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * details. 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * untransliterated text 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an array of three integers. See {@link 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #transliterate(Replaceable, int[], String)}. 
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #transliterate(Replaceable, int[], String) 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text, 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) const { 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _transliterate(text, index, 0, status); 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Finishes any pending transliterations that were waiting for 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * more characters. Clients should call this method as the last 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * call after a sequence of one or more calls to 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>transliterate()</code>. 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * untransliterated text. 
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index the array of indices previously passed to {@link 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #transliterate} 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::finishTransliteration(Replaceable& text, 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index) const { 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!positionIsValid(index, text.length())) { 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, index, FALSE, TRUE); 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This internal method does keyboard transliteration. If the 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 'insertion' is non-null then we append it to 'text' before 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * proceeding. This method calls through to the pure virtual 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * framework method handleTransliterate() to do the actual 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * work. 
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_transliterate(Replaceable& text, 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString* insertion, 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!positionIsValid(index, text.length())) { 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ILLEGAL_ARGUMENT_ERROR; 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// int32_t originalStart = index.contextStart; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (insertion != 0) { 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.handleReplaceBetween(index.limit, index.limit, *insertion); 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit += insertion->length(); 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextLimit += insertion->length(); 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index.limit > 0 && 372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U16_IS_LEAD(text.charAt(index.limit - 1))) { 
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Oops, there is a dangling lead surrogate in the buffer. 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This will break most transliterators, since they will 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assume it is part of a pair. Don't transliterate until 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // more text comes in. 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, index, TRUE, TRUE); 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // I CAN'T DO what I'm attempting below now that the Kleene star 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // operator is supported. For example, in the rule 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ([:Lu:]+) { x } > $1; 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // what is the maximum context length? 
getMaximumContextLength() 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will return 1, but this is just the length of the ante context 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // part of the pattern string -- 1 character, which is a standin 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for a Quantifier, which contains a StringMatcher, which 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains a UnicodeSet. 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There is a complicated way to make this work again, and that's 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to add a "maximum left context" protocol into the 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeMatcher hierarchy. At present I'm not convinced this is 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // worth it. 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // --- 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The purpose of the code below is to keep the context small 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while doing incremental transliteration. When part of the left 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // context (between contextStart and start) is no longer needed, 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we try to advance contextStart past that portion. We use the 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // maximum context length to do so. 
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t newCS = index.start; 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = getMaximumContextLength(); 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (newCS > originalStart && n-- > 0) { 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --newCS; 411103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius newCS -= U16_LENGTH(text.char32At(newCS)) - 1; 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextStart = uprv_max(newCS, originalStart); 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This method breaks up the input text into runs of unfiltered 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * characters. It passes each such run to 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <subclass>.handleTransliterate(). Subclasses that can handle the 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter logic more efficiently themselves may override this method. 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * All transliteration calls in this class go through this method. 
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::filteredTransliterate(Replaceable& text, 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool incremental, 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool rollback) const { 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Short circuit path for transliterators with no filter in 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // non-incremental mode. 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter == 0 && !rollback) { 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleTransliterate(text, index, incremental); 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //---------------------------------------------------------------------- 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This method processes text in two groupings: 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RUNS -- A run is a contiguous group of characters which are contained 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the filter for this transliterator (filter.contains(ch) == TRUE). 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Text outside of runs may appear as context but it is not modified. 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The start and limit Position values are narrowed to each run. 
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // PASSES (incremental only) -- To make incremental mode work correctly, 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // each run is broken up into n passes, where n is the length (in code 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // points) of the run. Each pass contains the first n characters. If a 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pass is completely transliterated, it is committed, and further passes 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // include characters after the committed text. If a pass is blocked, 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and does not transliterate completely, then this method rolls back 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the changes made during the pass, extends the pass by one code point, 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and tries again. 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //---------------------------------------------------------------------- 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // globalLimit is the limit value for the entire operation. We 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // set index.limit to the end of each unfiltered run before 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // calling handleTransliterate(), so we need to maintain the real 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // value of index.limit here. After each transliteration, we 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // update globalLimit for insertions or deletions that have 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // happened. 
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t globalLimit = index.limit; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there is a non-null filter, then break the input text up. Say the 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input text has the form: 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // xxxabcxxdefxx 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // where 'x' represents a filtered character (filter.contains('x') == 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // false). Then we break this up into: 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // xxxabc xxdef xx 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Each pass through the loop consumes a run of filtered 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // characters (which are ignored) and a subsequent run of 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unfiltered characters (which are transliterated). 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter != NULL) { 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Narrow the range to be transliterated to the first segment 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of unfiltered characters at or after index.start. 
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Advance past filtered chars 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (index.start < globalLimit && 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru !filter->contains(c=text.char32At(index.start))) { 482103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius index.start += U16_LENGTH(c); 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the end of this run of unfiltered chars 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = index.start; 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (index.limit < globalLimit && 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter->contains(c=text.char32At(index.limit))) { 489103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius index.limit += U16_LENGTH(c); 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check to see if the unfiltered run is empty. This only 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // happens at the end of the string when all the remaining 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // characters are filtered. 
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index.limit == index.start) { 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(index.start == globalLimit); 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Is this run incremental? If there is additional 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // filtered text (if limit < globalLimit) then we pass in 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // an incremental value of FALSE to force the subclass to 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // complete the transliteration for this run. 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isIncrementalRun = 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (index.limit < globalLimit ? FALSE : incremental); 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t delta; 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Implement rollback. 
To understand the need for rollback, 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // consider the following transliterator: 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "t" is "a > A;" 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "u" is "A > b;" 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "v" is a compound of "t; NFD; u" with a filter [:Ll:] 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now apply "c" to the input text "a". The result is "b". But if 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the transliteration is done incrementally, then the NFD holds 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // things up after "t" has already transformed "a" to "A". When 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // finishTransliterate() is called, "A" is _not_ processed because 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it gets excluded by the [:Ll:] filter, and the end result is "A" 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // -- incorrect. The problem is that the filter is applied to a 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // partially-transliterated result, when we only want it to apply to 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input text. 
Although this example hinges on a compound 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterator containing NFD and a specific filter, it can 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // actually happen with any transliterator which may do a partial 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transformation in incremental mode into characters outside its 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // filter. 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // To handle this, when in incremental mode we supply characters to 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // handleTransliterate() in several passes. Each pass adds one more 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input character to the input text. That is, for input "ABCD", we 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // first try "A", then "AB", then "ABC", and finally "ABCD". If at 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // any point we block (upon return, start < limit) then we roll 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // back. If at any point we complete the run (upon return start == 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // limit) then we commit that run. 
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (rollback && isIncrementalRun) { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t runStart = index.start; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t runLimit = index.limit; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t runLength = runLimit - runStart; 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make a rollback copy at the end of the string 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rollbackOrigin = text.length(); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.copy(runStart, runLimit, rollbackOrigin); 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Variables reflecting the commitment of completely 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterated text. passStart is the runStart, advanced 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // past committed text. rollbackStart is the rollbackOrigin, 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // advanced past rollback text that corresponds to committed 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // text. 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passStart = runStart; 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rollbackStart = rollbackOrigin; 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The limit for each pass; we advance by one code point with 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // each iteration. 
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passLimit = index.start; 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Total length, in 16-bit code units, of uncommitted text. 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the length to be rolled back. 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t uncommittedLength = 0; 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Total delta (change in length) for all passes 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t totalDelta = 0; 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // PASS MAIN LOOP -- Start with a single character, and extend 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the text by one character at a time. Roll back partial 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterations and commit complete transliterations. 
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Length of additional code point, either one or two 572103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t charLength = U16_LENGTH(text.char32At(passLimit)); 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru passLimit += charLength; 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (passLimit > runLimit) { 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uncommittedLength += charLength; 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = passLimit; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delegate to subclass for actual transliteration. Upon 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return, start will be updated to point after the 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterated text, and limit and contextLimit will be 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // adjusted for length changes. 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleTransliterate(text, index, TRUE); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delta = index.limit - passLimit; // change in length 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We failed to completely transliterate this pass. 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Roll back the text. 
Indices remain unchanged; reset 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // them where necessary. 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index.start != index.limit) { 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the rollbackStart, adjusted for length changes 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and the deletion of partially transliterated text. 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rs = rollbackStart + delta - (index.limit - passStart); 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete the partially transliterated text 598103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius text.handleReplaceBetween(passStart, index.limit, UnicodeString()); 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy the rollback text back 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.copy(rs, rs + uncommittedLength, passStart); 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restore indices to their original values 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start = passStart; 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = passLimit; 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextLimit -= delta; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We did completely transliterate this pass. Update the 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // commit indices to record how far we got. 
Adjust indices 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for length change. 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Move the pass indices past the committed text. 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru passStart = passLimit = index.start; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust the rollbackStart for length changes and move 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it past the committed text. All characters we've 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // processed to this point are committed now, so zero 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // out the uncommittedLength. 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rollbackStart += delta + uncommittedLength; 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uncommittedLength = 0; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust indices for length changes. 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru runLimit += delta; 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru totalDelta += delta; 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust overall limit and rollbackOrigin for insertions and 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // deletions. Don't need to worry about contextLimit because 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // handleTransliterate() maintains that. 
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rollbackOrigin += totalDelta; 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalLimit += totalDelta; 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete the rollback copy 636103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, UnicodeString()); 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Move start past committed text 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start = passStart; 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delegate to subclass for actual transliteration. 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit = index.limit; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleTransliterate(text, index, isIncrementalRun); 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delta = index.limit - limit; // change in length 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In a properly written transliterator, start == limit after 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // handleTransliterate() returns when incremental is false. 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Catch cases where the subclass doesn't do this, and throw 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // an exception. 
(Just pinning start to limit is a bad idea, 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because what's probably happening is that the subclass 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // isn't transliterating all the way to the end, and it should 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in non-incremental mode.) 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!incremental && index.start != index.limit) { 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We can't throw an exception, so just fudge things 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start = index.limit; 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust overall limit for insertions/deletions. Don't need 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to worry about contextLimit because handleTransliterate() 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // maintains that. 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalLimit += delta; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter == NULL || isIncrementalRun) { 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we did completely transliterate this 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // run, then repeat with the next unfiltered run. 
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Start is valid where it is. Limit needs to be put back where 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it was, modulo adjustments for deletions/insertions. 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = globalLimit; 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::filteredTransliterate(Replaceable& text, 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool incremental) const { 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, index, incremental, FALSE); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Method for subclasses to use to set the maximum context length. 
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getMaximumContextLength 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::setMaximumContextLength(int32_t maxContextLength) { 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength = maxContextLength; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a programmatic identifier for this transliterator. 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this identifier is passed to <code>getInstance()</code>, it 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will return this object, if it has been registered. 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getAvailableIDs 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString& Transliterator::getID(void) const { 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ID; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a name for this transliterator that is appropriate for 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * display to the user in the default locale. See {@link 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #getDisplayName(Locale)} for details. 
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& ID, 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return getDisplayName(ID, Locale::getDefault(), result); 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a name for this transliterator that is appropriate for 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * display to the user in the given locale. This name is taken 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from the locale resource data in the standard manner of the 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>java.text</code> package. 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>If no localized names exist in the system resource bundles, 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a name is synthesized using a localized 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>MessageFormat</code> pattern from the resource data. The 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * arguments to this pattern are an integer followed by one or two 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * strings. The integer is the number of strings, either 1 or 2. 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The strings are formed by splitting the ID for this 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator at the first TARGET_SEP. 
If there is no TARGET_SEP, then the 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * entire ID forms the only string. 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param inLocale the Locale in which the display name should be 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * localized. 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see java.text.MessageFormat 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id, 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const Locale& inLocale, 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status); 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Suspend checking status until later... 
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normalize the ID 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source, target, variant; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool sawSource; 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource); 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (target.length() < 1) { 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No target; malformed id 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (variant.length() > 0) { // Change "Foo" to "/Foo" 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru variant.insert(0, VARIANT_SEP); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ID(source); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.append(TARGET_SEP).append(target).append(variant); 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // build the char* key 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char key[200]; 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX); 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX); 
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV); 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try to retrieve a UnicodeString from the bundle. 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString resString = bundle.getStringEx(key, status); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status) && resString.length() != 0) { 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result = resString; // [sic] assign & return 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_FORMATTING 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have failed to get a name from the locale data. This is 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // typical, since most transliterators will not have localized 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // name data. The next step is to retrieve the MessageFormat 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pattern from the locale data and to use it to synthesize the 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // name from the ID. 
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status); 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status) && resString.length() != 0) { 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MessageFormat msg(resString, inLocale, status); 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Suspend checking status until later... 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We pass either 2 or 3 Formattable objects to msg. 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Formattable args[3]; 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nargs; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[0].setLong(2); // # of args to follow 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[1].setString(source); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[2].setString(target); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nargs = 3; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use display names for the scripts, if they exist 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int j=1; j<=2; ++j) { 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX); 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[j].getString(s); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uprv_isInvariantUString(s.getBuffer(), s.length())) { 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = bundle.getStringEx(key, status); 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[j] = resString; 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FieldPosition pos; // ignored by msg 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru msg.format(args, nargs, result, pos, status); 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(variant); 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We should not reach this point unless there is something 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // wrong with the build or the RB_DISPLAY_NAME_PATTERN has 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // been deleted from the root RB_LOCALE_ELEMENTS resource. 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ID; 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the filter used by this transliterator, or <tt>null</tt> 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if this transliterator uses no filter. Caller musn't delete 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the result! 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeFilter* Transliterator::getFilter(void) const { 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return filter; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the filter used by this transliterator, or 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>NULL</tt> if this transliterator uses no filter. The 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * caller must eventually delete the result. After this call, 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this transliterator's filter is set to <tt>NULL</tt>. 
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFilter* Transliterator::orphanFilter(void) { 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter *result = filter; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = NULL; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Changes the filter used by this transliterator. If the filter 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is set to <tt>null</tt> then no filtering will occur. 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Callers must take care if a transliterator is in use by 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * multiple threads. The filter should not be changed by one 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * thread while another thread may be transliterating. 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::adoptFilter(UnicodeFilter* filterToAdopt) { 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = filterToAdopt; 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns this transliterator's inverse. 
See the class 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * documentation for details. This implementation simply inverts 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the two entities in the ID and attempts to retrieve the 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resulting transliterator. That is, if <code>getID()</code> 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns "A-B", then this method will return the result of 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>getInstance("B-A")</code>, or <code>null</code> if that 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * call fails. 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>This method does not take filtering into account. The 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returned transliterator will have no filter. 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Subclasses with knowledge of their inverse may wish to 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * override this method. 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a transliterator that is an inverse, not necessarily 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * exact, of this transliterator, or <code>null</code> if no such 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator is registered. 
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* Transliterator::createInverse(UErrorCode& status) const { 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parseError; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return Transliterator::createInstance(ID, UTRANS_REVERSE,parseError,status); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* U_EXPORT2 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::createInstance(const UnicodeString& ID, 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir, 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parseError; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return createInstance(ID, dir, parseError, status); 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a <code>Transliterator</code> object given its ID. 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The ID must be either a system transliterator ID or a ID registered 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * using <code>registerInstance()</code>. 
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A <code>Transliterator</code> object with the given ID 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getAvailableIDs 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getID 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* U_EXPORT2 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::createInstance(const UnicodeString& ID, 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir, 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError& parseError, 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString canonID; 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector list(status); 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* 
globalFilter; 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO add code for parseError...currently unused, but 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // later may be used by parsing code... 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!TransliteratorIDParser::parseCompoundID(ID, dir, canonID, list, globalFilter)) { 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INVALID_ID; 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::instantiateList(list, status); 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(list.size() > 0); 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t = NULL; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list.size() > 1 || canonID.indexOf(ID_DELIM) >= 0) { 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // has one child transliterator. This is so that toRules() will return the right thing 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (without any inactive ID), but our main ID still comes out correct. 
That is, if we 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;" 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // even though the ID is "(Lower);Latin-Greek;". 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new CompoundTransliterator(list, parseError, status); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = (Transliterator*)list.elementAt(0); 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 951c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check null pointer 952c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t != NULL) { 953c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->setID(canonID); 954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (globalFilter != NULL) { 955c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->adoptFilter(globalFilter); 956c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 957c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 958c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (U_SUCCESS(status)) { 959c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Create a transliterator from a basic ID. 
This is an ID 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * containing only the forward direction source, target, and 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * variant. 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id a basic ID of the form S-T or S-T/V. 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created Transliterator or null if the ID is 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * invalid. 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* Transliterator::createBasicInstance(const UnicodeString& id, 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString* canon) { 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorAlias* alias = 0; 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t = 0; 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 980c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = registry->get(id, alias, ec); 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We may have not gotten a transliterator: Because we can't 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // instantiate a transliterator from inside TransliteratorRegistry:: 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get() (that would deadlock), we sometimes pass back an alias. This 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains the data we need to finish the instantiation outside the 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // registry mutex. The alias may, in turn, generate another alias, so 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we handle aliases in a loop. The max times through the loop is two. 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [alan] 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (alias != 0) { 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(t==0); 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule-based aliases are handled with TransliteratorAlias:: 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // parse(), followed by TransliteratorRegistry::reget(). 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Other aliases are handled with TransliteratorAlias::create(). 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (alias->isRuleBased()) { 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Step 1. 
parse 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorParser parser(ec); 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias->parse(parser, pe, ec); 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias = 0; 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Step 2. reget 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 1012c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = registry->reget(id, parser, alias, ec); 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Step 3. Loop back around! 
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = alias->create(pe, ec); 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias = 0; 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = NULL; 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL && canon != NULL) { 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->setID(*canon); 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a <code>Transliterator</code> object constructed from 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the given rule string. 
This will be a RuleBasedTransliterator, 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the rule string contains only rules, or a 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * CompoundTransliterator, if it contains ID blocks, or a 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NullTransliterator, if it contains ID blocks which parse as 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * empty for the given direction. 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* U_EXPORT2 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::createFromRules(const UnicodeString& ID, 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& rules, 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir, 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError& parseError, 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t = NULL; 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorParser parser(status); 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parser.parse(rules, dir, parseError, status); 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NOTE: The logic here 
matches that in TransliteratorRegistry. 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new NullTransliterator(); 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) { 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE); 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) { 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // idBlock, no data -- this is an alias. The ID has 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // been munged from reverse into forward mode, if 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // necessary, so instantiate the ID in the forward 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direction. 
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parser.compoundFilter != NULL) { 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString filterPattern; 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parser.compoundFilter->toPattern(filterPattern, FALSE); 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = createInstance(filterPattern + UnicodeString(ID_DELIM) 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status); 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL) { 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->setID(ID); 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector transliterators(status); 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passNumber = 1; 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit = parser.idBlockVector.size(); 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parser.dataVector.size() > limit) 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit = parser.dataVector.size(); 
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < limit; i++) { 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i < parser.idBlockVector.size()) { 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!idBlock->isEmpty()) { 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status); 110227f654740f2a26ad62a5c155af9199af9e69b889claireho if (temp != NULL && typeid(*temp) != typeid(NullTransliterator)) 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transliterators.addElement(temp, status); 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete temp; 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!parser.dataVector.isEmpty()) { 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 1110103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? 1111103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru data, TRUE); 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer. 
1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (temprbt == NULL) { 1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 1116c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return t; 1117c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1118c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru transliterators.addElement(temprbt, status); 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status); 1123c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointer check 1124c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (t != NULL) { 1125c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->setID(ID); 1126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->adoptFilter(parser.orphanCompoundFilter()); 1127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_SUCCESS(status) && t == NULL) { 1130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::toRules(UnicodeString& rulesSource, 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const { 
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The base class implementation of toRules munges the ID into 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the correct format. That is: foo => ::foo 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapeUnprintable) { 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.truncate(0); 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString id = getID(); 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<id.length();) { 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = id.char32At(i); 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ICU_Utility::escapeUnprintable(rulesSource, c)) { 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.append(c); 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius i += U16_LENGTH(c); 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource = getID(); 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // KEEP in sync with rbt_pars 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.insert(0, UNICODE_STRING_SIMPLE("::")); 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.append(ID_DELIM); 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return rulesSource; 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::countElements() const { 
115927f654740f2a26ad62a5c155af9199af9e69b889claireho const CompoundTransliterator* ct = dynamic_cast<const CompoundTransliterator*>(this); 116027f654740f2a26ad62a5c155af9199af9e69b889claireho return ct != NULL ? ct->getCount() : 0; 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const { 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 116727f654740f2a26ad62a5c155af9199af9e69b889claireho const CompoundTransliterator* cpd = dynamic_cast<const CompoundTransliterator*>(this); 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = (cpd == NULL) ? 1 : cpd->getCount(); 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index < 0 || index >= n) { 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_INDEX_OUTOFBOUNDS_ERROR; 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (n == 1) ? 
*this : cpd->getTransliterator(index); 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const { 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleGetSourceSet(result); 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter != NULL) { 118027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet* filterSet = dynamic_cast<UnicodeSet*>(filter); 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool deleteFilterSet = FALSE; 1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Most, but not all filters will be UnicodeSets. Optimize for 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // the high-runner case. 118427f654740f2a26ad62a5c155af9199af9e69b889claireho if (filterSet == NULL) { 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru filterSet = new UnicodeSet(); 1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check null pointer 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (filterSet == NULL) { 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return result; 1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru deleteFilterSet = TRUE; 1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru filter->addMatchSetTo(*filterSet); 1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru result.retainAll(*filterSet); 1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (deleteFilterSet) { 1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete filterSet; 
1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::handleGetSourceSet(UnicodeSet& result) const { 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.clear(); 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const { 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result.clear(); 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// For public consumption 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id, 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Factory factory, 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Token context) { 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1214c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1215c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerFactory(id, factory, context); 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To be called only by Transliterator subclasses that are called 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// to register themselves by initializeRegistry(). 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerFactory(const UnicodeString& id, 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Factory factory, 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Token context) { 1225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(id, factory, context, TRUE, ec); 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To be called only by Transliterator subclasses that are called 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// to register themselves by initializeRegistry(). 
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerSpecialInverse(const UnicodeString& target, 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& inverseTarget, 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool bidirectional) { 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidirectional, status); 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Registers a instance <tt>obj</tt> of a subclass of 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>Transliterator</code> with the system. This object must 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * implement the <tt>clone()</tt> method. When 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>getInstance()</tt> is called with an ID string that is 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * equal to <tt>obj.getID()</tt>, then <tt>obj.clone()</tt> is 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returned. 
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param obj an instance of subclass of 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>Transliterator</code> that defines <tt>clone()</tt> 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getInstance 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #unregister 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) { 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1254c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerInstance(adoptedPrototype); 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerInstance(Transliterator* adoptedPrototype) { 1260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(adoptedPrototype, TRUE, ec); 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID, 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& realID) { 
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1267c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1268c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerAlias(aliasID, realID); 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerAlias(const UnicodeString& aliasID, 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& realID) { 1275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(aliasID, realID, FALSE, TRUE, ec); 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unregisters a transliterator or class. This may be either 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a system transliterator or a user transliterator or class. 
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param ID the ID of the transliterator or class 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) { 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->remove(ID); 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * == OBSOLETE - remove in ICU 3.4 == 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the number of IDs currently registered with the system. 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * To retrieve the actual IDs, call getAvailableID(i) with 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * i from 0 to countAvailableIDs() - 1. 
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableIDs(void) { 1302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t retVal = 0; 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru retVal = registry->countAvailableIDs(); 1307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return retVal; 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * == OBSOLETE - remove in ICU 3.4 == 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the index-th available ID. index must be between 0 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and countAvailableIDs() - 1, inclusive. If index is out of 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * range, the result of getAvailableID(0) is returned. 
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) { 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString* result = NULL; 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 1320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ®istry->getAvailableID(index); 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(result != NULL); // fail if no registry 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *result; 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruStringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) { 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) return NULL; 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru StringEnumeration* result = NULL; 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 1333c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = registry->getAvailableIDs(); 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(result == NULL) { 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_INTERNAL_TRANSLITERATOR_ERROR; 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableSources(void) { 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1345c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1346c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return HAVE_REGISTRY(ec) ? _countAvailableSources() : 0; 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index, 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1352c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1353c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _getAvailableSource(index, result); 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) { 
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1361c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0; 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index, 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _getAvailableTarget(index, source, result); 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source, 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target) { 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1379c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1380c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return HAVE_REGISTRY(ec) ? 
_countAvailableVariants(source, target) : 0; 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index, 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target, 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1388c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 1389c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (HAVE_REGISTRY(ec)) { 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _getAvailableVariant(index, source, target, result); 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::_countAvailableSources(void) { 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->countAvailableSources(); 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::_getAvailableSource(int32_t index, 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->getAvailableSource(index, result); 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru} 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::_countAvailableTargets(const UnicodeString& source) { 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->countAvailableTargets(source); 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::_getAvailableTarget(int32_t index, 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->getAvailableTarget(index, source, result); 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::_countAvailableVariants(const UnicodeString& source, 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target) { 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->countAvailableVariants(source, target); 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::_getAvailableVariant(int32_t index, 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target, 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 
registry->getAvailableVariant(index, source, target, result); 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef U_USE_DEPRECATED_TRANSLITERATOR_API 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Method for subclasses to use to obtain a character in the given 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * string, with filtering. 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @deprecated the new architecture provides filtering at the top 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * level. This method will be removed Dec 31 2001. 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const { 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeFilter* localFilter = getFilter(); 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (localFilter == 0) ? text.charAt(i) : 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (localFilter->contains(c = text.charAt(i)) ? c : (UChar)0xFFFE); 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the registry is initialized, return TRUE. 
If not, initialize it 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and return TRUE. If the registry cannot be initialized, return 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * FALSE (rare). 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entire 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * initialization is done with the lock held. There is NO REASON to 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * unlock, since no other thread that is waiting on the registryMutex 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cannot itself proceed until the registry is initialized. 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruUBool Transliterator::initializeRegistry(UErrorCode &status) { 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (registry != 0) { 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry = new TransliteratorRegistry(status); 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (registry == 0 || U_FAILURE(status)) { 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete registry; 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry = 0; 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; // can't create registry, no recovery 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* The following code 
parses the index table located in 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * icu/data/translit/root.txt. The index is an n x 4 table 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that follows this format: 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id>{ 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * file{ 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resource{"<resource>"} 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction{"<direction>"} 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id>{ 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * internal{ 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resource{"<resource>"} 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction{"<direction"} 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id>{ 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * alias{"<getInstanceArg"} 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id> is the ID of the system transliterator being defined. These 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are public IDs enumerated by Transliterator.getAvailableIDs(), 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * unless the second field is "internal". 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <resource> is a ResourceReader resource name. 
Currently these refer 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to file names under com/ibm/text/resources. This string is passed 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * directly to ResourceReader, together with <encoding>. 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <direction> is either "FORWARD" or "REVERSE". 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <getInstanceArg> is a string to be passed directly to 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterator.getInstance(). The returned Transliterator object 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then has its ID changed to <id> and is returned. 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The extra blank field on "alias" lines is to make the array square. 
1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //static const char translit_index[] = "translit_index"; 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UResourceBundle *bundle, *transIDs, *colBund; 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t row, maxRows; 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxRows = ures_getSize(transIDs); 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (row = 0; row < maxRows; row++) { 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colBund = ures_getByIndex(transIDs, row, 0, &status); 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString id(ures_getKey(colBund), -1, US_INV); 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* typeStr = ures_getKey(res); 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar type; 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_charsToUChars(typeStr, &type, 1); 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 
1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = 0; 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *resString; 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (type) { 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x66: // 'f' 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x69: // 'i' 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 'file' or 'internal'; 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // row[2]=resource, row[3]=direction 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = ures_getStringByKey(res, "resource", &len, &status); 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool visible = (type == 0x0066 /*f*/); 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir = 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0046 /*F*/) ? 
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRANS_FORWARD : UTRANS_REVERSE; 1533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x61: // 'a' 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 'alias'; row[2]=createInstance argument 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = ures_getString(res, &len, &status); 1539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(res); 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(colBund); 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(transIDs); 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(bundle); 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Manually add prototypes that the system knows about to the 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // cache. 
This is how new non-rule-based transliterators are 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // added to the system. 1555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // This is to allow for null pointer check 1557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NullTransliterator* tempNullTranslit = new NullTransliterator(); 1558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator(); 1559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UppercaseTransliterator* tempUppercaseTranslit = new UppercaseTransliterator(); 1560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru TitlecaseTransliterator* tempTitlecaseTranslit = new TitlecaseTransliterator(); 1561c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeNameTransliterator* tempUnicodeTranslit = new UnicodeNameTransliterator(); 1562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru NameUnicodeTransliterator* tempNameUnicodeTranslit = new NameUnicodeTransliterator(); 1563c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 1564c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // TODO: could or should these transliterators be referenced polymorphically once constructed? 
1565c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru BreakTransliterator* tempBreakTranslit = new BreakTransliterator(); 1566c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 1567c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointers 1568c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL || 1569c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL || 1570c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 1571c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempBreakTranslit == NULL || 1572c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 1573c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempNameUnicodeTranslit == NULL ) 1574c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1575c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempNullTranslit; 1576c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempLowercaseTranslit; 1577c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempUppercaseTranslit; 1578c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempTitlecaseTranslit; 1579c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempUnicodeTranslit; 1580c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempNameUnicodeTranslit; 1581c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 1582c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete tempBreakTranslit; 1583c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 1584c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Since there was an error, remove registry 1585c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete 
registry; 1586c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry = NULL; 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1588c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 1589c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1590c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1591c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1592c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempNullTranslit, TRUE, status); 1593c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempLowercaseTranslit, TRUE, status); 1594c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempUppercaseTranslit, TRUE, status); 1595c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempTitlecaseTranslit, TRUE, status); 1596c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempUnicodeTranslit, TRUE, status); 1597c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempNameUnicodeTranslit, TRUE, status); 1598c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 1599c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru registry->put(tempBreakTranslit, FALSE, status); // FALSE means invisible. 
1600c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RemoveTransliterator::registerIDs(); // Must be within mutex 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru EscapeTransliterator::registerIDs(); 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnescapeTransliterator::registerIDs(); 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NormalizationTransliterator::registerIDs(); 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru AnyTransliterator::registerIDs(); 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"), 1609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNICODE_STRING_SIMPLE("Null"), FALSE); 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"), 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNICODE_STRING_SIMPLE("Lower"), TRUE); 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"), 1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNICODE_STRING_SIMPLE("Lower"), FALSE); 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1615b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup); 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 
1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Defined in ucln_in.h: 1623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Release all static memory held by transliterator. This will 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * necessarily invalidate any rule-based transliterators held by the 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * user, because RBTs hold pointers to common data objects. 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1629b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CFUNC UBool utrans_transliterator_cleanup(void) { 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::cleanup(); 1632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (registry) { 1633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete registry; 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry = NULL; 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//eof 1642